ummaya 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (482)
  1. package/README.md +15 -2
  2. package/bin/ummaya +10 -1
  3. package/bun.lock +180 -244
  4. package/npm-shrinkwrap.json +760 -1760
  5. package/package.json +39 -22
  6. package/prompts/manifest.yaml +1 -1
  7. package/prompts/system_v1.md +1 -0
  8. package/pyproject.toml +27 -2
  9. package/specs/2803-document-production-hardening/contracts/document-tools.schema.json +1043 -0
  10. package/src/ummaya/_canonical/__init__.py +2 -0
  11. package/src/ummaya/_canonical/baselines.yaml +113 -0
  12. package/src/ummaya/engine/engine.py +29 -132
  13. package/src/ummaya/evidence/__init__.py +21 -2
  14. package/src/ummaya/evidence/dataset_contract.py +193 -0
  15. package/src/ummaya/evidence/document_authoring_cases.py +33 -0
  16. package/src/ummaya/evidence/document_harness.py +313 -0
  17. package/src/ummaya/evidence/document_viewer_ux.py +391 -0
  18. package/src/ummaya/evidence/gates.py +70 -0
  19. package/src/ummaya/evidence/json_types.py +20 -0
  20. package/src/ummaya/evidence/models.py +88 -1
  21. package/src/ummaya/evidence/output_payload.py +89 -0
  22. package/src/ummaya/evidence/payload_documents.py +233 -0
  23. package/src/ummaya/evidence/route_contracts.py +224 -0
  24. package/src/ummaya/evidence/route_helpers.py +150 -0
  25. package/src/ummaya/evidence/runner.py +81 -212
  26. package/src/ummaya/evidence/source_provenance.py +246 -0
  27. package/src/ummaya/evidence/source_provenance_redaction.py +176 -0
  28. package/src/ummaya/evidence/tool_layer.py +39 -0
  29. package/src/ummaya/evidence/tool_layer_models.py +151 -0
  30. package/src/ummaya/ipc/adapter_manifest_emitter.py +26 -10
  31. package/src/ummaya/ipc/document_intent_normalization.py +185 -0
  32. package/src/ummaya/ipc/frame_schema.py +5 -5
  33. package/src/ummaya/ipc/route_diagnostics.py +73 -0
  34. package/src/ummaya/ipc/stdio.py +1109 -477
  35. package/src/ummaya/llm/client.py +102 -3
  36. package/src/ummaya/llm/config.py +8 -3
  37. package/src/ummaya/primitives/__init__.py +6 -2
  38. package/src/ummaya/primitives/delegation.py +1 -1
  39. package/src/ummaya/primitives/document.py +28 -0
  40. package/src/ummaya/settings.py +0 -3
  41. package/src/ummaya/tools/discovery_bridge.py +17 -1
  42. package/src/ummaya/tools/documents/__init__.py +297 -0
  43. package/src/ummaya/tools/documents/adapter_registry.py +487 -0
  44. package/src/ummaya/tools/documents/archive_container_probe.py +167 -0
  45. package/src/ummaya/tools/documents/artifact_store.py +454 -0
  46. package/src/ummaya/tools/documents/authoring.py +283 -0
  47. package/src/ummaya/tools/documents/baselines.py +132 -0
  48. package/src/ummaya/tools/documents/capability.py +331 -0
  49. package/src/ummaya/tools/documents/contracts.py +112 -0
  50. package/src/ummaya/tools/documents/conversion.py +521 -0
  51. package/src/ummaya/tools/documents/diff.py +275 -0
  52. package/src/ummaya/tools/documents/engines.py +163 -0
  53. package/src/ummaya/tools/documents/evaluation.py +291 -0
  54. package/src/ummaya/tools/documents/explicit_values.py +108 -0
  55. package/src/ummaya/tools/documents/fixtures.py +174 -0
  56. package/src/ummaya/tools/documents/format_completion_audit.py +471 -0
  57. package/src/ummaya/tools/documents/formats/__init__.py +2 -0
  58. package/src/ummaya/tools/documents/formats/archive.py +528 -0
  59. package/src/ummaya/tools/documents/formats/base.py +41 -0
  60. package/src/ummaya/tools/documents/formats/code_file.py +211 -0
  61. package/src/ummaya/tools/documents/formats/data_file.py +272 -0
  62. package/src/ummaya/tools/documents/formats/hwp.py +284 -0
  63. package/src/ummaya/tools/documents/formats/hwpx.py +1837 -0
  64. package/src/ummaya/tools/documents/formats/odf.py +435 -0
  65. package/src/ummaya/tools/documents/formats/ooxml.py +1030 -0
  66. package/src/ummaya/tools/documents/formats/passive.py +766 -0
  67. package/src/ummaya/tools/documents/formats/pdf.py +702 -0
  68. package/src/ummaya/tools/documents/formats/text_web.py +268 -0
  69. package/src/ummaya/tools/documents/hwp_conversion_probe.py +178 -0
  70. package/src/ummaya/tools/documents/hwp_direct_candidate.py +141 -0
  71. package/src/ummaya/tools/documents/inspection.py +289 -0
  72. package/src/ummaya/tools/documents/intake.py +1079 -0
  73. package/src/ummaya/tools/documents/legacy_office_promotion_probe.py +366 -0
  74. package/src/ummaya/tools/documents/models.py +1598 -0
  75. package/src/ummaya/tools/documents/odf_promotion_probe.py +167 -0
  76. package/src/ummaya/tools/documents/orchestrator.py +96 -0
  77. package/src/ummaya/tools/documents/passive_capability_probe.py +251 -0
  78. package/src/ummaya/tools/documents/patch.py +170 -0
  79. package/src/ummaya/tools/documents/pdfa_conformance.py +284 -0
  80. package/src/ummaya/tools/documents/pdfa_promotion_probe.py +198 -0
  81. package/src/ummaya/tools/documents/permissions.py +110 -0
  82. package/src/ummaya/tools/documents/planner.py +616 -0
  83. package/src/ummaya/tools/documents/registry.py +2733 -0
  84. package/src/ummaya/tools/documents/render.py +978 -0
  85. package/src/ummaya/tools/documents/render_comparison.py +113 -0
  86. package/src/ummaya/tools/documents/render_comparison_models.py +74 -0
  87. package/src/ummaya/tools/documents/render_comparison_regions.py +73 -0
  88. package/src/ummaya/tools/documents/render_comparison_style.py +161 -0
  89. package/src/ummaya/tools/documents/reread.py +157 -0
  90. package/src/ummaya/tools/documents/runtime_authoring.py +244 -0
  91. package/src/ummaya/tools/documents/runtime_authoring_bundle.py +76 -0
  92. package/src/ummaya/tools/documents/scorecard.py +184 -0
  93. package/src/ummaya/tools/documents/socratic_planner.py +193 -0
  94. package/src/ummaya/tools/documents/style.py +48 -0
  95. package/src/ummaya/tools/documents/tool_defs.py +523 -0
  96. package/src/ummaya/tools/documents/validate.py +347 -0
  97. package/src/ummaya/tools/executor.py +29 -0
  98. package/src/ummaya/tools/live_proxy.py +0 -3
  99. package/src/ummaya/tools/models.py +5 -1
  100. package/src/ummaya/tools/register_all.py +8 -0
  101. package/src/ummaya/tools/registry.py +10 -1
  102. package/src/ummaya/tools/routing/__init__.py +59 -0
  103. package/src/ummaya/tools/routing/builder.py +105 -0
  104. package/src/ummaya/tools/routing/cards.py +29 -0
  105. package/src/ummaya/tools/routing/decision_service.py +534 -0
  106. package/src/ummaya/tools/routing/decision_types.py +74 -0
  107. package/src/ummaya/tools/routing/feasibility.py +122 -0
  108. package/src/ummaya/tools/routing/intent.py +17 -0
  109. package/src/ummaya/tools/routing/intent_extractor.py +207 -0
  110. package/src/ummaya/tools/routing/intent_patterns.py +160 -0
  111. package/src/ummaya/tools/routing/intent_public_data.py +150 -0
  112. package/src/ummaya/tools/routing/intent_types.py +48 -0
  113. package/src/ummaya/tools/routing/lint.py +78 -0
  114. package/src/ummaya/tools/routing/metadata.py +174 -0
  115. package/src/ummaya/tools/routing/projection.py +340 -0
  116. package/src/ummaya/tools/routing/retrieval_policy.py +629 -0
  117. package/src/ummaya/tools/routing/schema.py +81 -0
  118. package/src/ummaya/tools/routing/types.py +96 -0
  119. package/src/ummaya/tools/routing_index.py +2 -2
  120. package/src/ummaya/tools/search.py +34 -746
  121. package/tests/fixtures/documents/public_forms/baselines.yaml +113 -0
  122. package/tui/bun.lock +126 -305
  123. package/tui/package.json +35 -22
  124. package/tui/src/.cc-byte-identical-whitelist.yaml +266 -0
  125. package/tui/src/QueryEngine.ts +12 -8
  126. package/tui/src/bridge/inboundAttachments.ts +3 -3
  127. package/tui/src/cli/handlers/auth.ts +3 -12
  128. package/tui/src/cli/handlers/mcp.tsx +0 -1
  129. package/tui/src/cli/print.ts +8 -9
  130. package/tui/src/commands/insights.ts +1 -1
  131. package/tui/src/commands/install-github-app/types.ts +8 -30
  132. package/tui/src/commands/plugin/types.ts +6 -28
  133. package/tui/src/commands/plugin/unifiedTypes.ts +4 -26
  134. package/tui/src/commands/rename/generateSessionName.ts +1 -1
  135. package/tui/src/components/Feedback.tsx +1 -1
  136. package/tui/src/components/LogoV2/EmergencyTip.tsx +11 -2
  137. package/tui/src/components/LogoV2/WelcomeV2.tsx +1 -3
  138. package/tui/src/components/ScrollKeybindingHandler.tsx +6 -6
  139. package/tui/src/components/Spinner/types.ts +6 -28
  140. package/tui/src/components/agents/generateAgent.ts +1 -1
  141. package/tui/src/components/agents/new-agent-creation/types.ts +4 -26
  142. package/tui/src/components/config/EnvSecretIsolatedEditor.tsx +1 -1
  143. package/tui/src/components/mcp/types.ts +16 -38
  144. package/tui/src/components/messages/AssistantToolUseMessage.tsx +3 -2
  145. package/tui/src/components/messages/UserCrossSessionMessage.ts +16 -4
  146. package/tui/src/components/messages/UserForkBoilerplateMessage.ts +16 -4
  147. package/tui/src/components/messages/UserGitHubWebhookMessage.ts +16 -4
  148. package/tui/src/components/messages/UserToolResultMessage/utils.tsx +3 -2
  149. package/tui/src/components/permissions/MonitorPermissionRequest/MonitorPermissionRequest.ts +9 -4
  150. package/tui/src/components/permissions/ReviewArtifactPermissionRequest/ReviewArtifactPermissionRequest.ts +9 -4
  151. package/tui/src/components/primitive/DocumentSocraticReviewBlock.tsx +129 -0
  152. package/tui/src/components/primitive/DocumentToolResultCard.tsx +224 -0
  153. package/tui/src/components/primitive/documentSocraticReview.ts +215 -0
  154. package/tui/src/components/primitive/index.tsx +43 -1
  155. package/tui/src/components/primitive/types.ts +137 -0
  156. package/tui/src/components/ui/option.ts +4 -26
  157. package/tui/src/constants/common.ts +0 -2
  158. package/tui/src/constants/prompts.ts +4 -3
  159. package/tui/src/constants/querySource.ts +4 -26
  160. package/tui/src/entrypoints/sdk/controlTypes.ts +26 -48
  161. package/tui/src/entrypoints/sdk/coreTypes.generated.ts +3 -25
  162. package/tui/src/entrypoints/sdk/runtimeTypes.ts +38 -60
  163. package/tui/src/entrypoints/sdk/sdkUtilityTypes.ts +4 -26
  164. package/tui/src/entrypoints/sdk/settingsTypes.generated.ts +3 -25
  165. package/tui/src/entrypoints/sdk/toolTypes.ts +3 -25
  166. package/tui/src/hooks/toolPermission/handlers/interactiveHandler.ts +10 -0
  167. package/tui/src/hooks/useApiKeyVerification.ts +1 -1
  168. package/tui/src/hooks/useVirtualScroll.ts +1 -1
  169. package/tui/src/ink/ink.tsx +33 -14
  170. package/tui/src/ink/reconciler.ts +2 -3
  171. package/tui/src/ink/render-to-screen.ts +30 -10
  172. package/tui/src/ipc/bridge.ts +62 -15
  173. package/tui/src/ipc/bridgeSingleton.ts +5 -1
  174. package/tui/src/ipc/codec.ts +3 -3
  175. package/tui/src/ipc/frames.generated.ts +12 -12
  176. package/tui/src/ipc/llmClient.ts +151 -27
  177. package/tui/src/ipc/schema/frame.schema.json +1 -1
  178. package/tui/src/keybindings/defaultBindings.ts +4 -0
  179. package/tui/src/main.tsx +32 -15
  180. package/tui/src/native-ts/file-index/index.ts +33 -3
  181. package/tui/src/observability/surface.ts +2 -2
  182. package/tui/src/probes/toolRegistryProbe.tsx +3 -1
  183. package/tui/src/projectOnboardingState.ts +7 -6
  184. package/tui/src/query/chatMessageTypes.ts +18 -0
  185. package/tui/src/query/chatMessagesBuilder.ts +1 -1
  186. package/tui/src/query/deps.ts +1 -1
  187. package/tui/src/query/messageGuards.ts +106 -0
  188. package/tui/src/query/publicDataTerminalRepair.ts +384 -0
  189. package/tui/src/query/run.ts +1075 -0
  190. package/tui/src/query/supportBoundary.ts +168 -0
  191. package/tui/src/query/toolResultErrors.ts +103 -0
  192. package/tui/src/query/toolRunner.ts +687 -0
  193. package/tui/src/query/unavailableToolRepair.ts +118 -0
  194. package/tui/src/query.ts +9 -2186
  195. package/tui/src/screens/REPL.tsx +40 -29
  196. package/tui/src/services/api/adapterManifest.ts +4 -0
  197. package/tui/src/services/api/backendChat/events.ts +117 -0
  198. package/tui/src/services/api/backendChat/finalMessage.ts +40 -0
  199. package/tui/src/services/api/backendChat/frame.ts +9 -0
  200. package/tui/src/services/api/backendChat/streaming.ts +430 -0
  201. package/tui/src/services/api/backendChat/types.ts +62 -0
  202. package/tui/src/services/api/backendChat.ts +1 -0
  203. package/tui/src/services/api/client.ts +65 -2
  204. package/tui/src/services/api/errorUtils.ts +5 -5
  205. package/tui/src/services/api/errors.ts +1 -1
  206. package/tui/src/services/api/logging.ts +1 -1
  207. package/tui/src/services/api/ummaya/evidence.ts +194 -0
  208. package/tui/src/services/api/ummaya/messages.ts +255 -0
  209. package/tui/src/services/api/ummaya/nonStreaming.ts +66 -0
  210. package/tui/src/services/api/ummaya/provider.ts +200 -0
  211. package/tui/src/services/api/ummaya/reasoning.ts +24 -0
  212. package/tui/src/services/api/ummaya/request.ts +200 -0
  213. package/tui/src/services/api/ummaya/selectionContext.ts +240 -0
  214. package/tui/src/services/api/ummaya/streaming.ts +365 -0
  215. package/tui/src/services/api/ummaya/streamingPayload.ts +129 -0
  216. package/tui/src/services/api/ummaya/streamingReader.ts +40 -0
  217. package/tui/src/services/api/ummaya/toolSelection.ts +217 -0
  218. package/tui/src/services/api/ummaya/types.ts +110 -0
  219. package/tui/src/services/api/ummaya/usage.ts +30 -0
  220. package/tui/src/services/api/ummaya.ts +26 -418
  221. package/tui/src/services/api/withRetry.ts +1 -1
  222. package/tui/src/services/awaySummary.ts +2 -2
  223. package/tui/src/services/claudeAiLimits.ts +1 -1
  224. package/tui/src/services/compact/autoCompact.ts +1 -1
  225. package/tui/src/services/compact/compact.ts +1 -1
  226. package/tui/src/services/lsp/types.ts +8 -30
  227. package/tui/src/services/tips/types.ts +6 -28
  228. package/tui/src/services/tokenEstimation.ts +1 -1
  229. package/tui/src/services/toolRegistry/bootGuard.ts +5 -5
  230. package/tui/src/services/toolUseSummary/toolUseSummaryGenerator.ts +1 -1
  231. package/tui/src/services/tools/toolExecution.ts +94 -1
  232. package/tui/src/store/pendingPermissionSlot.ts +1 -1
  233. package/tui/src/store/session-store.ts +10 -36
  234. package/tui/src/stubs/any-stub.ts +15 -10
  235. package/tui/src/stubs/color-diff-napi.ts +37 -23
  236. package/tui/src/stubs/globals.d.ts +3 -3
  237. package/tui/src/stubs/macro-preload.ts +23 -12
  238. package/tui/src/tools/AdapterTool/AdapterTool.ts +1207 -714
  239. package/tui/src/tools/AdapterTool/routeDiagnostics.ts +75 -0
  240. package/tui/src/tools/AgentTool/AgentTool.tsx +84 -1371
  241. package/tui/src/tools/AgentTool/agentToolHandoff.ts +114 -0
  242. package/tui/src/tools/AgentTool/agentToolPartialResult.ts +16 -0
  243. package/tui/src/tools/AgentTool/agentToolProgress.ts +32 -0
  244. package/tui/src/tools/AgentTool/agentToolResolver.ts +161 -0
  245. package/tui/src/tools/AgentTool/agentToolResult.ts +163 -0
  246. package/tui/src/tools/AgentTool/agentToolUtils.ts +14 -686
  247. package/tui/src/tools/AgentTool/asyncAgentLifecycle.ts +208 -0
  248. package/tui/src/tools/AgentTool/asyncLifecycle.ts +153 -0
  249. package/tui/src/tools/AgentTool/backgroundedCompletion.ts +126 -0
  250. package/tui/src/tools/AgentTool/backgroundedLifecycle.ts +174 -0
  251. package/tui/src/tools/AgentTool/foregroundBackground.ts +83 -0
  252. package/tui/src/tools/AgentTool/foregroundDrain.tsx +133 -0
  253. package/tui/src/tools/AgentTool/foregroundFinalize.ts +98 -0
  254. package/tui/src/tools/AgentTool/foregroundLifecycle.tsx +237 -0
  255. package/tui/src/tools/AgentTool/foregroundProgress.tsx +169 -0
  256. package/tui/src/tools/AgentTool/foregroundTask.ts +89 -0
  257. package/tui/src/tools/AgentTool/forkSubagent.ts +1 -12
  258. package/tui/src/tools/AgentTool/forkSubagentGate.ts +34 -0
  259. package/tui/src/tools/AgentTool/launchRouting.ts +203 -0
  260. package/tui/src/tools/AgentTool/lifecycle.ts +244 -0
  261. package/tui/src/tools/AgentTool/mcpRouting.ts +73 -0
  262. package/tui/src/tools/AgentTool/orchestrationSupport.ts +70 -0
  263. package/tui/src/tools/AgentTool/permissions.ts +39 -0
  264. package/tui/src/tools/AgentTool/promptSetup.ts +181 -0
  265. package/tui/src/tools/AgentTool/remoteRouting.ts +62 -0
  266. package/tui/src/tools/AgentTool/resultMapping.ts +116 -0
  267. package/tui/src/tools/AgentTool/resumeAgent.ts +39 -107
  268. package/tui/src/tools/AgentTool/resumeAgentHelpers.ts +140 -0
  269. package/tui/src/tools/AgentTool/runAgent.ts +1 -1
  270. package/tui/src/tools/AgentTool/runtimeConfig.ts +57 -0
  271. package/tui/src/tools/AgentTool/schemas.ts +196 -0
  272. package/tui/src/tools/AgentTool/sourceVerificationPropagation.ts +263 -0
  273. package/tui/src/tools/AgentTool/worktreeLifecycle.ts +105 -0
  274. package/tui/src/tools/AskUserQuestionTool/AskUserQuestionTool.tsx +174 -202
  275. package/tui/src/tools/BashTool/BashTool.tsx +71 -1072
  276. package/tui/src/tools/BashTool/bashCommandHelpers.ts +12 -12
  277. package/tui/src/tools/BashTool/bashPermissions/astPreflight.ts +173 -0
  278. package/tui/src/tools/BashTool/bashPermissions/classifierChecks.ts +199 -0
  279. package/tui/src/tools/BashTool/bashPermissions/compoundGuards.ts +53 -0
  280. package/tui/src/tools/BashTool/bashPermissions/constants.ts +99 -0
  281. package/tui/src/tools/BashTool/bashPermissions/index.ts +38 -0
  282. package/tui/src/tools/BashTool/bashPermissions/legacyMisparsing.ts +62 -0
  283. package/tui/src/tools/BashTool/bashPermissions/main.ts +135 -0
  284. package/tui/src/tools/BashTool/bashPermissions/normalizedCommands.ts +33 -0
  285. package/tui/src/tools/BashTool/bashPermissions/operatorFlow.ts +98 -0
  286. package/tui/src/tools/BashTool/bashPermissions/permissionChecks.ts +200 -0
  287. package/tui/src/tools/BashTool/bashPermissions/prefixSuggestions.ts +88 -0
  288. package/tui/src/tools/BashTool/bashPermissions/promptClassifierRules.ts +125 -0
  289. package/tui/src/tools/BashTool/bashPermissions/ruleDelegates.ts +19 -0
  290. package/tui/src/tools/BashTool/bashPermissions/ruleMatching.ts +145 -0
  291. package/tui/src/tools/BashTool/bashPermissions/sandboxAutoAllow.ts +75 -0
  292. package/tui/src/tools/BashTool/bashPermissions/subcommandFlow.ts +205 -0
  293. package/tui/src/tools/BashTool/bashPermissions/subcommandGuards.ts +73 -0
  294. package/tui/src/tools/BashTool/bashPermissions/subcommandResultHelpers.ts +116 -0
  295. package/tui/src/tools/BashTool/bashPermissions/types.ts +26 -0
  296. package/tui/src/tools/BashTool/bashPermissions/wrapperStripping.ts +139 -0
  297. package/tui/src/tools/BashTool/bashPermissions.ts +26 -2621
  298. package/tui/src/tools/BashTool/call.ts +202 -0
  299. package/tui/src/tools/BashTool/callLoader.ts +35 -0
  300. package/tui/src/tools/BashTool/commandClassification.ts +151 -0
  301. package/tui/src/tools/BashTool/commandClassificationLoader.ts +40 -0
  302. package/tui/src/tools/BashTool/cwdReset.ts +33 -0
  303. package/tui/src/tools/BashTool/lineTruncation.ts +11 -0
  304. package/tui/src/tools/BashTool/modeValidation.ts +13 -1
  305. package/tui/src/tools/BashTool/outputPersistence.ts +42 -0
  306. package/tui/src/tools/BashTool/permissionClassification.ts +66 -0
  307. package/tui/src/tools/BashTool/permissionLoader.ts +44 -0
  308. package/tui/src/tools/BashTool/resultLoader.ts +29 -0
  309. package/tui/src/tools/BashTool/resultMapping.ts +83 -0
  310. package/tui/src/tools/BashTool/sandboxPolicy.ts +79 -0
  311. package/tui/src/tools/BashTool/schemas.ts +65 -0
  312. package/tui/src/tools/BashTool/sedEditExecution.ts +59 -0
  313. package/tui/src/tools/BashTool/shellExecution.tsx +245 -0
  314. package/tui/src/tools/BashTool/shellOutputUtils.ts +85 -0
  315. package/tui/src/tools/BashTool/shellPermissionGauntlet.ts +97 -0
  316. package/tui/src/tools/BashTool/uiLoader.ts +37 -0
  317. package/tui/src/tools/BriefTool/upload.ts +1 -1
  318. package/tui/src/tools/CalculatorTool/parser.ts +2 -2
  319. package/tui/src/tools/DocumentPrimitive/DocumentPrimitive.ts +262 -0
  320. package/tui/src/tools/DocumentPrimitive/dispatchNormalization.ts +270 -0
  321. package/tui/src/tools/DocumentPrimitive/documentDestinationPath.ts +18 -0
  322. package/tui/src/tools/DocumentPrimitive/documentMutationGuard.ts +22 -0
  323. package/tui/src/tools/DocumentPrimitive/documentPatchNormalization.ts +248 -0
  324. package/tui/src/tools/DocumentPrimitive/documentSourceVerification.ts +245 -0
  325. package/tui/src/tools/DocumentPrimitive/documentSourceVerificationFields.ts +103 -0
  326. package/tui/src/tools/DocumentPrimitive/modelVisibleOutput.ts +40 -0
  327. package/tui/src/tools/DocumentPrimitive/prompt.ts +35 -0
  328. package/tui/src/tools/FileEditTool/FileEditTool.ts +9 -507
  329. package/tui/src/tools/FileEditTool/call.ts +228 -0
  330. package/tui/src/tools/FileEditTool/validateInput.ts +196 -0
  331. package/tui/src/tools/FileReadTool/imageProcessor.ts +13 -0
  332. package/tui/src/tools/FileWriteTool/FileWriteTool.ts +7 -300
  333. package/tui/src/tools/FileWriteTool/call.ts +223 -0
  334. package/tui/src/tools/FileWriteTool/validateInput.ts +80 -0
  335. package/tui/src/tools/ListMcpResourcesTool/ListMcpResourcesTool.ts +19 -3
  336. package/tui/src/tools/LookupPrimitive/LookupPrimitive.ts +25 -32
  337. package/tui/src/tools/LookupPrimitive/prompt.ts +0 -2
  338. package/tui/src/tools/MCPTool/trustPolicy.ts +118 -0
  339. package/tui/src/tools/McpAuthTool/McpAuthTool.ts +21 -3
  340. package/tui/src/tools/NotebookEditTool/NotebookEditTool.ts +7 -326
  341. package/tui/src/tools/NotebookEditTool/call.ts +254 -0
  342. package/tui/src/tools/NotebookEditTool/notebookModel.ts +51 -0
  343. package/tui/src/tools/NotebookEditTool/validateInput.ts +142 -0
  344. package/tui/src/tools/PowerShellTool/PowerShellTool.tsx +46 -937
  345. package/tui/src/tools/PowerShellTool/acceptEditsCommandValidation.ts +162 -0
  346. package/tui/src/tools/PowerShellTool/call.ts +179 -0
  347. package/tui/src/tools/PowerShellTool/callLoader.ts +37 -0
  348. package/tui/src/tools/PowerShellTool/commandClassification.ts +86 -0
  349. package/tui/src/tools/PowerShellTool/modeValidation.ts +25 -332
  350. package/tui/src/tools/PowerShellTool/outputPersistence.ts +42 -0
  351. package/tui/src/tools/PowerShellTool/permissionClassification.ts +28 -0
  352. package/tui/src/tools/PowerShellTool/resultLoader.ts +31 -0
  353. package/tui/src/tools/PowerShellTool/resultMapping.ts +75 -0
  354. package/tui/src/tools/PowerShellTool/schemas.ts +40 -0
  355. package/tui/src/tools/PowerShellTool/shellExecution.tsx +258 -0
  356. package/tui/src/tools/PowerShellTool/symlinkModeValidation.ts +44 -0
  357. package/tui/src/tools/PowerShellTool/uiLoader.ts +37 -0
  358. package/tui/src/tools/PowerShellTool/validation.ts +39 -0
  359. package/tui/src/tools/ReadMcpResourceTool/ReadMcpResourceTool.ts +19 -3
  360. package/tui/src/tools/ResolveLocationPrimitive/ResolveLocationPrimitive.ts +1 -11
  361. package/tui/src/tools/ResolveLocationPrimitive/prompt.ts +2 -6
  362. package/tui/src/tools/SkillTool/SkillTool.ts +2 -2
  363. package/tui/src/tools/SubmitPrimitive/SubmitPrimitive.ts +27 -10
  364. package/tui/src/tools/TaskCreateTool/TaskCreateTool.ts +16 -2
  365. package/tui/src/tools/TaskGetTool/TaskGetTool.ts +23 -3
  366. package/tui/src/tools/TaskListTool/TaskListTool.ts +22 -4
  367. package/tui/src/tools/TaskOutputTool/TaskOutputTool.tsx +46 -547
  368. package/tui/src/tools/TaskOutputTool/lookup.ts +216 -0
  369. package/tui/src/tools/TaskOutputTool/render.tsx +257 -0
  370. package/tui/src/tools/TaskOutputTool/schemas.ts +55 -0
  371. package/tui/src/tools/TaskOutputTool/serialization.ts +36 -0
  372. package/tui/src/tools/TaskStopTool/TaskStopTool.ts +10 -0
  373. package/tui/src/tools/TaskUpdateTool/TaskUpdateTool.ts +14 -364
  374. package/tui/src/tools/TaskUpdateTool/completion.ts +62 -0
  375. package/tui/src/tools/TaskUpdateTool/schemas.ts +62 -0
  376. package/tui/src/tools/TaskUpdateTool/serialization.ts +46 -0
  377. package/tui/src/tools/TaskUpdateTool/statusUpdate.ts +247 -0
  378. package/tui/src/tools/TodoWriteTool/TodoWriteTool.ts +21 -2
  379. package/tui/src/tools/ToolSearchTool/ToolSearchTool.ts +21 -302
  380. package/tui/src/tools/ToolSearchTool/ccSupportTools.ts +223 -0
  381. package/tui/src/tools/ToolSearchTool/descriptionCache.ts +50 -0
  382. package/tui/src/tools/ToolSearchTool/keywordSearch.ts +216 -0
  383. package/tui/src/tools/ToolSearchTool/prompt.ts +10 -4
  384. package/tui/src/tools/ToolSearchTool/resultMapping.ts +30 -0
  385. package/tui/src/tools/ToolSearchTool/schemas.ts +30 -0
  386. package/tui/src/tools/ToolSearchTool/searchPool.ts +47 -0
  387. package/tui/src/tools/ToolSearchTool/supportIntentHints.ts +140 -0
  388. package/tui/src/tools/TranslateTool/TranslateTool.ts +1 -1
  389. package/tui/src/tools/VerifyPrimitive/VerifyPrimitive.ts +2 -1
  390. package/tui/src/tools/WebFetchTool/WebFetchTool.ts +43 -138
  391. package/tui/src/tools/WebFetchTool/call.ts +227 -0
  392. package/tui/src/tools/WebFetchTool/resolvedAddressSafety.ts +78 -0
  393. package/tui/src/tools/WebFetchTool/sourceVerification.ts +204 -0
  394. package/tui/src/tools/WebFetchTool/types.ts +23 -0
  395. package/tui/src/tools/WebFetchTool/urlSafety.ts +181 -0
  396. package/tui/src/tools/WebFetchTool/utils.ts +1 -1
  397. package/tui/src/tools/WebSearchTool/UI.tsx +0 -1
  398. package/tui/src/tools/WebSearchTool/WebSearchTool.ts +9 -313
  399. package/tui/src/tools/WebSearchTool/call.ts +33 -0
  400. package/tui/src/tools/WebSearchTool/responseMapping.ts +190 -0
  401. package/tui/src/tools/WebSearchTool/resultBlock.ts +47 -0
  402. package/tui/src/tools/WebSearchTool/schemas.ts +47 -0
  403. package/tui/src/tools/WebSearchTool/toolSchema.ts +12 -0
  404. package/tui/src/tools/WorkspaceToolAdapter/WorkspaceToolAdapter.ts +79 -0
  405. package/tui/src/tools/WorkspaceToolAdapter/allowedRootPolicy.ts +85 -0
  406. package/tui/src/tools/WorkspaceToolAdapter/documentFormatGuards.ts +73 -0
  407. package/tui/src/tools/WorkspaceToolAdapter/inputNormalization.ts +105 -0
  408. package/tui/src/tools/WorkspaceToolAdapter/mcpExposurePolicy.ts +64 -0
  409. package/tui/src/tools/WorkspaceToolAdapter/toolDefFactory.ts +215 -0
  410. package/tui/src/tools/WorkspaceToolAdapter/toolNames.ts +6 -0
  411. package/tui/src/tools/WorkspaceToolAdapter/workspacePolicy.ts +15 -0
  412. package/tui/src/tools/_shared/dispatchPrimitive.ts +6 -6
  413. package/tui/src/tools/_shared/documentChangeToPatch.ts +125 -0
  414. package/tui/src/tools/_shared/documentDispatchArguments.ts +87 -0
  415. package/tui/src/tools/_shared/documentPrimitiveTimeout.ts +13 -0
  416. package/tui/src/tools/_shared/documentToolResultRender.ts +98 -0
  417. package/tui/src/tools/_shared/pendingCallRegistry.ts +1 -6
  418. package/tui/src/tools/_shared/rootPrimitiveInput.ts +1 -0
  419. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPatterns.ts +58 -0
  420. package/tui/src/tools/_shared/toolChoiceRepair/documentCompletionPrompt.ts +271 -0
  421. package/tui/src/tools/_shared/toolChoiceRepair/documentRepair.ts +452 -0
  422. package/tui/src/tools/_shared/toolChoiceRepair/messageAccess.ts +80 -0
  423. package/tui/src/tools/_shared/toolChoiceRepair/publicDataRepair.ts +92 -0
  424. package/tui/src/tools/_shared/toolChoiceRepair/supportRepair.ts +135 -0
  425. package/tui/src/tools/_shared/toolChoiceRepair.ts +55 -860
  426. package/tui/src/tools/shared/mockDisclaimer.ts +1 -1
  427. package/tui/src/tools.ts +39 -190
  428. package/tui/src/types/fileSuggestion.ts +4 -26
  429. package/tui/src/types/generated/events_mono/claude_code/v1/claude_code_internal_event.ts +186 -148
  430. package/tui/src/types/generated/events_mono/common/v1/auth.ts +25 -11
  431. package/tui/src/types/generated/events_mono/growthbook/v1/growthbook_experiment_event.ts +47 -30
  432. package/tui/src/types/generated/google/protobuf/timestamp.ts +21 -7
  433. package/tui/src/types/message.ts +80 -102
  434. package/tui/src/types/messageQueueTypes.ts +6 -28
  435. package/tui/src/types/notebook.ts +16 -38
  436. package/tui/src/types/statusLine.ts +4 -26
  437. package/tui/src/types/tools.ts +24 -46
  438. package/tui/src/types/utils.ts +6 -28
  439. package/tui/src/upstreamproxy/relay.ts +7 -3
  440. package/tui/src/upstreamproxy/upstreamproxy.ts +1 -1
  441. package/tui/src/utils/assistantMessageFactories.ts +9 -3
  442. package/tui/src/utils/auth.ts +129 -139
  443. package/tui/src/utils/bash/ast.ts +23 -23
  444. package/tui/src/utils/bash/bashParser.ts +5 -5
  445. package/tui/src/utils/billing.ts +1 -1
  446. package/tui/src/utils/claudeDesktop.ts +4 -4
  447. package/tui/src/utils/collapseReadSearch.ts +3 -3
  448. package/tui/src/utils/cronTasks.ts +1 -1
  449. package/tui/src/utils/execFileNoThrow.ts +1 -1
  450. package/tui/src/utils/filePersistence/types.ts +16 -38
  451. package/tui/src/utils/forkedAgent.ts +1 -1
  452. package/tui/src/utils/gracefulShutdown.ts +4 -4
  453. package/tui/src/utils/heapDumpService.ts +12 -8
  454. package/tui/src/utils/hooks/apiQueryHookHelper.ts +1 -1
  455. package/tui/src/utils/hooks/execPromptHook.ts +1 -1
  456. package/tui/src/utils/hooks/skillImprovement.ts +1 -1
  457. package/tui/src/utils/mcp/dateTimeParser.ts +1 -1
  458. package/tui/src/utils/messages.ts +18 -0
  459. package/tui/src/utils/migrateSessions.ts +3 -3
  460. package/tui/src/utils/model/model.ts +6 -6
  461. package/tui/src/utils/permissions/yoloClassifier.ts +1 -1
  462. package/tui/src/utils/plugins/headlessPluginInstall.ts +1 -1
  463. package/tui/src/utils/plugins/mcpPluginIntegration.ts +1 -1
  464. package/tui/src/utils/plugins/mcpbHandler.ts +1 -1
  465. package/tui/src/utils/plugins/pluginLoader.ts +8 -8
  466. package/tui/src/utils/protectedNamespace.ts +5 -3
  467. package/tui/src/utils/rawJsonToolCall.ts +242 -0
  468. package/tui/src/utils/ripgrep.ts +16 -7
  469. package/tui/src/utils/sessionTitle.ts +1 -1
  470. package/tui/src/utils/settings/permissionValidation.ts +14 -2
  471. package/tui/src/utils/shell/prefix.ts +1 -1
  472. package/tui/src/utils/sideQuery.ts +1 -1
  473. package/tui/src/utils/systemThemeWatcher.ts +13 -3
  474. package/tui/src/utils/teleport.tsx +1 -1
  475. package/uv.lock +426 -45
  476. package/tui/src/services/api/claude.ts +0 -3540
  477. package/tui/src/tools/_shared/directPublicDataGuard.ts +0 -362
  478. package/tui/src/tools/_shared/kmaAnalysisGuard.ts +0 -197
  479. package/tui/src/tools/_shared/kmaAviationGuard.ts +0 -70
  480. package/tui/src/tools/_shared/nmcAedGuard.ts +0 -234
  481. package/tui/src/tools/_shared/protectedCheckGuard.ts +0 -207
  482. package/tui/src/tools/_shared/textToolCallGuard.ts +0 -91
@@ -0,0 +1,766 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ """Known-only passive adapters for non-promoted document families."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import csv
7
+ import gzip
8
+ import io
9
+ import json
10
+ import tarfile
11
+ import zipfile
12
+ from html.parser import HTMLParser
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING
15
+ from xml.etree import ElementTree as StdElementTree
16
+
17
+ import yaml
18
+ from defusedxml import ElementTree # type: ignore[import-untyped]
19
+
20
+ from ummaya.tools.documents.models import (
21
+ DocumentExtraction,
22
+ DocumentFormat,
23
+ ImageReference,
24
+ KnownDocumentFormat,
25
+ MetadataValue,
26
+ ParagraphBlock,
27
+ TableBlock,
28
+ TableCell,
29
+ )
30
+
31
+ if TYPE_CHECKING:
32
+ from ummaya.tools.documents.tool_defs import DocumentFieldPatch
33
+
34
+
35
# Format families. Each tuple is the ``known_formats`` value of exactly one
# adapter class defined later in this module.

# OpenDocument packages, served by OdfDocumentAdapter.
_ODF_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.odt,
    KnownDocumentFormat.ods,
    KnownDocumentFormat.odp,
)
# Structured and semi-structured data files, served by DataFileDocumentAdapter.
_DATA_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.csv,
    KnownDocumentFormat.tsv,
    KnownDocumentFormat.xml,
    KnownDocumentFormat.rdf,
    KnownDocumentFormat.ttl,
    KnownDocumentFormat.lod,
    KnownDocumentFormat.json,
    KnownDocumentFormat.jsonl,
    KnownDocumentFormat.yaml,
    KnownDocumentFormat.yml,
    KnownDocumentFormat.geojson,
    KnownDocumentFormat.gpx,
    KnownDocumentFormat.kml,
    KnownDocumentFormat.fasta,
    KnownDocumentFormat.sgml,
    KnownDocumentFormat.dtd,
    KnownDocumentFormat.hml,
    KnownDocumentFormat.etc,
)
# HTML, plain text, RTF and Markdown, served by TextWebExportAdapter.
_TEXT_WEB_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.html,
    KnownDocumentFormat.htm,
    KnownDocumentFormat.txt,
    KnownDocumentFormat.rtf,
    KnownDocumentFormat.md,
)
# Pre-OOXML Office binaries, served by LegacyOfficeDocumentAdapter.
_LEGACY_OFFICE_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.doc,
    KnownDocumentFormat.xls,
    KnownDocumentFormat.ppt,
)
# Source code files, served by CodeFileDocumentAdapter.
_CODE_FORMATS: tuple[KnownDocumentFormat, ...] = (KnownDocumentFormat.python,)
# Raster images, served by ImageScanDocumentAdapter.
_IMAGE_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.png,
    KnownDocumentFormat.jpg,
    KnownDocumentFormat.jpeg,
    KnownDocumentFormat.gif,
    KnownDocumentFormat.tif,
    KnownDocumentFormat.tiff,
    KnownDocumentFormat.bmp,
    KnownDocumentFormat.webp,
)
# GIS and 3D model files, served by GeospatialDocumentAdapter.
_GEOSPATIAL_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.shp,
    KnownDocumentFormat.shx,
    KnownDocumentFormat.dbf,
    KnownDocumentFormat.prj,
    KnownDocumentFormat.stl,
)
# Audio and video files, served by MediaAssetDocumentAdapter.
_MEDIA_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.wav,
    KnownDocumentFormat.mp3,
    KnownDocumentFormat.mp4,
)
# Archive containers; the default ``known_formats`` of ArchiveDocumentSetAdapter.
_ARCHIVE_FORMATS: tuple[KnownDocumentFormat, ...] = (
    KnownDocumentFormat.epub,
    KnownDocumentFormat.zip,
    KnownDocumentFormat.seven_z,
    KnownDocumentFormat.tar,
    KnownDocumentFormat.gz,
)
102
+
103
# Lookup from file suffix (lower-case, leading dot included) to its known
# format. ``_known_format`` lower-cases ``Path.suffix`` before consulting this
# table and falls back to ``KnownDocumentFormat.txt`` for suffixes not listed.
_KNOWN_BY_EXTENSION = {
    ".odt": KnownDocumentFormat.odt,
    ".ods": KnownDocumentFormat.ods,
    ".odp": KnownDocumentFormat.odp,
    ".doc": KnownDocumentFormat.doc,
    ".xls": KnownDocumentFormat.xls,
    ".ppt": KnownDocumentFormat.ppt,
    ".csv": KnownDocumentFormat.csv,
    ".tsv": KnownDocumentFormat.tsv,
    ".xml": KnownDocumentFormat.xml,
    ".rdf": KnownDocumentFormat.rdf,
    ".ttl": KnownDocumentFormat.ttl,
    ".lod": KnownDocumentFormat.lod,
    ".json": KnownDocumentFormat.json,
    ".jsonl": KnownDocumentFormat.jsonl,
    ".yaml": KnownDocumentFormat.yaml,
    ".yml": KnownDocumentFormat.yml,
    ".geojson": KnownDocumentFormat.geojson,
    ".gpx": KnownDocumentFormat.gpx,
    ".kml": KnownDocumentFormat.kml,
    ".fasta": KnownDocumentFormat.fasta,
    ".sgml": KnownDocumentFormat.sgml,
    ".dtd": KnownDocumentFormat.dtd,
    ".hml": KnownDocumentFormat.hml,
    ".etc": KnownDocumentFormat.etc,
    # The only mapping whose enum member name differs from the suffix text,
    # apart from ".7z" below.
    ".py": KnownDocumentFormat.python,
    ".html": KnownDocumentFormat.html,
    ".htm": KnownDocumentFormat.htm,
    ".txt": KnownDocumentFormat.txt,
    ".rtf": KnownDocumentFormat.rtf,
    ".md": KnownDocumentFormat.md,
    ".png": KnownDocumentFormat.png,
    ".jpg": KnownDocumentFormat.jpg,
    ".jpeg": KnownDocumentFormat.jpeg,
    ".gif": KnownDocumentFormat.gif,
    ".tif": KnownDocumentFormat.tif,
    ".tiff": KnownDocumentFormat.tiff,
    ".bmp": KnownDocumentFormat.bmp,
    ".webp": KnownDocumentFormat.webp,
    ".shp": KnownDocumentFormat.shp,
    ".shx": KnownDocumentFormat.shx,
    ".dbf": KnownDocumentFormat.dbf,
    ".prj": KnownDocumentFormat.prj,
    ".stl": KnownDocumentFormat.stl,
    ".wav": KnownDocumentFormat.wav,
    ".mp3": KnownDocumentFormat.mp3,
    ".mp4": KnownDocumentFormat.mp4,
    ".epub": KnownDocumentFormat.epub,
    ".zip": KnownDocumentFormat.zip,
    ".7z": KnownDocumentFormat.seven_z,
    ".tar": KnownDocumentFormat.tar,
    ".gz": KnownDocumentFormat.gz,
}
156
+
157
# Content type reported for each raster format by ImageScanDocumentAdapter,
# which substitutes "image/unknown" when a format has no entry here.
_IMAGE_MIME = {
    KnownDocumentFormat.png: "image/png",
    # jpg/jpeg and tif/tiff are suffix variants sharing one content type.
    KnownDocumentFormat.jpg: "image/jpeg",
    KnownDocumentFormat.jpeg: "image/jpeg",
    KnownDocumentFormat.gif: "image/gif",
    KnownDocumentFormat.tif: "image/tiff",
    KnownDocumentFormat.tiff: "image/tiff",
    KnownDocumentFormat.bmp: "image/bmp",
    KnownDocumentFormat.webp: "image/webp",
}
167
+
168
+
169
class _KnownOnlyAdapterBase:
    """Common surface shared by every known-only (passive) adapter.

    Subclasses provide ``adapter_id`` and ``known_formats``; the base leaves
    ``promoted_formats`` empty.
    """

    adapter_id: str
    known_formats: tuple[KnownDocumentFormat, ...]
    promoted_formats: tuple[DocumentFormat, ...] = ()

    @property
    def engine_id(self) -> str:
        """Expose ``adapter_id`` under the ``engine_id`` name for diagnostics."""
        return self.adapter_id

    def normalize_fill_patches(
        self,
        patches: tuple[DocumentFieldPatch, ...],
        *,
        extraction: DocumentExtraction | None,
    ) -> tuple[DocumentFieldPatch, ...]:
        """Hand *patches* back untouched; passive adapters never rewrite them.

        *extraction* is accepted for signature compatibility and ignored.
        """
        del extraction  # intentionally unused
        return patches
190
+
191
+
192
class OdfDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive ODF adapter that inspects the package as a ZIP holding XML."""

    adapter_id = "odf-package-read-only-adapter"
    known_formats = _ODF_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Pull text out of ``content.xml`` and report package metadata.

        A file that is not a valid ZIP, or a package without ``content.xml``,
        yields a warning and no paragraphs instead of an exception.
        """
        detected = _known_format(path)
        blocks: list[ParagraphBlock] = []
        notes: list[str] = []
        info: dict[str, MetadataValue] = _base_metadata(
            path,
            known_format=detected,
            adapter_id=self.adapter_id,
            mutation_policy="read_only_odf_candidate",
        )
        try:
            with zipfile.ZipFile(path) as package:
                info["package_entry_count"] = len(package.infolist())
                if "content.xml" not in package.namelist():
                    notes.append("ODF package does not contain content.xml.")
                else:
                    content_root = ElementTree.fromstring(package.read("content.xml"))
                    text_lines = _xml_text_lines(content_root)
                    blocks = _paragraphs_from_text_lines(
                        artifact_id,
                        text_lines,
                        source_prefix="content.xml",
                    )
        except zipfile.BadZipFile:
            notes.append("ODF read-only candidate could not open the package as ZIP.")

        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=info,
            warnings=notes,
        )
230
+
231
+
232
class DataFileDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive data-file adapter that dispatches to per-format inspectors."""

    adapter_id = "data-file-read-only-adapter"
    known_formats = _DATA_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Route *path* to the inspector registered for its detected format.

        Formats without an inspector produce a metadata-only extraction that
        carries a warning naming the format.
        """
        detected = _known_format(path)
        # Each entry pairs a format family with the module-level inspector
        # that handles it; the first family containing the format wins.
        routes = (
            (
                (KnownDocumentFormat.csv, KnownDocumentFormat.tsv),
                _inspect_delimited,
            ),
            (
                (
                    KnownDocumentFormat.json,
                    KnownDocumentFormat.jsonl,
                    KnownDocumentFormat.geojson,
                ),
                _inspect_json,
            ),
            (
                (KnownDocumentFormat.yaml, KnownDocumentFormat.yml),
                _inspect_yaml,
            ),
            (
                (
                    KnownDocumentFormat.xml,
                    KnownDocumentFormat.rdf,
                    KnownDocumentFormat.gpx,
                    KnownDocumentFormat.kml,
                    KnownDocumentFormat.hml,
                ),
                _inspect_xml,
            ),
            (
                (
                    KnownDocumentFormat.ttl,
                    KnownDocumentFormat.lod,
                    KnownDocumentFormat.fasta,
                    KnownDocumentFormat.sgml,
                    KnownDocumentFormat.dtd,
                    KnownDocumentFormat.etc,
                ),
                _inspect_text_data,
            ),
        )
        for family, inspector in routes:
            if detected in family:
                return inspector(path, artifact_id=artifact_id, known_format=detected)

        fallback_metadata = _base_metadata(
            path,
            known_format=detected,
            adapter_id=self.adapter_id,
            mutation_policy="read_only_data_file",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            metadata=fallback_metadata,
            warnings=[f"No passive data parser is implemented for {detected.value}."],
        )
278
+
279
+
280
class LegacyOfficeDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive adapter that reports metadata only for legacy Office binaries."""

    adapter_id = "legacy-office-metadata-only-adapter"
    known_formats = _LEGACY_OFFICE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Describe the file through base metadata; its contents are never read."""
        info = _base_metadata(
            path,
            known_format=_known_format(path),
            adapter_id=self.adapter_id,
            mutation_policy="conversion_required_legacy_office",
        )
        notice = (
            "Legacy Office binary inspection is metadata-only until an explicit local "
            "conversion bridge is approved."
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            metadata=info,
            warnings=[notice],
        )
302
+
303
+
304
class TextWebExportAdapter(_KnownOnlyAdapterBase):
    """Passive adapter for HTML, plain-text, RTF and Markdown exports."""

    adapter_id = "text-web-export-read-only-adapter"
    known_formats = _TEXT_WEB_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Turn the file's text into one paragraph block per non-empty line.

        The file is decoded as UTF-8 with undecodable bytes replaced. HTML
        goes through the HTML text parser; every other format goes through
        the minimal RTF stripper and is then split into lines.
        """
        detected = _known_format(path)
        raw_text = path.read_text(encoding="utf-8", errors="replace")
        if detected in {KnownDocumentFormat.html, KnownDocumentFormat.htm}:
            text_lines = _html_text_lines(raw_text)
        else:
            text_lines = _plain_text_lines(_strip_minimal_rtf(raw_text))

        blocks = _paragraphs_from_text_lines(
            artifact_id,
            text_lines,
            source_prefix=path.name,
        )
        info = _base_metadata(
            path,
            known_format=detected,
            adapter_id=self.adapter_id,
            mutation_policy="read_only_text_export",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=info,
        )
333
+
334
+
335
class CodeFileDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive adapter exposing source-code attachments as read-only text."""

    adapter_id = "code-file-read-only-adapter"
    known_formats = _CODE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Return up to the first 200 non-empty source lines as paragraphs."""
        source_text = path.read_text(encoding="utf-8", errors="replace")
        preview = _plain_text_lines(source_text)[:200]
        blocks = _paragraphs_from_text_lines(
            artifact_id,
            preview,
            source_prefix=path.name,
        )
        info = _base_metadata(
            path,
            known_format=_known_format(path),
            adapter_id=self.adapter_id,
            mutation_policy="read_only_code_file",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=info,
            warnings=["Code files are not public-form documents and cannot be mutated here."],
        )
359
+
360
+
361
class ImageScanDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive adapter that references raster images without reading pixels."""

    adapter_id = "image-scan-extraction-only-adapter"
    known_formats = _IMAGE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Emit a single image reference plus base metadata for *path*.

        The content type comes from ``_IMAGE_MIME`` and defaults to
        ``image/unknown`` when the detected format has no entry.
        """
        detected = _known_format(path)
        reference = ImageReference(
            image_id=f"image-{detected.value}",
            source_path=str(path),
            content_type=_IMAGE_MIME.get(detected, "image/unknown"),
        )
        info = _base_metadata(
            path,
            known_format=detected,
            adapter_id=self.adapter_id,
            mutation_policy="extraction_only",
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            images=[reference],
            metadata=info,
            warnings=["Image scan adapter does not mutate raster originals."],
        )
387
+
388
+
389
class GeospatialDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive adapter classifying GIS and 3D model files."""

    adapter_id = "geospatial-metadata-only-adapter"
    known_formats = _GEOSPATIAL_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Report base metadata, with a short text preview for prj/stl files.

        Only ``prj`` and ``stl`` files are read as text, capped at the first
        40 non-empty lines; other formats produce no paragraphs.
        """
        detected = _known_format(path)
        blocks: list[ParagraphBlock] = []
        if detected in {KnownDocumentFormat.prj, KnownDocumentFormat.stl}:
            raw_text = path.read_text(encoding="utf-8", errors="replace")
            blocks = _paragraphs_from_text_lines(
                artifact_id,
                _plain_text_lines(raw_text)[:40],
                source_prefix=path.name,
            )

        info = _base_metadata(
            path,
            known_format=detected,
            adapter_id=self.adapter_id,
            mutation_policy="metadata_only_geospatial_asset",
        )
        notice = (
            "Geospatial and 3D geometry files are classified for routing, not mutated "
            "as public documents."
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=info,
            warnings=[notice],
        )
421
+
422
+
423
class MediaAssetDocumentAdapter(_KnownOnlyAdapterBase):
    """Passive adapter that reports metadata only for audio/video files."""

    adapter_id = "media-asset-metadata-only-adapter"
    known_formats = _MEDIA_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """Describe the media file through base metadata and a fixed warning."""
        info = _base_metadata(
            path,
            known_format=_known_format(path),
            adapter_id=self.adapter_id,
            mutation_policy="metadata_only_media_asset",
        )
        notice = (
            "Media files need a dedicated transcription or extraction adapter before "
            "content can be written into a public document derivative."
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            metadata=info,
            warnings=[notice],
        )
444
+
445
+
446
class ArchiveDocumentSetAdapter(_KnownOnlyAdapterBase):
    """Passive adapter that lists archive members without extracting them."""

    adapter_id = "archive-document-set-read-only-adapter"
    known_formats = _ARCHIVE_FORMATS

    def __init__(
        self,
        known_formats: tuple[KnownDocumentFormat, ...] | None = None,
    ) -> None:
        # A missing or empty tuple falls back to the default archive formats.
        if known_formats:
            self.known_formats = known_formats
        else:
            self.known_formats = _ARCHIVE_FORMATS

    def inspect(self, path: Path, *, artifact_id: str) -> DocumentExtraction:
        """List member names as paragraphs and record how many were found."""
        detected = _known_format(path)
        member_names, notes = _archive_member_names(path, known_format=detected)
        info = _base_metadata(
            path,
            known_format=detected,
            adapter_id=self.adapter_id,
            mutation_policy="archive_read_only",
        )
        info["entry_count"] = len(member_names)
        info["child_mutation_policy"] = "route_children_as_derivatives"
        blocks = _paragraphs_from_text_lines(
            artifact_id,
            member_names,
            source_prefix=path.name,
        )
        return DocumentExtraction(
            artifact_id=artifact_id,
            paragraphs=blocks,
            metadata=info,
            warnings=notes,
        )
480
+
481
+
482
def _inspect_delimited(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse a CSV/TSV file into a table, paragraphs and round-trip metadata.

    The file is decoded as ``utf-8-sig`` with undecodable bytes replaced.
    The rows are written back out with the same delimiter and re-read;
    ``round_trip_passed`` records whether that reproduces the parsed rows.
    Paragraph text joins each row's cells with a comma for both formats.
    """
    separator = "," if known_format is not KnownDocumentFormat.tsv else "\t"
    text = path.read_text(encoding="utf-8-sig", errors="replace")
    rows = list(csv.reader(io.StringIO(text), delimiter=separator))

    # Serialize the parsed rows and parse them again to check stability.
    buffer = io.StringIO()
    csv.writer(buffer, delimiter=separator, lineterminator="\n").writerows(rows)
    rows_again = list(csv.reader(io.StringIO(buffer.getvalue()), delimiter=separator))

    info = _base_metadata(
        path,
        known_format=known_format,
        adapter_id=DataFileDocumentAdapter.adapter_id,
        mutation_policy="read_only_data_file",
    )
    info["serializer"] = known_format.value
    info["round_trip_passed"] = rows == rows_again
    info["row_count"] = len(rows)
    info["column_count"] = max((len(cells) for cells in rows), default=0)

    table = _table_from_rows(rows, source_path=path.name)
    row_texts = [",".join(cells) for cells in rows]
    blocks = _paragraphs_from_text_lines(
        artifact_id,
        row_texts,
        source_prefix=path.name,
    )
    return DocumentExtraction(
        artifact_id=artifact_id,
        tables=[table],
        paragraphs=blocks,
        metadata=info,
    )
519
+
520
+
521
def _inspect_json(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse a JSON, JSON Lines or GeoJSON file and record round-trip evidence.

    For ``jsonl`` every non-blank line is parsed as its own JSON value and the
    result is the list of those values; other formats are parsed as a single
    document. The parsed value is dumped with sorted keys and parsed again;
    ``round_trip_passed`` records whether the two parses compare equal.

    Raises:
        json.JSONDecodeError: if the payload (or any JSONL line) is not valid JSON.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    # "utf-8-sig" drops a leading byte-order mark, which json.loads would
    # otherwise reject; this matches how _inspect_delimited reads its input.
    payload = path.read_text(encoding="utf-8-sig")
    if known_format is KnownDocumentFormat.jsonl:
        values = [json.loads(line) for line in payload.splitlines() if line.strip()]
        serialized = "\n".join(
            json.dumps(value, ensure_ascii=False, sort_keys=True) for value in values
        )
        reparsed: object = [json.loads(line) for line in serialized.splitlines()]
        parsed: object = values
    else:
        parsed = json.loads(payload)
        serialized = json.dumps(parsed, ensure_ascii=False, sort_keys=True)
        reparsed = json.loads(serialized)
    return _structured_data_extraction(
        artifact_id,
        path=path,
        known_format=known_format,
        serializer=known_format.value,
        parsed=parsed,
        round_trip_passed=parsed == reparsed,
    )
547
+
548
+
549
def _inspect_yaml(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse a YAML file with the safe loader and record round-trip evidence.

    The parsed value is dumped with ``yaml.safe_dump`` (sorted keys, unicode
    allowed) and loaded again; ``round_trip_passed`` records whether the
    reloaded value equals the first parse.
    """
    source_text = path.read_text(encoding="utf-8")
    document = yaml.safe_load(source_text)
    dumped = yaml.safe_dump(document, allow_unicode=True, sort_keys=True)
    is_stable = document == yaml.safe_load(dumped)
    return _structured_data_extraction(
        artifact_id,
        path=path,
        known_format=known_format,
        serializer="yaml.safe_load/safe_dump",
        parsed=document,
        round_trip_passed=is_stable,
    )
565
+
566
+
567
def _inspect_xml(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Parse an XML-family file with defusedxml and summarize its root and text.

    The tree is serialized with the standard-library ElementTree and parsed
    again; ``round_trip_passed`` only compares the local name of the root tag
    before and after that cycle.
    """
    tree_root = ElementTree.fromstring(path.read_bytes())
    as_text = StdElementTree.tostring(tree_root, encoding="unicode")
    tree_root_again = ElementTree.fromstring(as_text.encode("utf-8"))

    root_name = _local_name(tree_root.tag)
    summary = {
        "root_tag": root_name,
        "text": " ".join(_xml_text_lines(tree_root)),
    }
    return _structured_data_extraction(
        artifact_id,
        path=path,
        known_format=known_format,
        serializer="defusedxml.ElementTree",
        parsed=summary,
        round_trip_passed=root_name == _local_name(tree_root_again.tag),
    )
584
+
585
+
586
def _inspect_text_data(
    path: Path,
    *,
    artifact_id: str,
    known_format: KnownDocumentFormat,
) -> DocumentExtraction:
    """Preview a text-based data file as up to 200 non-empty lines.

    No parsing is attempted, so ``round_trip_passed`` is always recorded as
    ``True`` and ``line_count`` reflects the (capped) preview length.
    """
    raw_text = path.read_text(encoding="utf-8", errors="replace")
    preview = _plain_text_lines(raw_text)[:200]
    info = _base_metadata(
        path,
        known_format=known_format,
        adapter_id=DataFileDocumentAdapter.adapter_id,
        mutation_policy="read_only_data_file",
    )
    info.update(
        {
            "serializer": "plain-text-preview",
            "round_trip_passed": True,
            "line_count": len(preview),
        }
    )
    blocks = _paragraphs_from_text_lines(
        artifact_id,
        preview,
        source_prefix=path.name,
    )
    return DocumentExtraction(
        artifact_id=artifact_id,
        paragraphs=blocks,
        metadata=info,
    )
612
+
613
+
614
def _structured_data_extraction(
    artifact_id: str,
    *,
    path: Path,
    known_format: KnownDocumentFormat,
    serializer: str,
    parsed: object,
    round_trip_passed: bool,
) -> DocumentExtraction:
    """Wrap an already-parsed value in a read-only data-file extraction.

    Metadata records the serializer label, the round-trip verdict supplied by
    the caller and the Python type name of *parsed*; paragraphs come from a
    short preview of *parsed*.
    """
    info = _base_metadata(
        path,
        known_format=known_format,
        adapter_id=DataFileDocumentAdapter.adapter_id,
        mutation_policy="read_only_data_file",
    )
    info["serializer"] = serializer
    info["round_trip_passed"] = round_trip_passed
    info["root_type"] = type(parsed).__name__

    blocks = _paragraphs_from_text_lines(
        artifact_id,
        _structured_preview_lines(parsed),
        source_prefix=path.name,
    )
    return DocumentExtraction(
        artifact_id=artifact_id,
        paragraphs=blocks,
        metadata=info,
    )
646
+
647
+
648
def _table_from_rows(rows: list[list[str]], *, source_path: str) -> TableBlock:
    """Flatten *rows* into one table block with a cell per value.

    Cell indices are zero-based, while the ``#r<row>c<col>`` fragment in each
    cell's ``source_path`` is one-based.
    """
    flattened: list[TableCell] = [
        TableCell(
            row_index=r,
            column_index=c,
            text=cell_text,
            source_path=f"{source_path}#r{r + 1}c{c + 1}",
        )
        for r, cells in enumerate(rows)
        for c, cell_text in enumerate(cells)
    ]
    return TableBlock(block_id="data-table-001", source_path=source_path, cells=flattened)
661
+
662
+
663
def _archive_member_names(
    path: Path,
    *,
    known_format: KnownDocumentFormat,
) -> tuple[list[str], list[str]]:
    """List the safe member names of an archive together with any warnings.

    Returns a ``(names, warnings)`` pair. ZIP and EPUB containers and tar
    archives are enumerated through the standard library; a gzip file is
    reported as a single child named after the path without its last suffix.
    7z archives, and any format without an enumerator, yield no names and one
    explanatory warning.

    Errors raised by ``zipfile``, ``tarfile`` or ``gzip`` for unreadable input
    are not caught here.
    """
    # EPUB is a ZIP-based container, so it shares the ZIP enumeration path
    # instead of falling through to the "not supported" warnings below.
    if known_format in (KnownDocumentFormat.zip, KnownDocumentFormat.epub):
        with zipfile.ZipFile(path) as archive:
            return _safe_member_names(archive.namelist()), []
    if known_format is KnownDocumentFormat.tar:
        with tarfile.open(path) as archive:
            return _safe_member_names(archive.getnames()), []
    if known_format is KnownDocumentFormat.gz:
        with gzip.open(path) as payload:
            # Reads a single byte and discards it - presumably so an invalid
            # gzip stream raises here rather than being reported as a child.
            payload.read(1)
        return [path.with_suffix("").name or path.name], [
            "Gzip payload is treated as one compressed child candidate."
        ]
    if known_format is KnownDocumentFormat.seven_z:
        return [], ["7z archive enumeration is known but not promoted without a 7z runtime."]
    # Reached only for formats outside the archive family; say so explicitly
    # rather than blaming a missing 7z runtime.
    return [], [f"No archive enumerator is implemented for {known_format.value}."]
681
+
682
+
683
def _safe_member_names(names: list[str]) -> list[str]:
    """Return the sorted member names that cannot escape the archive root.

    A name is dropped when it is empty, is absolute, or has a path component
    made up only of dots and spaces that contains ``..`` (for example ``..``
    or ``...``). Backslashes are treated as separators for these checks, so
    ``..\\evil`` is rejected as well. Names that merely contain two dots
    inside an ordinary component, such as ``report..final.txt``, are kept.
    Kept names are returned exactly as given.
    """
    kept: list[str] = []
    for name in names:
        normalized = name.replace("\\", "/")
        if not normalized or normalized.startswith("/"):
            continue
        if any(".." in part and not part.strip(". ") for part in normalized.split("/")):
            continue
        kept.append(name)
    return sorted(kept)
685
+
686
+
687
def _html_text_lines(payload: str) -> list[str]:
    """Return the stripped, non-empty text chunks found in an HTML payload."""
    parser = _VisibleTextParser()
    parser.feed(payload)
    # With convert_charrefs enabled, feed() can hold back trailing text (for
    # example text ending in an unterminated "&") while it waits for more
    # input; close() forces that buffered text through handle_data.
    parser.close()
    return parser.lines
691
+
692
+
693
def _strip_minimal_rtf(payload: str) -> str:
    """Lightly clean RTF text; return anything else unchanged.

    A payload counts as RTF when, ignoring leading whitespace, it starts with
    ``{\\rtf``. For RTF, every ``\\par`` becomes a newline and all curly
    braces are removed; other control words are left in place.
    """
    if payload.lstrip().startswith("{\\rtf"):
        with_breaks = payload.replace("\\par", "\n")
        return with_breaks.translate(str.maketrans("", "", "{}"))
    return payload
698
+
699
+
700
def _plain_text_lines(payload: str) -> list[str]:
    """Split *payload* into lines, trimmed of whitespace, omitting blank ones."""
    trimmed = (raw.strip() for raw in payload.splitlines())
    return [text for text in trimmed if text]
702
+
703
+
704
def _xml_text_lines(root: StdElementTree.Element) -> list[str]:
    """Collect the trimmed, non-blank text fragments under *root* in document order."""
    fragments: list[str] = []
    for chunk in root.itertext():
        if not chunk:
            continue
        trimmed = chunk.strip()
        if trimmed:
            fragments.append(trimmed)
    return fragments
706
+
707
+
708
def _structured_preview_lines(value: object) -> list[str]:
    """Render a short, line-oriented preview of a parsed data value.

    Lists show their first 20 items as key-sorted JSON, dicts show their first
    20 entries as ``key: value``, and anything else becomes one ``str()`` line.
    """
    if isinstance(value, list):
        return [json.dumps(item, ensure_ascii=False, sort_keys=True) for item in value[:20]]
    if not isinstance(value, dict):
        return [str(value)]

    preview: list[str] = []
    for key, item in value.items():
        if len(preview) == 20:
            break
        preview.append(f"{key}: {item}")
    return preview
714
+
715
+
716
def _paragraphs_from_text_lines(
    artifact_id: str,
    lines: list[str],
    *,
    source_prefix: str,
) -> list[ParagraphBlock]:
    """Build one paragraph block per non-empty entry of *lines*.

    Positions are one-based and count empty entries too, so a skipped entry
    leaves a gap in the numbering. The position is zero-padded to three
    digits in ``block_id`` and written unpadded in ``source_path``.
    """
    blocks: list[ParagraphBlock] = []
    for position, text in enumerate(lines, start=1):
        if not text:
            continue
        blocks.append(
            ParagraphBlock(
                block_id=f"{artifact_id}-line-{position:03d}",
                text=text,
                source_path=f"{source_prefix}#line[{position}]",
            )
        )
    return blocks
731
+
732
+
733
def _base_metadata(
    path: Path,
    *,
    known_format: KnownDocumentFormat,
    adapter_id: str,
    mutation_policy: str,
) -> dict[str, MetadataValue]:
    """Assemble the metadata entries every passive adapter reports.

    The result holds the adapter id, the format's ``value``, the mutation
    policy label and the file size in bytes from ``path.stat()``.
    """
    info: dict[str, MetadataValue] = {}
    info["adapter_id"] = adapter_id
    info["known_format"] = known_format.value
    info["mutation_policy"] = mutation_policy
    info["byte_size"] = path.stat().st_size
    return info
746
+
747
+
748
def _known_format(path: Path) -> KnownDocumentFormat:
    """Map the lower-cased final suffix of *path* to a known format.

    Suffixes missing from ``_KNOWN_BY_EXTENSION`` resolve to
    ``KnownDocumentFormat.txt``.
    """
    suffix = path.suffix.lower()
    if suffix in _KNOWN_BY_EXTENSION:
        return _KNOWN_BY_EXTENSION[suffix]
    return KnownDocumentFormat.txt
750
+
751
+
752
def _local_name(tag: str) -> str:
    """Return the part of *tag* after its last ``}``, or *tag* itself if none."""
    # rpartition gives ("", "", tag) when the separator is absent, so the last
    # element is correct in both cases.
    return tag.rpartition("}")[2]
754
+
755
+
756
class _VisibleTextParser(HTMLParser):
    """Small HTML text extractor for passive public-form exports.

    Collects each stripped, non-empty text chunk into ``lines``. Text inside
    ``<script>`` and ``<style>`` elements is code or styling rather than
    visible content, so it is skipped.
    """

    # Elements whose text content is never rendered as page text.
    _HIDDEN_TAGS = frozenset({"script", "style"})

    def __init__(self) -> None:
        super().__init__(convert_charrefs=True)
        self.lines: list[str] = []
        # Number of currently open hidden elements; text is dropped while > 0.
        self._hidden_depth = 0

    def handle_starttag(self, tag: str, attrs: list[tuple[str, str | None]]) -> None:
        """Start suppressing text when a hidden element opens."""
        if tag in self._HIDDEN_TAGS:
            self._hidden_depth += 1

    def handle_endtag(self, tag: str) -> None:
        """Stop suppressing text when a hidden element closes."""
        # Guard against stray closing tags driving the counter negative.
        if tag in self._HIDDEN_TAGS and self._hidden_depth:
            self._hidden_depth -= 1

    def handle_data(self, data: str) -> None:
        """Record a text chunk unless it is hidden or blank."""
        if self._hidden_depth:
            return
        text = data.strip()
        if text:
            self.lines.append(text)