agent-world 0.13.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (263) hide show
  1. package/README.md +90 -17
  2. package/dist/cli/commands.d.ts +7 -1
  3. package/dist/cli/commands.js +27 -10
  4. package/dist/cli/hitl.d.ts +4 -1
  5. package/dist/cli/hitl.js +55 -20
  6. package/dist/cli/index.js +249 -97
  7. package/dist/cli/system-events.d.ts +27 -0
  8. package/dist/cli/system-events.js +63 -0
  9. package/dist/core/activity-tracker.d.ts +26 -0
  10. package/dist/core/activity-tracker.d.ts.map +1 -1
  11. package/dist/core/activity-tracker.js +21 -4
  12. package/dist/core/activity-tracker.js.map +1 -1
  13. package/dist/core/anthropic-direct.d.ts +2 -0
  14. package/dist/core/anthropic-direct.d.ts.map +1 -1
  15. package/dist/core/anthropic-direct.js +43 -1
  16. package/dist/core/anthropic-direct.js.map +1 -1
  17. package/dist/core/chat-constants.d.ts +12 -0
  18. package/dist/core/chat-constants.d.ts.map +1 -1
  19. package/dist/core/chat-constants.js +5 -0
  20. package/dist/core/chat-constants.js.map +1 -1
  21. package/dist/core/create-agent-tool.d.ts +5 -0
  22. package/dist/core/create-agent-tool.d.ts.map +1 -1
  23. package/dist/core/create-agent-tool.js +57 -34
  24. package/dist/core/create-agent-tool.js.map +1 -1
  25. package/dist/core/events/index.d.ts +5 -2
  26. package/dist/core/events/index.d.ts.map +1 -1
  27. package/dist/core/events/index.js +5 -2
  28. package/dist/core/events/index.js.map +1 -1
  29. package/dist/core/events/memory-manager.d.ts +26 -1
  30. package/dist/core/events/memory-manager.d.ts.map +1 -1
  31. package/dist/core/events/memory-manager.js +877 -72
  32. package/dist/core/events/memory-manager.js.map +1 -1
  33. package/dist/core/events/orchestrator.d.ts +8 -0
  34. package/dist/core/events/orchestrator.d.ts.map +1 -1
  35. package/dist/core/events/orchestrator.js +203 -36
  36. package/dist/core/events/orchestrator.js.map +1 -1
  37. package/dist/core/events/persistence.d.ts +21 -14
  38. package/dist/core/events/persistence.d.ts.map +1 -1
  39. package/dist/core/events/persistence.js +100 -35
  40. package/dist/core/events/persistence.js.map +1 -1
  41. package/dist/core/events/publishers.d.ts +13 -7
  42. package/dist/core/events/publishers.d.ts.map +1 -1
  43. package/dist/core/events/publishers.js +53 -37
  44. package/dist/core/events/publishers.js.map +1 -1
  45. package/dist/core/events/subscribers.d.ts +17 -14
  46. package/dist/core/events/subscribers.d.ts.map +1 -1
  47. package/dist/core/events/subscribers.js +61 -148
  48. package/dist/core/events/subscribers.js.map +1 -1
  49. package/dist/core/events/title-scheduler.d.ts +27 -0
  50. package/dist/core/events/title-scheduler.d.ts.map +1 -0
  51. package/dist/core/events/title-scheduler.js +135 -0
  52. package/dist/core/events/title-scheduler.js.map +1 -0
  53. package/dist/core/events/tool-bridge-logging.d.ts +4 -1
  54. package/dist/core/events/tool-bridge-logging.d.ts.map +1 -1
  55. package/dist/core/events/tool-bridge-logging.js +112 -13
  56. package/dist/core/events/tool-bridge-logging.js.map +1 -1
  57. package/dist/core/events-metadata.d.ts.map +1 -1
  58. package/dist/core/events-metadata.js +8 -4
  59. package/dist/core/events-metadata.js.map +1 -1
  60. package/dist/core/export.d.ts +1 -1
  61. package/dist/core/export.d.ts.map +1 -1
  62. package/dist/core/export.js +2 -15
  63. package/dist/core/export.js.map +1 -1
  64. package/dist/core/feature-path-logging.d.ts +50 -0
  65. package/dist/core/feature-path-logging.d.ts.map +1 -0
  66. package/dist/core/feature-path-logging.js +130 -0
  67. package/dist/core/feature-path-logging.js.map +1 -0
  68. package/dist/core/file-tools.d.ts +57 -1
  69. package/dist/core/file-tools.d.ts.map +1 -1
  70. package/dist/core/file-tools.js +329 -29
  71. package/dist/core/file-tools.js.map +1 -1
  72. package/dist/core/google-direct.d.ts +6 -1
  73. package/dist/core/google-direct.d.ts.map +1 -1
  74. package/dist/core/google-direct.js +76 -7
  75. package/dist/core/google-direct.js.map +1 -1
  76. package/dist/core/heartbeat.d.ts +34 -0
  77. package/dist/core/heartbeat.d.ts.map +1 -0
  78. package/dist/core/heartbeat.js +153 -0
  79. package/dist/core/heartbeat.js.map +1 -0
  80. package/dist/core/hitl-tool.d.ts +6 -12
  81. package/dist/core/hitl-tool.d.ts.map +1 -1
  82. package/dist/core/hitl-tool.js +66 -88
  83. package/dist/core/hitl-tool.js.map +1 -1
  84. package/dist/core/hitl.d.ts +61 -4
  85. package/dist/core/hitl.d.ts.map +1 -1
  86. package/dist/core/hitl.js +324 -60
  87. package/dist/core/hitl.js.map +1 -1
  88. package/dist/core/index.d.ts +11 -7
  89. package/dist/core/index.d.ts.map +1 -1
  90. package/dist/core/index.js +10 -6
  91. package/dist/core/index.js.map +1 -1
  92. package/dist/core/llm-manager.d.ts +15 -0
  93. package/dist/core/llm-manager.d.ts.map +1 -1
  94. package/dist/core/llm-manager.js +325 -40
  95. package/dist/core/llm-manager.js.map +1 -1
  96. package/dist/core/load-skill-tool.d.ts +36 -3
  97. package/dist/core/load-skill-tool.d.ts.map +1 -1
  98. package/dist/core/load-skill-tool.js +807 -93
  99. package/dist/core/load-skill-tool.js.map +1 -1
  100. package/dist/core/logger.d.ts +14 -0
  101. package/dist/core/logger.d.ts.map +1 -1
  102. package/dist/core/logger.js +15 -0
  103. package/dist/core/logger.js.map +1 -1
  104. package/dist/core/managers.d.ts +18 -50
  105. package/dist/core/managers.d.ts.map +1 -1
  106. package/dist/core/managers.js +340 -502
  107. package/dist/core/managers.js.map +1 -1
  108. package/dist/core/mcp-server-registry.d.ts +16 -1
  109. package/dist/core/mcp-server-registry.d.ts.map +1 -1
  110. package/dist/core/mcp-server-registry.js +162 -12
  111. package/dist/core/mcp-server-registry.js.map +1 -1
  112. package/dist/core/message-cutoff.d.ts +29 -0
  113. package/dist/core/message-cutoff.d.ts.map +1 -0
  114. package/dist/core/message-cutoff.js +63 -0
  115. package/dist/core/message-cutoff.js.map +1 -0
  116. package/dist/core/message-edit-manager.d.ts +54 -0
  117. package/dist/core/message-edit-manager.d.ts.map +1 -0
  118. package/dist/core/message-edit-manager.js +602 -0
  119. package/dist/core/message-edit-manager.js.map +1 -0
  120. package/dist/core/message-prep.d.ts +2 -0
  121. package/dist/core/message-prep.d.ts.map +1 -1
  122. package/dist/core/message-prep.js +39 -12
  123. package/dist/core/message-prep.js.map +1 -1
  124. package/dist/core/message-processing-control.d.ts +1 -0
  125. package/dist/core/message-processing-control.d.ts.map +1 -1
  126. package/dist/core/message-processing-control.js +23 -6
  127. package/dist/core/message-processing-control.js.map +1 -1
  128. package/dist/core/openai-direct.d.ts +9 -3
  129. package/dist/core/openai-direct.d.ts.map +1 -1
  130. package/dist/core/openai-direct.js +267 -33
  131. package/dist/core/openai-direct.js.map +1 -1
  132. package/dist/core/optional-tracers/opik-runtime.d.ts +32 -0
  133. package/dist/core/optional-tracers/opik-runtime.d.ts.map +1 -0
  134. package/dist/core/optional-tracers/opik-runtime.js +141 -0
  135. package/dist/core/optional-tracers/opik-runtime.js.map +1 -0
  136. package/dist/core/queue-manager.d.ts +84 -0
  137. package/dist/core/queue-manager.d.ts.map +1 -0
  138. package/dist/core/queue-manager.js +814 -0
  139. package/dist/core/queue-manager.js.map +1 -0
  140. package/dist/core/reasoning-controls.d.ts +30 -0
  141. package/dist/core/reasoning-controls.d.ts.map +1 -0
  142. package/dist/core/reasoning-controls.js +118 -0
  143. package/dist/core/reasoning-controls.js.map +1 -0
  144. package/dist/core/reliability-config.d.ts +82 -0
  145. package/dist/core/reliability-config.d.ts.map +1 -0
  146. package/dist/core/reliability-config.js +106 -0
  147. package/dist/core/reliability-config.js.map +1 -0
  148. package/dist/core/reliability-runtime.d.ts +53 -0
  149. package/dist/core/reliability-runtime.d.ts.map +1 -0
  150. package/dist/core/reliability-runtime.js +92 -0
  151. package/dist/core/reliability-runtime.js.map +1 -0
  152. package/dist/core/security/guardrails.d.ts +21 -0
  153. package/dist/core/security/guardrails.d.ts.map +1 -0
  154. package/dist/core/security/guardrails.js +111 -0
  155. package/dist/core/security/guardrails.js.map +1 -0
  156. package/dist/core/send-message-tool.d.ts +79 -0
  157. package/dist/core/send-message-tool.d.ts.map +1 -0
  158. package/dist/core/send-message-tool.js +222 -0
  159. package/dist/core/send-message-tool.js.map +1 -0
  160. package/dist/core/shell-cmd-tool.d.ts +82 -1
  161. package/dist/core/shell-cmd-tool.d.ts.map +1 -1
  162. package/dist/core/shell-cmd-tool.js +854 -42
  163. package/dist/core/shell-cmd-tool.js.map +1 -1
  164. package/dist/core/skill-registry.d.ts +2 -0
  165. package/dist/core/skill-registry.d.ts.map +1 -1
  166. package/dist/core/skill-registry.js +52 -2
  167. package/dist/core/skill-registry.js.map +1 -1
  168. package/dist/core/storage/eventStorage/fileEventStorage.d.ts +5 -0
  169. package/dist/core/storage/eventStorage/fileEventStorage.d.ts.map +1 -1
  170. package/dist/core/storage/eventStorage/fileEventStorage.js +61 -0
  171. package/dist/core/storage/eventStorage/fileEventStorage.js.map +1 -1
  172. package/dist/core/storage/eventStorage/memoryEventStorage.d.ts +5 -0
  173. package/dist/core/storage/eventStorage/memoryEventStorage.d.ts.map +1 -1
  174. package/dist/core/storage/eventStorage/memoryEventStorage.js +34 -0
  175. package/dist/core/storage/eventStorage/memoryEventStorage.js.map +1 -1
  176. package/dist/core/storage/eventStorage/sqliteEventStorage.d.ts +1 -0
  177. package/dist/core/storage/eventStorage/sqliteEventStorage.d.ts.map +1 -1
  178. package/dist/core/storage/eventStorage/sqliteEventStorage.js +19 -2
  179. package/dist/core/storage/eventStorage/sqliteEventStorage.js.map +1 -1
  180. package/dist/core/storage/eventStorage/types.d.ts +6 -0
  181. package/dist/core/storage/eventStorage/types.d.ts.map +1 -1
  182. package/dist/core/storage/eventStorage/types.js +1 -0
  183. package/dist/core/storage/eventStorage/types.js.map +1 -1
  184. package/dist/core/storage/eventStorage/validation.d.ts.map +1 -1
  185. package/dist/core/storage/eventStorage/validation.js +2 -1
  186. package/dist/core/storage/eventStorage/validation.js.map +1 -1
  187. package/dist/core/storage/github-world-import.d.ts +84 -0
  188. package/dist/core/storage/github-world-import.d.ts.map +1 -0
  189. package/dist/core/storage/github-world-import.js +365 -0
  190. package/dist/core/storage/github-world-import.js.map +1 -0
  191. package/dist/core/storage/memory-storage.d.ts +19 -8
  192. package/dist/core/storage/memory-storage.d.ts.map +1 -1
  193. package/dist/core/storage/memory-storage.js +147 -49
  194. package/dist/core/storage/memory-storage.js.map +1 -1
  195. package/dist/core/storage/queue-storage.d.ts +1 -0
  196. package/dist/core/storage/queue-storage.d.ts.map +1 -1
  197. package/dist/core/storage/queue-storage.js +3 -2
  198. package/dist/core/storage/queue-storage.js.map +1 -1
  199. package/dist/core/storage/sqlite-storage.d.ts +14 -9
  200. package/dist/core/storage/sqlite-storage.d.ts.map +1 -1
  201. package/dist/core/storage/sqlite-storage.js +131 -154
  202. package/dist/core/storage/sqlite-storage.js.map +1 -1
  203. package/dist/core/storage/storage-factory.d.ts +3 -0
  204. package/dist/core/storage/storage-factory.d.ts.map +1 -1
  205. package/dist/core/storage/storage-factory.js +175 -89
  206. package/dist/core/storage/storage-factory.js.map +1 -1
  207. package/dist/core/storage/world-storage.d.ts +1 -1
  208. package/dist/core/storage/world-storage.d.ts.map +1 -1
  209. package/dist/core/storage/world-storage.js +5 -1
  210. package/dist/core/storage/world-storage.js.map +1 -1
  211. package/dist/core/storage-init.d.ts +11 -0
  212. package/dist/core/storage-init.d.ts.map +1 -0
  213. package/dist/core/storage-init.js +122 -0
  214. package/dist/core/storage-init.js.map +1 -0
  215. package/dist/core/subscription.d.ts +8 -1
  216. package/dist/core/subscription.d.ts.map +1 -1
  217. package/dist/core/subscription.js +130 -23
  218. package/dist/core/subscription.js.map +1 -1
  219. package/dist/core/tool-approval.d.ts +45 -0
  220. package/dist/core/tool-approval.d.ts.map +1 -0
  221. package/dist/core/tool-approval.js +223 -0
  222. package/dist/core/tool-approval.js.map +1 -0
  223. package/dist/core/tool-execution-envelope.d.ts +87 -0
  224. package/dist/core/tool-execution-envelope.d.ts.map +1 -0
  225. package/dist/core/tool-execution-envelope.js +168 -0
  226. package/dist/core/tool-execution-envelope.js.map +1 -0
  227. package/dist/core/tool-utils.d.ts +7 -2
  228. package/dist/core/tool-utils.d.ts.map +1 -1
  229. package/dist/core/tool-utils.js +81 -17
  230. package/dist/core/tool-utils.js.map +1 -1
  231. package/dist/core/types.d.ts +67 -19
  232. package/dist/core/types.d.ts.map +1 -1
  233. package/dist/core/types.js +3 -0
  234. package/dist/core/types.js.map +1 -1
  235. package/dist/core/utils.d.ts +7 -0
  236. package/dist/core/utils.d.ts.map +1 -1
  237. package/dist/core/utils.js +71 -21
  238. package/dist/core/utils.js.map +1 -1
  239. package/dist/core/web-fetch-tool.d.ts +72 -0
  240. package/dist/core/web-fetch-tool.d.ts.map +1 -0
  241. package/dist/core/web-fetch-tool.js +491 -0
  242. package/dist/core/web-fetch-tool.js.map +1 -0
  243. package/dist/core/world-registry.d.ts +84 -0
  244. package/dist/core/world-registry.d.ts.map +1 -0
  245. package/dist/core/world-registry.js +247 -0
  246. package/dist/core/world-registry.js.map +1 -0
  247. package/dist/public/assets/index-Be-1xtV-.js +104 -0
  248. package/dist/public/assets/index-tsDdiXDU.css +1 -0
  249. package/dist/public/index.html +2 -2
  250. package/dist/public/mcp-sandbox-proxy.html +148 -0
  251. package/dist/server/api.js +260 -18
  252. package/dist/server/error-response.d.ts +27 -0
  253. package/dist/server/error-response.js +77 -0
  254. package/dist/server/index.d.ts +2 -1
  255. package/dist/server/index.js +6 -2
  256. package/dist/server/sse-handler.d.ts +11 -1
  257. package/dist/server/sse-handler.js +194 -34
  258. package/migrations/0015_add_message_queue.sql +36 -0
  259. package/migrations/0016_add_world_heartbeat.sql +13 -0
  260. package/migrations/0017_add_title_provenance.sql +7 -0
  261. package/package.json +31 -10
  262. package/dist/public/assets/index-BW41BxMy.css +0 -1
  263. package/dist/public/assets/index-kO6UJFwK.js +0 -96
@@ -27,6 +27,21 @@
27
27
  * - Uses universal validation framework for consistent parameter checking
28
28
  *
29
29
  * Recent Changes:
30
+ * - 2026-03-12: Shared tool approval flow now persists durable approval prompt/resolution messages for replay-safe shell approval history.
31
+ * - 2026-03-12: Added `toolPermission` enforcement: 'read' level blocks execution with an error result; 'ask' level forces every invocation through HITL approval regardless of risk tier.
32
+ * - 2026-03-06: Added explicit canonical failure reasons for shell validation/policy failures so approval denials and validation errors no longer masquerade as non-zero exits.
33
+ * - 2026-03-06: Unified shell continuation output on one bounded-preview result contract, removed `smart`-mode branching, and stopped persisting a synthetic assistant stdout mirror message after shell completion.
34
+ * - 2026-03-06: Added canonical shell error-result formatting helper so upstream tool persistence can normalize shell failures without falling back to ad hoc error strings.
35
+ * - 2026-03-05: Hardened timeout termination to target process groups/process trees (SIGTERM + SIGKILL fallback) and removed child-process builtin timeout to keep timeout outcomes deterministic in the tool layer.
36
+ * - 2026-03-05: Switched shell timeout grace config to shared reliability config helper.
37
+ * - 2026-03-01: Prevented `./` and `../` parameter tokens from being misclassified as `<skill-id>/<path>` so non-skill shell paths remain unchanged.
38
+ * - 2026-02-28: Generalized skill-relative path fallback to work with any folder prefix, removing `scripts/`-specific behavior.
39
+ * - 2026-02-28: Added skill-aware script path resolution so `<skill-id>/scripts/<file>` parameters are auto-resolved to absolute paths under the skill root directory.
40
+ * - 2026-02-28: Added deterministic shell risk tiering (`allow`/`hitl_required`/`block`) with per-call HITL approve/deny gating via shared `requestToolApproval` helper for high-risk in-scope commands.
41
+ * - 2026-02-24: Required explicit chatId context for stdout/stderr streaming event emission to preserve chat isolation under strict frontend filtering.
42
+ * - 2026-02-21: Streamed stderr via legacy `tool-stream` events while streaming stdout as assistant SSE; persisted only finalized stdout assistant message after execution completes.
43
+ * - 2026-02-21: Added assistant-style SSE start/chunk/end streaming for shell runtime output so command chunks are delivered as assistant stream events instead of `tool-stream` messages.
44
+ * - 2026-02-21: Added minimal LLM shell-result mode (`status` + `exit_code` semantics) for tool-call continuations, excluding stdout/stderr transcript bodies.
30
45
  * - 2026-02-15: Moved core cwd-boundary enforcement into `executeShellCommand` via optional `trustedWorkingDirectory` execution option.
31
46
  * - 2026-02-15: Added optional `output_format=json` for machine-readable command results.
32
47
  * - 2026-02-15: Added optional `artifact_paths` support with SHA-256 hashing and byte-size metadata for files within trusted scope.
@@ -63,16 +78,22 @@
63
78
  * - Initial implementation for shell_cmd LLM tool
64
79
  */
65
80
  import { spawn } from 'child_process';
66
- import { resolve, join, relative } from 'path';
81
+ import { resolve, join, relative, dirname } from 'path';
67
82
  import { createHash } from 'crypto';
68
83
  import { homedir } from 'os';
69
- import { realpathSync, promises as fsPromises } from 'fs';
84
+ import { existsSync, readdirSync, realpathSync, promises as fsPromises } from 'fs';
70
85
  import { createCategoryLogger } from './logger.js';
86
+ import { getShellTimeoutKillGraceMs } from './reliability-config.js';
71
87
  import { validateToolParameters } from './tool-utils.js';
72
- import { publishSSE } from './events/index.js';
88
+ import { requestToolApproval } from './tool-approval.js';
89
+ import { publishSSE } from './events/publishers.js';
73
90
  import { getDefaultWorkingDirectory, getEnvValueFromText } from './utils.js';
91
+ import { getSkillSourcePath, getSkills } from './skill-registry.js';
92
+ import { buildToolArtifactPreviewUrl, createArtifactToolPreview, createTextToolPreview, serializeToolExecutionEnvelope, } from './tool-execution-envelope.js';
74
93
  import { createShellProcessExecution, transitionShellProcessExecution, attachShellProcessHandle, markShellProcessCancelRequested, listShellProcessExecutions, getShellProcessExecution, cancelShellProcessExecution, deleteShellProcessExecution, stopShellProcessesForChatScope, subscribeShellProcessStatus, clearShellProcessRegistryForTests } from './shell-process-registry.js';
75
94
  const logger = createCategoryLogger('shell-cmd');
95
+ const SHELL_RISK_APPROVE_OPTION = 'approve';
96
+ const SHELL_RISK_DENY_OPTION = 'deny';
76
97
  /**
77
98
  * Resolve directory path, handling tilde expansion and relative paths
78
99
  */
@@ -86,6 +107,28 @@ function resolveDirectory(directory) {
86
107
  return resolve(directory);
87
108
  }
88
109
  const DEFAULT_MIN_OUTPUT_CHARS = 400;
110
+ const DEFAULT_LLM_PREVIEW_OUTPUT_CHARS = 1200;
111
/**
 * Map a shell-tool error message onto a canonical failure reason.
 *
 * Matching is a case-insensitive substring search over the trimmed message.
 * Approval markers are checked before validation markers, so a message
 * containing both kinds is reported as an approval denial.
 *
 * @param {unknown} errorMessage - Raw error text; falsy values are treated as empty.
 * @returns {'approval_denied' | 'validation_error' | undefined} The canonical
 *   reason, or `undefined` when the message is empty or matches no marker.
 */
function inferShellFailureReason(errorMessage) {
    const text = String(errorMessage || '').trim().toLowerCase();
    if (text.length === 0) {
        return undefined;
    }
    const approvalMarkers = [
        'approval required',
        'request was not approved',
        'command not executed:',
    ];
    const validationMarkers = [
        'invalid command',
        'invalid json in tool arguments',
        'invalid tool call payload',
        'working directory mismatch',
        'outside world working directory',
        'blocked dangerous operation',
        'cannot be executed',
    ];
    const mentions = (marker) => text.includes(marker);
    if (approvalMarkers.some(mentions)) {
        return 'approval_denied';
    }
    if (validationMarkers.some(mentions)) {
        return 'validation_error';
    }
    return undefined;
}
89
132
  function buildOutputSnippet(content, maxOutputChars) {
90
133
  if (!content) {
91
134
  return { text: '', truncated: false };
@@ -299,6 +342,181 @@ function tokenizeInlineCommandArgs(command) {
299
342
  function tokenizeCommand(command) {
300
343
  return command.match(/"([^"\\]|\\.)*"|'([^'\\]|\\.)*'|[^\s]+/g) ?? [];
301
344
  }
345
/**
 * Produce a comparison-friendly executable name for risk classification.
 *
 * Lower-cases the value returned by `getExecutableName(command)` and drops a
 * trailing `.exe` suffix when present.
 *
 * @param {string} command - Command string as supplied to the shell tool.
 * @returns {string} Lower-cased executable name without a `.exe` suffix.
 */
function normalizeExecutable(command) {
    const WINDOWS_SUFFIX = '.exe';
    const lowered = getExecutableName(command).toLowerCase();
    if (!lowered.endsWith(WINDOWS_SUFFIX)) {
        return lowered;
    }
    return lowered.slice(0, lowered.length - WINDOWS_SUFFIX.length);
}
349
/**
 * Clean a raw parameter list into plain tokens for risk checks.
 *
 * Non-array input yields an empty list. Non-string entries are dropped; each
 * remaining entry is passed through `stripWrappingQuotes`, trimmed, and kept
 * only if something is left.
 *
 * @param {unknown} parameters - Parameter list as received by the tool.
 * @returns {string[]} Non-empty, unquoted, trimmed tokens in original order.
 */
function normalizeParameterTokens(parameters) {
    if (!Array.isArray(parameters)) {
        return [];
    }
    const tokens = [];
    for (const parameter of parameters) {
        if (typeof parameter !== 'string') {
            continue;
        }
        const token = stripWrappingQuotes(parameter).trim();
        if (token) {
            tokens.push(token);
        }
    }
    return tokens;
}
358
/**
 * Check whether any parameter token sets one of the given flags.
 *
 * Comparison is case-insensitive (both aliases and tokens are lower-cased),
 * so `-R` and `-r` are treated as the same flag. Three spellings are
 * recognised:
 *   - an exact alias match (`-f`, `--force`);
 *   - a long option carrying an inline value (`--output-document=file`),
 *     matched on the part before the first `=`;
 *   - a bundle of short flags (`-rf`), matched character by character.
 *
 * @param {string[]} parameters - Normalized parameter tokens.
 * @param {string[]} aliases - Flag spellings to look for (e.g. `['-r', '--recursive']`).
 * @returns {boolean} True when at least one token sets one of the aliases.
 */
function hasFlag(parameters, aliases) {
    const aliasSet = new Set(aliases.map((alias) => alias.toLowerCase()));
    return parameters.some((parameter) => {
        const lowered = parameter.toLowerCase();
        if (aliasSet.has(lowered)) {
            return true;
        }
        if (lowered.startsWith('--')) {
            // Long options may carry their value inline: `--flag=value`.
            // Without this, `--output-document=x` bypassed the alias check.
            const equalsIndex = lowered.indexOf('=');
            return equalsIndex > 2 && aliasSet.has(lowered.slice(0, equalsIndex));
        }
        if (lowered.startsWith('-') && lowered.length > 2) {
            // Bundled short flags such as `-rf`: test each letter on its own.
            return [...lowered.slice(1)].some((shortFlag) => aliasSet.has(`-${shortFlag}`));
        }
        return false;
    });
}
378
/**
 * Decide whether a path token names a system-critical location.
 *
 * The token is trimmed, backslashes become forward slashes, and the result is
 * lower-cased. Equivalent spellings are then folded together so they cannot
 * dodge the check: repeated slashes collapse (`//` -> `/`), a trailing
 * wildcard segment is dropped (`/*` -> `/`, `~/*` -> `~`), and a trailing
 * slash is removed (`/etc/` -> `/etc`).
 *
 * Critical locations are: the filesystem root, `~`, `/root`, a Windows drive
 * root (`c:/`), and anything equal to or beneath one of the listed system
 * directory prefixes.
 *
 * @param {string} token - Path-like token taken from the command parameters.
 * @returns {boolean} True when the token points at a critical location.
 */
function isSystemCriticalPath(token) {
    const unified = token.trim().replace(/\\/g, '/').toLowerCase();
    if (!unified) {
        return false;
    }
    // Fold `//` into `/` and treat "everything directly under X" (`X/*`) as X itself.
    const collapsed = unified
        .replace(/\/{2,}/g, '/')
        .replace(/\/\*+\/?$/, '/');
    // Drive roots must be tested before the trailing slash is stripped.
    if (/^[a-z]:\/$/.test(collapsed)) {
        return true;
    }
    const normalized = collapsed.length > 1 ? collapsed.replace(/\/$/, '') : collapsed;
    if (normalized === '/' || normalized === '~' || normalized === '/root') {
        return true;
    }
    const criticalPrefixes = [
        '/etc',
        '/usr',
        '/bin',
        '/sbin',
        '/lib',
        '/opt',
        '/var',
        '/system',
        '/library',
        '/private',
        '/proc',
        '/sys',
        '/dev',
    ];
    return criticalPrefixes.some((prefix) => normalized === prefix || normalized.startsWith(`${prefix}/`));
}
405
/**
 * Report whether any parameter token contains a `*` or `?` glob character.
 *
 * @param {string[]} parameters - Normalized parameter tokens.
 * @returns {boolean} True when at least one token contains `*` or `?`.
 */
function hasWildcardTarget(parameters) {
    const globCharacters = ['*', '?'];
    return parameters.some((token) => globCharacters.some((glob) => token.includes(glob)));
}
408
/**
 * Classify the risk of a delete-style command from its parameter tokens.
 *
 * The command is blocked when `--no-preserve-root` is present, or when the
 * recursive and force flags are both set and at least one token resolves to a
 * system-critical path. Every other delete requires human approval; the
 * reason distinguishes wildcard targets from plain ones.
 *
 * @param {string[]} parameters - Normalized parameter tokens.
 * @returns {{ tier: string, reason: string, tags: string[] }} Risk assessment.
 */
function assessRmRisk(parameters) {
    const recursive = hasFlag(parameters, ['-r', '-R', '--recursive']);
    const force = hasFlag(parameters, ['-f', '--force']);
    const noPreserveRoot = hasFlag(parameters, ['--no-preserve-root']);
    // Every token is a potential target; use the extracted path when one is found.
    const targetsCriticalPath = parameters
        .map((token) => stripWrappingQuotes(extractPathToken(token) ?? token))
        .some((target) => isSystemCriticalPath(target));
    const catastrophic = noPreserveRoot || (recursive && force && targetsCriticalPath);
    if (!catastrophic) {
        const reason = hasWildcardTarget(parameters)
            ? 'destructive_delete_wildcard'
            : 'destructive_delete';
        return {
            tier: 'hitl_required',
            reason,
            tags: ['risk:destructive', 'risk:delete'],
        };
    }
    return {
        tier: 'block',
        reason: 'catastrophic_delete_target',
        tags: ['risk:destructive', 'risk:delete', 'risk:critical-target'],
    };
}
429
/**
 * Assign a deterministic risk tier to a shell command.
 *
 * Tiers:
 *   - `block`: disk formatting/partitioning tools (`mkfs` and every
 *     `mkfs.<fstype>` builder, `fdisk`, `sfdisk`, `parted`), `dd` writing to
 *     `/dev/...`, and catastrophic deletes (see `assessRmRisk`).
 *   - `hitl_required`: other deletes, recursive `chmod`/`chown`/`chgrp`,
 *     `git clean` with `-f`/`-d`/`-x`, and `curl`/`wget` downloads of an
 *     http(s) URL that write to a file.
 *   - `allow`: everything else, including an empty or non-string command.
 *
 * @param {string} command - Executable or command string.
 * @param {unknown} parameters - Raw parameter list; normalized internally.
 * @returns {{ tier: 'allow' | 'hitl_required' | 'block', reason: string, tags: string[] }}
 *   The risk assessment.
 */
export function classifyShellCommandRisk(command, parameters) {
    if (typeof command !== 'string' || !command.trim()) {
        return {
            tier: 'allow',
            reason: 'invalid_or_empty_command',
            tags: ['risk:none']
        };
    }
    const executable = normalizeExecutable(command);
    const parameterTokens = normalizeParameterTokens(parameters);
    const hasUrl = parameterTokens.some((token) => /^https?:\/\//i.test(token));
    if (['rm', 'rmdir', 'unlink', 'del', 'erase'].includes(executable)) {
        return assessRmRisk(parameterTokens);
    }
    // Filesystem builders are installed as `mkfs.<fstype>` (vfat, ext3, f2fs, ...);
    // match the whole family instead of a fixed handful of names.
    const isFilesystemBuilder = executable === 'mkfs' || executable.startsWith('mkfs.');
    if (isFilesystemBuilder || ['fdisk', 'sfdisk', 'parted'].includes(executable)) {
        return {
            tier: 'block',
            reason: 'catastrophic_disk_operation',
            tags: ['risk:destructive', 'risk:disk']
        };
    }
    if (executable === 'dd' && parameterTokens.some((token) => token.toLowerCase().startsWith('of=/dev/'))) {
        return {
            tier: 'block',
            reason: 'catastrophic_disk_write',
            tags: ['risk:destructive', 'risk:disk']
        };
    }
    if (['chmod', 'chown', 'chgrp'].includes(executable) && hasFlag(parameterTokens, ['-r', '-R', '--recursive'])) {
        return {
            tier: 'hitl_required',
            reason: 'recursive_permission_change',
            tags: ['risk:permissions', 'risk:recursive']
        };
    }
    if (executable === 'git' && parameterTokens[0]?.toLowerCase() === 'clean' && hasFlag(parameterTokens, ['-f', '-d', '-x'])) {
        return {
            tier: 'hitl_required',
            reason: 'destructive_git_clean',
            tags: ['risk:destructive', 'risk:git']
        };
    }
    // `--output` / `--remote-name` are curl's long forms of `-o` / `-O`;
    // `--output-document` is wget's long form of `-O`.
    const downloadOutputFlags = ['-o', '-O', '--output', '--remote-name', '--output-document'];
    if (['curl', 'wget'].includes(executable) && hasUrl && hasFlag(parameterTokens, downloadOutputFlags)) {
        return {
            tier: 'hitl_required',
            reason: 'remote_download',
            tags: ['risk:network', 'risk:download']
        };
    }
    return {
        tier: 'allow',
        reason: 'low_risk_command',
        tags: ['risk:none']
    };
}
484
/**
 * Ask for approval before running a risky shell command.
 *
 * Builds an approve/deny prompt (deny is the default option) describing the
 * command line, the risk reason and the trusted directory, then forwards it to
 * `requestToolApproval` together with risk metadata.
 *
 * @param {object} options - Approval context.
 * @param {object} options.world - World passed through to `requestToolApproval`.
 * @param {string} options.chatId - Chat passed through to `requestToolApproval`.
 * @param {string} [options.toolCallId] - Tool call id; added to metadata only when set.
 * @param {string} options.command - Command being gated.
 * @param {string[]} options.parameters - Command parameters.
 * @param {{ tier: string, reason: string, tags: string[] }} options.risk - Risk assessment.
 * @param {string} options.resolvedDirectory - Directory shown as the trusted directory and sent as `cwd`.
 * @param {string} [options.agentName] - Agent name; falsy values are sent as `null`.
 * @param {unknown} [options.messages] - Passed through to `requestToolApproval`.
 * @returns {Promise<{ approved: unknown, reason: unknown }>} The `approved` and
 *   `reason` fields of the approval result.
 */
async function requestShellCommandRiskApproval(options) {
    const { world, chatId, toolCallId, command, parameters, risk, resolvedDirectory, agentName, messages, } = options;
    const promptLines = [
        `Command: ${command} ${parameters.join(' ')}`.trim(),
        `Risk: ${risk.reason}`,
        `Trusted directory: ${resolvedDirectory}`,
        'Proceed with this command?',
    ];
    const metadata = {
        tool: 'shell_cmd',
        riskTier: risk.tier,
        riskReason: risk.reason,
        riskTags: risk.tags,
        command,
        parameters,
        cwd: resolvedDirectory,
    };
    if (toolCallId) {
        metadata.toolCallId = toolCallId;
    }
    const { approved, reason } = await requestToolApproval({
        world,
        chatId,
        toolCallId,
        title: 'Approve risky shell command?',
        message: promptLines.join('\n'),
        defaultOptionId: SHELL_RISK_DENY_OPTION,
        options: [
            { id: SHELL_RISK_APPROVE_OPTION, label: 'Approve', description: 'Run this command once.' },
            { id: SHELL_RISK_DENY_OPTION, label: 'Deny', description: 'Do not run this command.' },
        ],
        approvedOptionIds: [SHELL_RISK_APPROVE_OPTION],
        metadata,
        agentName: agentName || null,
        messages,
    });
    return { approved, reason };
}
302
520
  function hasDisallowedShellSyntax(value) {
303
521
  if (!value)
304
522
  return false;
@@ -408,7 +626,7 @@ function findInlineScriptExecutionFlag(command, parameters) {
408
626
  }
409
627
  return null;
410
628
  }
411
- export function validateShellCommandScope(command, parameters, trustedWorkingDirectory) {
629
+ export function validateShellCommandScope(command, parameters, trustedWorkingDirectory, additionalTrustedRoots) {
412
630
  const singleCommandValidation = validateSingleCommandContract(command);
413
631
  if (!singleCommandValidation.valid) {
414
632
  return singleCommandValidation;
@@ -450,14 +668,177 @@ export function validateShellCommandScope(command, parameters, trustedWorkingDir
450
668
  continue;
451
669
  const resolvedPath = resolveTokenPath(token, trustedWorkingDirectory);
452
670
  if (!isPathWithinTrustedDirectory(resolvedPath, trustedWorkingDirectory)) {
453
- return {
454
- valid: false,
455
- error: `Working directory mismatch: path "${token}" is outside world working directory "${trustedWorkingDirectory}".`
456
- };
671
+ const withinAdditionalRoot = (additionalTrustedRoots || []).some((root) => isPathWithinTrustedDirectory(resolvedPath, root));
672
+ if (!withinAdditionalRoot) {
673
+ return {
674
+ valid: false,
675
+ error: `Working directory mismatch: path "${token}" is outside world working directory "${trustedWorkingDirectory}".`
676
+ };
677
+ }
457
678
  }
458
679
  }
459
680
  return { valid: true };
460
681
  }
682
// Directory prefixes that explicitly mark a parameter as a skill-relative path.
const SKILL_DIR_PREFIXES = ['.agents/skills/', 'skills/'];
/**
 * Split a parameter of the form `<skill-id>/<path>` into its two parts.
 *
 * Two shapes are recognised:
 *   - explicit: `.agents/skills/<skill-id>/<path>` or `skills/<skill-id>/<path>`;
 *   - generic: `<skill-id>/<path>`, where the first segment must not start
 *     with `.` or `-` (so `./x`, `../x`, hidden directories and option-like
 *     tokens are left alone).
 *
 * An explicit prefix followed by `.` or `..` as the skill id is rejected
 * outright: such an id is a directory-traversal segment, not a skill, and
 * would otherwise let callers derive a skill root outside the skills folder.
 *
 * @param {string} param - Raw parameter token.
 * @returns {{ skillId: string, remainder: string } | null} The parsed parts,
 *   or `null` when the token is not a skill-relative path.
 */
function extractSkillIdAndRemainder(param) {
    for (const prefix of SKILL_DIR_PREFIXES) {
        if (!param.startsWith(prefix)) {
            continue;
        }
        const afterPrefix = param.slice(prefix.length);
        const prefixedSlashIndex = afterPrefix.indexOf('/');
        if (prefixedSlashIndex <= 0) {
            continue;
        }
        const prefixedSkillId = afterPrefix.slice(0, prefixedSlashIndex);
        if (prefixedSkillId === '.' || prefixedSkillId === '..') {
            return null;
        }
        const prefixedRemainder = afterPrefix.slice(prefixedSlashIndex + 1);
        if (prefixedSkillId && prefixedRemainder) {
            return { skillId: prefixedSkillId, remainder: prefixedRemainder };
        }
    }
    const slashIndex = param.indexOf('/');
    if (slashIndex <= 0) {
        return null;
    }
    const skillId = param.slice(0, slashIndex);
    // Covers `.`, `..`, hidden directories and option-like tokens.
    if (skillId.startsWith('.') || skillId.startsWith('-')) {
        return null;
    }
    const remainder = param.slice(slashIndex + 1);
    if (!remainder) {
        return null;
    }
    return { skillId, remainder };
}
708
/**
 * Resolve a relative path under a skill root, tolerating one extra leading folder.
 *
 * The path is first joined to `skillRoot` as-is. If that candidate is not
 * accepted, the first path segment is dropped and the rest is tried. When
 * `requireExisting` is false the direct candidate is always accepted.
 *
 * @param {string} skillRoot - Directory of the skill.
 * @param {string} relativePath - Path relative to the skill root (using `/`).
 * @param {boolean} [requireExisting=true] - Accept a candidate only if it exists on disk.
 * @returns {string | null} The accepted candidate path, or `null` when none qualifies.
 */
function resolveWithPrefixFallback(skillRoot, relativePath, requireExisting = true) {
    const accept = (candidate) => (!requireExisting || existsSync(candidate) ? candidate : null);
    const direct = accept(join(skillRoot, relativePath));
    if (direct !== null) {
        return direct;
    }
    const firstSlash = relativePath.indexOf('/');
    if (firstSlash <= 0) {
        return null;
    }
    const tail = relativePath.slice(firstSlash + 1);
    if (!tail) {
        return null;
    }
    return accept(join(skillRoot, tail));
}
727
/**
 * Look for a slash-containing parameter under each skill folder of the runtime skills root.
 *
 * Each directory or symbolic-link entry of `runtimeSkillsRoot` is treated as a
 * skill root and probed with `resolveWithPrefixFallback`; the first hit wins.
 *
 * @param {string} param - Parameter token to resolve.
 * @param {string | undefined} runtimeSkillsRoot - Directory that holds the skill folders.
 * @returns {{ absolutePath: string, skillRoot: string } | null} The match, or
 *   `null` when the root is missing or unreadable, the parameter has no `/`,
 *   or nothing resolves.
 */
function resolveFromRuntimeSkillsRoot(param, runtimeSkillsRoot) {
    const searchable = Boolean(runtimeSkillsRoot)
        && param.includes('/')
        && existsSync(runtimeSkillsRoot);
    if (!searchable) {
        return null;
    }
    let dirEntries;
    try {
        dirEntries = readdirSync(runtimeSkillsRoot, { withFileTypes: true, encoding: 'utf8' });
    }
    catch {
        // Unreadable skills root: treat as "no match".
        return null;
    }
    for (const dirEntry of dirEntries) {
        const looksLikeSkillDir = dirEntry.isDirectory()
            || (typeof dirEntry.isSymbolicLink === 'function' && dirEntry.isSymbolicLink());
        if (!looksLikeSkillDir) {
            continue;
        }
        const skillRoot = join(runtimeSkillsRoot, dirEntry.name);
        const absolutePath = resolveWithPrefixFallback(skillRoot, param);
        if (absolutePath) {
            return { absolutePath, skillRoot };
        }
    }
    return null;
}
754
/**
 * Resolve a bare relative path (no skill id) against every known skill root.
 *
 * The runtime skills root is searched first via `resolveFromRuntimeSkillsRoot`.
 * If that finds nothing, each skill returned by `getSkills()` is tried, using
 * the directory of its source path as the skill root.
 *
 * @param {string} param - Parameter token to resolve; must contain `/`.
 * @param {string | undefined} runtimeSkillsRoot - Directory that holds the skill folders.
 * @returns {{ absolutePath: string, skillRoot: string } | null} First match, or `null`.
 */
function resolveBareSkillPath(param, runtimeSkillsRoot) {
    if (!param.includes('/')) {
        return null;
    }
    const runtimeHit = resolveFromRuntimeSkillsRoot(param, runtimeSkillsRoot);
    if (runtimeHit) {
        return runtimeHit;
    }
    for (const { skill_id: skillId } of getSkills()) {
        const sourcePath = getSkillSourcePath(skillId);
        if (!sourcePath) {
            continue;
        }
        const skillRoot = dirname(sourcePath);
        const absolutePath = resolveWithPrefixFallback(skillRoot, param);
        if (absolutePath) {
            return { absolutePath, skillRoot };
        }
    }
    return null;
}
774
/**
 * Report whether the conversation holds a tool message carrying a skill context.
 *
 * A message counts when it is an object with `role === 'tool'` whose string
 * `content` contains `<skill_context id="`. When `chatId` is given, messages
 * whose own non-empty (trimmed) `chatId` differs are ignored; messages without
 * a `chatId` are still considered.
 *
 * @param {unknown} messages - Conversation messages; non-arrays yield `false`.
 * @param {string | undefined} chatId - Chat to restrict the search to.
 * @returns {boolean} True when a matching tool message exists.
 */
function hasActiveSkillContext(messages, chatId) {
    if (!Array.isArray(messages)) {
        return false;
    }
    const skillContextMarker = '<skill_context id="';
    const belongsToOtherChat = (message) => {
        const ownChatId = typeof message.chatId === 'string' ? message.chatId.trim() : '';
        return Boolean(chatId && ownChatId && ownChatId !== chatId);
    };
    return messages.some((message) => {
        if (!message || typeof message !== 'object') {
            return false;
        }
        if (belongsToOtherChat(message) || message.role !== 'tool') {
            return false;
        }
        return typeof message.content === 'string' && message.content.includes(skillContextMarker);
    });
}
797
/**
 * Rewrite skill-relative parameters to absolute paths and collect the skill
 * roots that were used.
 *
 * For each parameter:
 *   1. If it parses as `<skill-id>/<path>` and the skill is addressed
 *      explicitly (known prefix, registered source path, or an existing folder
 *      under `runtimeSkillsRoot`), it is resolved against the registered skill
 *      root first, then against `<runtimeSkillsRoot>/<skill-id>`.
 *      A resolution is accepted only when the resulting path stays inside the
 *      skill root it was resolved against; the runtime branch additionally
 *      refuses `.`/`..` as a skill id so the derived root cannot be the skills
 *      folder itself or its parent.
 *   2. An explicitly prefixed parameter that could not be resolved is returned
 *      unchanged.
 *   3. Otherwise, when `options.allowBareScriptsResolution` is exactly `true`,
 *      the parameter is searched for under every known skill root.
 *   4. Anything left is returned unchanged.
 *
 * @param {string[]} parameters - Raw command parameters.
 * @param {string | undefined} runtimeSkillsRoot - Directory that holds the skill folders.
 * @param {{ allowBareScriptsResolution?: boolean }} [options] - Resolution options.
 * @returns {{ resolvedParameters: string[], skillRoots: string[] }} Parameters
 *   with skill paths made absolute, plus the distinct skill roots that matched.
 */
export function resolveSkillScriptParameters(parameters, runtimeSkillsRoot, options) {
    const skillRootsSet = new Set();
    const allowBareScriptsResolution = options?.allowBareScriptsResolution === true;
    const resolvedParameters = parameters.map((param) => {
        const parsed = extractSkillIdAndRemainder(param);
        if (parsed) {
            const hasExplicitSkillPrefix = SKILL_DIR_PREFIXES.some((prefix) => param.startsWith(prefix));
            const sourcePath = getSkillSourcePath(parsed.skillId);
            const hasRuntimeSkillDir = Boolean(runtimeSkillsRoot)
                && existsSync(join(runtimeSkillsRoot, parsed.skillId));
            const shouldAttemptExplicitResolution = hasExplicitSkillPrefix || Boolean(sourcePath) || hasRuntimeSkillDir;
            if (shouldAttemptExplicitResolution) {
                if (sourcePath) {
                    const skillRoot = dirname(sourcePath);
                    const absolutePath = resolveWithPrefixFallback(skillRoot, parsed.remainder, false);
                    if (absolutePath && isPathWithinTrustedDirectory(absolutePath, skillRoot)) {
                        skillRootsSet.add(skillRoot);
                        return absolutePath;
                    }
                }
                // A `.`/`..` id would make the "skill root" the skills folder or its parent.
                const isTraversalSkillId = parsed.skillId === '.' || parsed.skillId === '..';
                if (runtimeSkillsRoot && !isTraversalSkillId) {
                    const candidateSkillRoot = join(runtimeSkillsRoot, parsed.skillId);
                    const candidatePath = resolveWithPrefixFallback(candidateSkillRoot, parsed.remainder);
                    // Same containment rule as the registered-skill branch: never
                    // trust a root for a path that escapes it through `..` segments.
                    if (candidatePath && isPathWithinTrustedDirectory(candidatePath, candidateSkillRoot)) {
                        skillRootsSet.add(candidateSkillRoot);
                        return candidatePath;
                    }
                }
                if (hasExplicitSkillPrefix) {
                    return param;
                }
            }
        }
        if (!allowBareScriptsResolution) {
            return param;
        }
        const bareMatch = resolveBareSkillPath(param, runtimeSkillsRoot);
        if (bareMatch) {
            skillRootsSet.add(bareMatch.skillRoot);
            return bareMatch.absolutePath;
        }
        return param;
    });
    return { resolvedParameters, skillRoots: [...skillRootsSet] };
}
461
842
  export function stopShellCommandsForChat(worldId, chatId) {
462
843
  return stopShellProcessesForChatScope(worldId, chatId);
463
844
  }
@@ -506,6 +887,7 @@ export async function executeShellCommand(command, parameters = [], directory, o
506
887
  let timedOut = false;
507
888
  let aborted = false;
508
889
  let processExited = false;
890
+ let timeoutForceKillHandle = null;
509
891
  let unsubscribeStatusListener = null;
510
892
  const result = {
511
893
  executionId,
@@ -545,17 +927,66 @@ export async function executeShellCommand(command, parameters = [], directory, o
545
927
  const childProcess = spawn(command, quotedParams, {
546
928
  cwd: resolvedDirectory,
547
929
  shell: true, // Use shell to enable PATH resolution and shell features
548
- timeout: timeout
930
+ detached: process.platform !== 'win32',
549
931
  });
550
932
  attachShellProcessHandle(executionId, childProcess);
551
933
  transitionShellProcessExecution(executionId, 'running', {
552
934
  startedAt: new Date().toISOString()
553
935
  });
936
/**
 * Deliver a termination signal to the spawned command and, where possible,
 * to everything it started.
 *
 * - Unix-like: the child is spawned detached (own process group), so the
 *   signal is sent to the negative PID to reach the whole group.
 * - Windows: `taskkill /T /F` is used for best-effort tree termination
 *   (the `signal` argument has no effect on that path).
 * - Fallback: signal the direct child only.
 *
 * A missing PID (the spawn itself failed) skips both tree strategies and
 * goes straight to the direct-child fallback instead of invoking
 * `taskkill /PID undefined`.
 *
 * @param {NodeJS.Signals} signal - Signal to deliver (e.g. 'SIGTERM', 'SIGKILL').
 */
const sendTerminationSignal = (signal) => {
    const pid = childProcess.pid;
    const signalDirectChild = () => {
        try {
            childProcess.kill(signal);
        }
        catch {
            // ignore if process already exited
        }
    };
    if (pid && process.platform !== 'win32') {
        try {
            process.kill(-pid, signal);
            return;
        }
        catch {
            // Fall back to direct child signal below.
        }
    }
    if (pid && process.platform === 'win32') {
        // Best effort process-tree termination on Windows.
        try {
            const taskkill = spawn('taskkill', ['/PID', String(pid), '/T', '/F'], {
                stdio: 'ignore',
                windowsHide: true,
            });
            // Spawn failures (e.g. taskkill missing from PATH) arrive as an
            // asynchronous 'error' event; without a listener Node would raise
            // an uncaught exception. Fall back to the direct child instead.
            taskkill.once('error', signalDirectChild);
            taskkill.unref();
            return;
        }
        catch {
            // Fall back to direct child signal below.
        }
    }
    signalDirectChild();
};
970
/**
 * Ask the running command to stop.
 *
 * Always sends SIGTERM first. When the request comes from the timeout path
 * and getShellTimeoutKillGraceMs() yields a positive grace period, a timer is
 * armed that escalates to SIGKILL if the process has still not exited.
 * Does nothing once the process has already exited.
 *
 * @param {string} source - Origin of the request; 'timeout' enables escalation.
 */
const requestTermination = (source) => {
    if (processExited) {
        return;
    }
    sendTerminationSignal('SIGTERM');
    if (source !== 'timeout') {
        return;
    }
    const graceMs = getShellTimeoutKillGraceMs();
    if (!(graceMs > 0)) {
        return;
    }
    // Handle is kept so the exit handlers can cancel the escalation.
    timeoutForceKillHandle = setTimeout(() => {
        if (!processExited) {
            sendTerminationSignal('SIGKILL');
        }
    }, graceMs);
};
554
985
  // Set up timeout handler
555
986
  const timeoutHandle = setTimeout(() => {
556
987
  if (!processExited) {
557
988
  timedOut = true;
558
- childProcess.kill('SIGTERM');
989
+ requestTermination('timeout');
559
990
  logger.warn('Command execution timeout', { command, parameters, timeout, directory });
560
991
  }
561
992
  }, timeout);
@@ -564,7 +995,7 @@ export async function executeShellCommand(command, parameters = [], directory, o
564
995
  return;
565
996
  aborted = true;
566
997
  markShellProcessCancelRequested(executionId);
567
- childProcess.kill('SIGTERM');
998
+ requestTermination('abort');
568
999
  logger.info('Shell command aborted by request', {
569
1000
  executionId,
570
1001
  command,
@@ -611,6 +1042,10 @@ export async function executeShellCommand(command, parameters = [], directory, o
611
1042
  childProcess.on('close', (code, signal) => {
612
1043
  processExited = true;
613
1044
  clearTimeout(timeoutHandle);
1045
+ if (timeoutForceKillHandle) {
1046
+ clearTimeout(timeoutForceKillHandle);
1047
+ timeoutForceKillHandle = null;
1048
+ }
614
1049
  options.abortSignal?.removeEventListener('abort', abortHandler);
615
1050
  unsubscribeStatusListener?.();
616
1051
  unsubscribeStatusListener = null;
@@ -691,6 +1126,10 @@ export async function executeShellCommand(command, parameters = [], directory, o
691
1126
  childProcess.on('error', (error) => {
692
1127
  processExited = true;
693
1128
  clearTimeout(timeoutHandle);
1129
+ if (timeoutForceKillHandle) {
1130
+ clearTimeout(timeoutForceKillHandle);
1131
+ timeoutForceKillHandle = null;
1132
+ }
694
1133
  options.abortSignal?.removeEventListener('abort', abortHandler);
695
1134
  unsubscribeStatusListener?.();
696
1135
  unsubscribeStatusListener = null;
@@ -918,6 +1357,241 @@ export function formatStructuredResult(result, artifacts = [], options = {}) {
918
1357
  ...(stderrSnippet.truncated ? { stderr_truncated: true } : {})
919
1358
  };
920
1359
  }
1360
/**
 * Reduce a shell execution result to a small status record.
 *
 * `result.error` is coerced to text once and reused for every check, so a
 * non-string error value (for example an Error instance) is classified
 * instead of raising a TypeError on `.includes`.
 *
 * Reason precedence: timeout, canceled, explicit/inferred failure reason,
 * non-zero exit code, generic execution error.
 *
 * @param {object} result - Execution result (reads exitCode, error, timedOut,
 *   canceled, failureReason).
 * @returns {{ status: 'failed' | 'success', exit_code: (number|null),
 *   timed_out: boolean, canceled: boolean, reason?: string }}
 */
export function formatMinimalShellResult(result) {
    const errorText = String(result.error || '');
    const timedOut = Boolean(result.timedOut || errorText.includes('timed out'));
    const canceled = Boolean(result.canceled || errorText.toLowerCase().includes('canceled'));
    const inferredFailureReason = result.failureReason || inferShellFailureReason(errorText);
    // A null exit code (process never produced one) counts as a failure too.
    const failed = timedOut
        || canceled
        || result.exitCode !== 0
        || Boolean(result.error)
        || Boolean(inferredFailureReason);
    let reason;
    if (timedOut) {
        reason = 'timeout';
    }
    else if (canceled) {
        reason = 'canceled';
    }
    else if (inferredFailureReason) {
        reason = inferredFailureReason;
    }
    else if (result.exitCode !== null && result.exitCode !== 0) {
        reason = 'non_zero_exit';
    }
    else if (result.error) {
        reason = 'execution_error';
    }
    return {
        status: failed ? 'failed' : 'success',
        exit_code: result.exitCode,
        timed_out: timedOut,
        canceled,
        ...(reason ? { reason } : {})
    };
}
1389
/**
 * Render a shell result as LLM-facing text.
 *
 * Delegates to formatPreviewShellResultForLLM with its default options and
 * returns that text unchanged.
 *
 * @param {object} result - Shell execution result.
 * @returns {string} The text produced by formatPreviewShellResultForLLM.
 */
export function formatMinimalShellResultForLLM(result) {
    const rendered = formatPreviewShellResultForLLM(result);
    return rendered;
}
1392
/**
 * Report whether the given text embeds a base64 image data URI
 * (`data:image/<subtype>;base64,`), matched case-insensitively.
 * Falsy input is treated as an empty string.
 *
 * @param {*} text - Value to inspect; coerced to a string.
 * @returns {boolean} True when an image data URI prefix is present.
 */
function containsImageDataUri(text) {
    const haystack = text ? String(text) : '';
    const imageDataUriPattern = /data:image\/[a-z0-9.+-]+;base64,/i;
    return haystack.search(imageDataUriPattern) !== -1;
}
1395
/**
 * Strip ANSI escape sequences and terminal control characters from shell output
 * before sending to the LLM. Raw terminal output often contains spinner animations,
 * cursor-control codes (\x1b[?25l, \x1b[999D\x1b[J), and ANSI color codes that
 * confuse LLMs into thinking a process is still running when it has already
 * completed successfully (exit_code: 0).
 *
 * Strips:
 * - CSI sequences per the ECMA-48 grammar: ESC [ followed by parameter bytes
 *   (0x30-0x3F), intermediate bytes (0x20-0x2F) and one final byte (0x40-0x7E).
 *   This covers colon sub-parameters (\x1b[38:5:196m), private markers
 *   (\x1b[>0c), intermediates (\x1b[2 q) and non-letter finals (\x1b[200~).
 * - OSC sequences: ESC ] ... terminated by BEL or ESC \ (title/hyperlinks)
 * - DCS/SOS/PM/APC sequences: ESC P / X / ^ / _ ... ESC \
 * - nF escapes: ESC + intermediate byte(s) + final byte (e.g. \x1b(B), so the
 *   final byte is not left behind as stray text
 * - Remaining two-character escapes: ESC followed by one non-[ character
 * - Bare carriage returns used by spinner overwrites (turned into newlines)
 *
 * Runs of three or more newlines left by the cleanup are collapsed to two.
 *
 * @param {*} text - Raw output; non-string values are coerced, null/undefined
 *   become the empty string.
 * @returns {string} Output with terminal control sequences removed.
 */
export function stripAnsiFromShellOutput(text) {
    const input = typeof text === 'string' ? text : String(text ?? '');
    return input
        // CSI: ESC [ params* intermediates* final
        .replace(/\x1b\[[0-?]*[ -/]*[@-~]/g, '')
        // OSC: ESC ] ... BEL or ESC\
        .replace(/\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)/g, '')
        // DCS/SOS/PM/APC: ESC [P X ^ _] ... ESC\
        .replace(/\x1b[PX\^_].*?\x1b\\/gs, '')
        // nF: ESC intermediates+ final (charset designation such as ESC ( B)
        .replace(/\x1b[ -/]+[0-~]/g, '')
        // Remaining single-char escapes (ESC followed by one non-[ char)
        .replace(/\x1b[^[]/g, '')
        // Carriage returns used by spinner-overwrite pattern (keep CRLF intact)
        .replace(/\r(?!\n)/g, '\n')
        // Collapse multiple blank lines from the cleanup
        .replace(/\n{3,}/g, '\n\n');
}
1424
/**
 * Build the LLM-facing preview for one output stream.
 *
 * - Empty/falsy content yields an empty, non-truncated, non-redacted preview.
 * - Content containing an image data URI is replaced by a short placeholder
 *   and flagged as redacted.
 * - Everything else is cleaned with stripAnsiFromShellOutput first and then
 *   shortened by buildOutputSnippet, so the character budget is spent on
 *   readable text rather than terminal control codes.
 *
 * @param {*} content - Raw stream content; coerced to a string.
 * @param {number} maxOutputChars - Limit forwarded to buildOutputSnippet.
 * @returns {{ text: string, truncated: boolean, redacted: boolean }}
 */
function buildLLMPreviewField(content, maxOutputChars) {
    const raw = String(content || '');
    if (raw.length === 0) {
        return { text: '', truncated: false, redacted: false };
    }
    if (containsImageDataUri(raw)) {
        const placeholder = `omitted from LLM context (contains image data URI output; ${raw.length} chars).`;
        return { text: placeholder, truncated: false, redacted: true };
    }
    const { text, truncated } = buildOutputSnippet(stripAnsiFromShellOutput(raw), maxOutputChars);
    return { text, truncated, redacted: false };
}
1447
/**
 * Produce the structured preview of a shell result: the minimal status record
 * from formatMinimalShellResult plus bounded stdout/stderr previews.
 *
 * The stderr preview falls back to `result.error` when `result.stderr` is
 * empty. Preview and flag keys are only present when they carry information.
 *
 * @param {object} result - Shell execution result.
 * @param {{ maxOutputChars?: number }} [options] - Per-stream character limit;
 *   defaults to DEFAULT_LLM_PREVIEW_OUTPUT_CHARS.
 * @returns {object} Minimal record extended with optional `stdout_preview`,
 *   `stderr_preview`, `*_truncated` and `*_redacted` keys.
 */
export function formatPreviewShellResult(result, options = {}) {
    const preview = { ...formatMinimalShellResult(result) };
    const charLimit = options.maxOutputChars ?? DEFAULT_LLM_PREVIEW_OUTPUT_CHARS;
    const stderrText = String(result.stderr || result.error || '');
    const out = buildLLMPreviewField(result.stdout, charLimit);
    const err = buildLLMPreviewField(stderrText, charLimit);

    // Key insertion order is part of the serialized shape; keep it stable.
    if (out.text) {
        preview.stdout_preview = out.text;
    }
    if (err.text) {
        preview.stderr_preview = err.text;
    }
    if (out.truncated) {
        preview.stdout_truncated = true;
    }
    if (err.truncated) {
        preview.stderr_truncated = true;
    }
    if (out.redacted) {
        preview.stdout_redacted = true;
    }
    if (err.redacted) {
        preview.stderr_redacted = true;
    }
    return preview;
}
1463
/**
 * Render the preview of a shell result as newline-separated `key: value`
 * text for the LLM.
 *
 * Always emits status, exit_code, timed_out and canceled; then the reason
 * (if any); then, for stdout followed by stderr, the preview body and its
 * truncated/redacted markers when present.
 *
 * @param {object} result - Shell execution result.
 * @param {{ maxOutputChars?: number }} [options] - Forwarded to formatPreviewShellResult.
 * @returns {string} Multi-line text summary.
 */
export function formatPreviewShellResultForLLM(result, options = {}) {
    const preview = formatPreviewShellResult(result, options);
    const exitCodeText = preview.exit_code === null ? 'null' : String(preview.exit_code);
    const output = [
        `status: ${preview.status}`,
        `exit_code: ${exitCodeText}`,
        `timed_out: ${preview.timed_out ? 'true' : 'false'}`,
        `canceled: ${preview.canceled ? 'true' : 'false'}`
    ];
    if (preview.reason) {
        output.push(`reason: ${preview.reason}`);
    }
    const streams = [
        {
            body: preview.stdout_preview,
            header: 'stdout_preview:',
            truncated: preview.stdout_truncated,
            truncatedLine: 'stdout_truncated: true',
            redacted: preview.stdout_redacted,
            redactedLine: 'stdout_redacted: true',
        },
        {
            body: preview.stderr_preview,
            header: 'stderr_preview:',
            truncated: preview.stderr_truncated,
            truncatedLine: 'stderr_truncated: true',
            redacted: preview.stderr_redacted,
            redactedLine: 'stderr_redacted: true',
        },
    ];
    for (const stream of streams) {
        if (stream.body) {
            output.push(stream.header, stream.body);
        }
        if (stream.truncated) {
            output.push(stream.truncatedLine);
        }
        if (stream.redacted) {
            output.push(stream.redactedLine);
        }
    }
    return output.join('\n');
}
1496
/**
 * Turn an error raised around the shell tool into the same LLM-facing text
 * used for real executions.
 *
 * Builds a synthetic result (no exit code, empty stdout, the error message as
 * both stderr and error) and renders it with formatPreviewShellResultForLLM.
 * The failure reason is taken from `options.failureReason`, else inferred
 * from the message, else 'execution_error'.
 *
 * @param {{ error: *, command?: *, parameters?: *, failureReason?: string }} options
 * @returns {string} Text summary of the failure.
 */
export function formatShellToolErrorResultForLLM(options) {
    const message = options.error instanceof Error
        ? options.error.message
        : String(options.error);
    const stringParameters = Array.isArray(options.parameters)
        ? options.parameters.map((parameter) => String(parameter))
        : [];
    const hasUsableCommand = typeof options.command === 'string' && options.command.trim();
    const syntheticResult = {
        executionId: 'shell-tool-error',
        command: hasUsableCommand ? options.command : '<shell_cmd>',
        parameters: stringParameters,
        stdout: '',
        stderr: message,
        exitCode: null,
        signal: null,
        error: message,
        failureReason: options.failureReason || inferShellFailureReason(message) || 'execution_error',
        executedAt: new Date(),
        duration: 0,
    };
    return formatPreviewShellResultForLLM(syntheticResult);
}
1517
/**
 * Choose the textual representation of a shell result for the tool return value.
 *
 * - 'minimal' mode: the preview form, as pretty-printed JSON or as text.
 * - any other mode: the full structured result as pretty-printed JSON
 *   (including artifacts), or the formatResultForLLM text.
 *
 * @param {object} result - Shell execution result.
 * @param {{ llmResultMode?: string, outputFormat?: string, outputDetail?: string,
 *   artifacts?: Array }} options
 * @returns {string} Serialized result content.
 */
function buildShellToolResultContent(result, options) {
    const wantsJson = options.outputFormat === 'json';
    if (options.llmResultMode === 'minimal') {
        return wantsJson
            ? JSON.stringify(formatPreviewShellResult(result), null, 2)
            : formatPreviewShellResultForLLM(result);
    }
    if (!wantsJson) {
        return formatResultForLLM(result, { detail: options.outputDetail });
    }
    const structured = formatStructuredResult(result, options.artifacts || [], { detail: options.outputDetail });
    return JSON.stringify(structured, null, 2);
}
1529
/**
 * Assemble the tool-execution envelope for a shell_cmd result.
 *
 * The envelope carries the result content (see buildShellToolResultContent),
 * a text preview, and one artifact preview per collected artifact. For JSON
 * output the text preview reuses the result content; otherwise it is the
 * formatResultForLLM rendering, flagged as markdown. Artifact previews get a
 * URL only when a world id is available.
 *
 * Status is 'completed' only for exit code 0 with no error, timeout or
 * cancellation; anything else is 'failed'.
 *
 * @param {object} result - Shell execution result.
 * @param {{ llmResultMode?: string, outputFormat?: string, outputDetail?: string,
 *   artifacts?: Array<{ path: string, bytes: number }>, worldId?: string,
 *   toolCallId?: string }} options
 * @returns {object} Envelope object of type 'tool_execution_envelope'.
 */
function buildShellToolPreviewEnvelope(result, options) {
    const { llmResultMode, outputFormat, outputDetail, artifacts, worldId, toolCallId } = options;
    const resultContent = buildShellToolResultContent(result, {
        llmResultMode,
        outputFormat,
        outputDetail,
        artifacts,
    });

    const isJson = outputFormat === 'json';
    const previewText = isJson
        ? resultContent
        : formatResultForLLM(result, { detail: outputDetail });
    const textPreview = createTextToolPreview(previewText, { markdown: !isJson, title: 'shell_cmd result' });

    const toArtifactPreview = (artifact) => {
        const descriptor = {
            path: artifact.path,
            bytes: artifact.bytes,
            display_name: artifact.path,
        };
        if (worldId) {
            descriptor.url = buildToolArtifactPreviewUrl({ path: artifact.path, worldId });
        }
        return createArtifactToolPreview(descriptor);
    };
    const artifactPreviews = (artifacts || []).map((artifact) => toArtifactPreview(artifact));

    const succeeded = result.exitCode === 0 && !result.error && !result.timedOut && !result.canceled;

    // Properties are added in the serialized order of the envelope.
    const envelope = {
        __type: 'tool_execution_envelope',
        version: 1,
        tool: 'shell_cmd',
    };
    if (toolCallId) {
        envelope.tool_call_id = toolCallId;
    }
    envelope.status = succeeded ? 'completed' : 'failed';
    envelope.preview = [textPreview, ...artifactPreviews];
    envelope.result = resultContent;
    return envelope;
}
1557
/**
 * Produce the final string returned by the shell tool.
 *
 * With `options.persistToolEnvelope` set, the result is wrapped in a preview
 * envelope and serialized; otherwise the plain result content is returned.
 *
 * @param {object} result - Shell execution result.
 * @param {{ persistToolEnvelope?: boolean, llmResultMode?: string,
 *   outputFormat?: string, outputDetail?: string, artifacts?: Array,
 *   toolCallId?: string, worldId?: string }} options
 * @returns {string} Serialized envelope or plain result content.
 */
function formatShellToolReturnContent(result, options) {
    if (options.persistToolEnvelope) {
        const envelope = buildShellToolPreviewEnvelope(result, options);
        return serializeToolExecutionEnvelope(envelope);
    }
    const { llmResultMode, outputFormat, outputDetail, artifacts } = options;
    return buildShellToolResultContent(result, {
        llmResultMode,
        outputFormat,
        outputDetail,
        artifacts,
    });
}
1568
/**
 * Serialize an error raised around the shell tool as a tool-execution envelope.
 *
 * Builds a synthetic failed result (no exit code, empty stdout, the error
 * message as both stderr and error) and wraps it using minimal result mode,
 * markdown output and minimal detail. The failure reason is taken from
 * `options.failureReason`, else inferred from the message, else
 * 'execution_error'.
 *
 * @param {{ error: *, command?: *, parameters?: *, failureReason?: string,
 *   toolCallId?: string }} options
 * @returns {string} Serialized envelope.
 */
export function formatShellToolErrorEnvelopeContent(options) {
    const message = options.error instanceof Error
        ? options.error.message
        : String(options.error);
    const stringParameters = Array.isArray(options.parameters)
        ? options.parameters.map((parameter) => String(parameter))
        : [];
    const hasUsableCommand = typeof options.command === 'string' && options.command.trim();
    const syntheticResult = {
        executionId: 'shell-tool-error',
        command: hasUsableCommand ? options.command : '<shell_cmd>',
        parameters: stringParameters,
        stdout: '',
        stderr: message,
        exitCode: null,
        signal: null,
        error: message,
        failureReason: options.failureReason || inferShellFailureReason(message) || 'execution_error',
        executedAt: new Date(),
        duration: 0,
    };
    const envelope = buildShellToolPreviewEnvelope(syntheticResult, {
        llmResultMode: 'minimal',
        outputFormat: 'markdown',
        outputDetail: 'minimal',
        toolCallId: options.toolCallId,
    });
    return serializeToolExecutionEnvelope(envelope);
}
921
1595
  /**
922
1596
  * Format command execution result for LLM consumption
923
1597
  * Provides a human-readable summary of the execution with improved markdown formatting
@@ -1076,81 +1750,219 @@ export function createShellCmdToolDefinition() {
1076
1750
  },
1077
1751
  required: ['command']
1078
1752
  };
1753
+ const llmResultMode = typeof context?.llmResultMode === 'string'
1754
+ ? context.llmResultMode === 'verbose' ? 'verbose' : 'minimal'
1755
+ : 'verbose';
1756
+ const persistToolEnvelope = context?.persistToolEnvelope === true;
1079
1757
  const validation = validateToolParameters(args, toolSchema, 'shell_cmd');
1080
1758
  if (!validation.valid) {
1081
- return formatResultForLLM({
1759
+ const validationResult = {
1082
1760
  executionId: 'validation-error',
1083
1761
  command: args?.command || '<invalid>',
1084
1762
  parameters: [],
1085
- exitCode: 1,
1763
+ exitCode: null,
1086
1764
  signal: null,
1087
1765
  error: validation.error,
1766
+ failureReason: 'validation_error',
1088
1767
  stdout: '',
1089
1768
  stderr: '',
1090
1769
  executedAt: new Date(),
1091
1770
  duration: 0
1771
+ };
1772
+ const validationOutputFormat = validation.correctedArgs?.output_format === 'json' ? 'json' : 'markdown';
1773
+ return formatShellToolReturnContent(validationResult, {
1774
+ llmResultMode,
1775
+ outputFormat: validationOutputFormat,
1776
+ outputDetail: 'minimal',
1777
+ toolCallId: typeof context?.toolCallId === 'string' ? context.toolCallId : undefined,
1778
+ persistToolEnvelope,
1779
+ worldId: typeof context?.world?.id === 'string' ? context.world.id : undefined,
1092
1780
  });
1093
1781
  }
1094
1782
  const { command, parameters = [], timeout, output_format: outputFormat = 'markdown', output_detail: outputDetail = 'minimal', artifact_paths: artifactPaths = [] } = validation.correctedArgs;
1095
1783
  // Ensure parameters is always an array
1096
- const validParameters = Array.isArray(parameters) ?
1784
+ const rawParameters = Array.isArray(parameters) ?
1097
1785
  parameters.filter((p) => typeof p === 'string') :
1098
1786
  [];
1787
+ const chatIdRaw = typeof context?.chatId === 'string' ? context.chatId.trim() : '';
1788
+ const chatId = chatIdRaw || undefined;
1789
+ // Resolve skill-relative script paths (e.g. <skill-id>/scripts/foo.py) to absolute paths
1790
+ const resolvedDirectory = resolveTrustedShellWorkingDirectory(context);
1791
+ const runtimeSkillsRoot = join(resolveDirectory(resolvedDirectory), '.agents', 'skills');
1792
+ const skillOriginatedRequest = hasActiveSkillContext(context?.messages, chatId);
1793
+ const { resolvedParameters: validParameters, skillRoots } = resolveSkillScriptParameters(rawParameters, runtimeSkillsRoot, { allowBareScriptsResolution: skillOriginatedRequest });
1099
1794
  // Extract world and messageId from context for streaming
1100
1795
  const world = context?.world;
1101
1796
  const currentMessageId = context?.toolCallId;
1102
- const chatId = context?.chatId ? String(context.chatId) : undefined;
1103
1797
  const abortSignal = context?.abortSignal;
1104
- const resolvedDirectory = resolveTrustedShellWorkingDirectory(context);
1798
+ const streamAgentName = typeof context?.agentName === 'string' && context.agentName.trim()
1799
+ ? context.agentName.trim()
1800
+ : 'assistant';
1801
+ const hasToolStreamContext = Boolean(world
1802
+ && chatId
1803
+ && typeof currentMessageId === 'string'
1804
+ && currentMessageId.trim());
1805
+ const streamBaseMessageId = hasToolStreamContext ? String(currentMessageId).trim() : '';
1806
+ const stdoutMessageId = streamBaseMessageId ? `${streamBaseMessageId}-stdout` : '';
1105
1807
  const directoryValidation = validateShellDirectoryRequest(validation.correctedArgs.directory, resolvedDirectory);
1106
1808
  if (!directoryValidation.valid) {
1107
1809
  throw new Error(directoryValidation.error);
1108
1810
  }
1109
- const scopeValidation = validateShellCommandScope(command, validParameters, resolvedDirectory);
1811
+ const scopeValidation = validateShellCommandScope(command, validParameters, resolvedDirectory, skillRoots);
1110
1812
  if (!scopeValidation.valid) {
1111
1813
  throw new Error(scopeValidation.error);
1112
1814
  }
1113
- // Execute command with streaming callbacks if world is available
1815
+ const riskAssessment = classifyShellCommandRisk(command, validParameters);
1816
+ if (riskAssessment.tier === 'block') {
1817
+ throw new Error(`Blocked dangerous operation: ${riskAssessment.reason}. This shell command cannot be executed.`);
1818
+ }
1819
+ // Check world-level tool permission
1820
+ const toolPermission = getEnvValueFromText(world?.variables, 'tool_permission') ?? 'auto';
1821
+ if (toolPermission === 'read') {
1822
+ const blockedResult = {
1823
+ executionId: 'permission-blocked',
1824
+ command,
1825
+ parameters: validParameters,
1826
+ exitCode: null,
1827
+ signal: null,
1828
+ error: 'shell_cmd is blocked by the current permission level (read).',
1829
+ failureReason: 'validation_error',
1830
+ stdout: '',
1831
+ stderr: '',
1832
+ executedAt: new Date(),
1833
+ duration: 0,
1834
+ };
1835
+ return formatShellToolReturnContent(blockedResult, {
1836
+ llmResultMode,
1837
+ outputFormat: outputFormat === 'json' ? 'json' : 'markdown',
1838
+ outputDetail: 'minimal',
1839
+ toolCallId: typeof currentMessageId === 'string' ? currentMessageId : undefined,
1840
+ persistToolEnvelope,
1841
+ worldId: typeof world?.id === 'string' ? world.id : undefined,
1842
+ });
1843
+ }
1844
+ // At 'ask' level, every shell_cmd invocation requires HITL approval regardless of risk tier.
1845
+ if (toolPermission === 'ask' && riskAssessment.tier !== 'hitl_required') {
1846
+ if (!world) {
1847
+ throw new Error('Approval required: world-level permission is "ask" but HITL approval context is unavailable.');
1848
+ }
1849
+ const askApproval = await requestShellCommandRiskApproval({
1850
+ world,
1851
+ chatId: chatId ?? null,
1852
+ command,
1853
+ parameters: validParameters,
1854
+ resolvedDirectory,
1855
+ risk: { tier: 'hitl_required', reason: 'world permission level is "ask"', tags: ['ask-permission'] },
1856
+ toolCallId: typeof currentMessageId === 'string' ? currentMessageId : undefined,
1857
+ agentName: streamAgentName,
1858
+ messages: Array.isArray(context?.messages) ? context.messages : undefined,
1859
+ });
1860
+ if (!askApproval.approved) {
1861
+ throw new Error(`Command not executed: world permission is "ask" and the request was not approved (${askApproval.reason}).`);
1862
+ }
1863
+ }
1864
+ if (riskAssessment.tier === 'hitl_required') {
1865
+ if (!world) {
1866
+ throw new Error(`Approval required: command classified as ${riskAssessment.reason}. HITL approval context is unavailable.`);
1867
+ }
1868
+ const approval = await requestShellCommandRiskApproval({
1869
+ world,
1870
+ chatId: chatId ?? null,
1871
+ command,
1872
+ parameters: validParameters,
1873
+ resolvedDirectory,
1874
+ risk: riskAssessment,
1875
+ toolCallId: typeof currentMessageId === 'string' ? currentMessageId : undefined,
1876
+ agentName: streamAgentName,
1877
+ messages: Array.isArray(context?.messages) ? context.messages : undefined,
1878
+ });
1879
+ if (!approval.approved) {
1880
+ throw new Error(`Command not executed: approval required for ${riskAssessment.reason} and request was not approved (${approval.reason}).`);
1881
+ }
1882
+ }
1883
+ let stdoutStartEmitted = false;
1884
+ const emitStdoutToolStreamChunk = (chunk) => {
1885
+ if (!hasToolStreamContext)
1886
+ return;
1887
+ if (!chunk)
1888
+ return;
1889
+ if (!stdoutMessageId)
1890
+ return;
1891
+ if (!stdoutStartEmitted) {
1892
+ publishSSE(world, {
1893
+ type: 'start',
1894
+ toolName: 'shell_cmd',
1895
+ messageId: stdoutMessageId,
1896
+ agentName: streamAgentName,
1897
+ chatId
1898
+ });
1899
+ stdoutStartEmitted = true;
1900
+ }
1901
+ publishSSE(world, {
1902
+ type: 'chunk',
1903
+ toolName: 'shell_cmd',
1904
+ content: chunk,
1905
+ stream: 'stdout',
1906
+ messageId: stdoutMessageId,
1907
+ agentName: streamAgentName,
1908
+ chatId
1909
+ });
1910
+ };
1911
+ const emitStderrToolStreamChunk = (chunk) => {
1912
+ if (!world || !chatId || !chunk)
1913
+ return;
1914
+ publishSSE(world, {
1915
+ type: 'tool-stream',
1916
+ toolName: 'shell_cmd',
1917
+ content: chunk,
1918
+ stream: 'stderr',
1919
+ messageId: currentMessageId,
1920
+ agentName: 'shell_cmd',
1921
+ chatId
1922
+ });
1923
+ };
1924
+ // Execute command with tool-streaming callbacks when world context is available
1114
1925
  const result = await executeShellCommand(command, validParameters, resolvedDirectory, {
1115
1926
  timeout,
1116
1927
  abortSignal,
1117
1928
  worldId: world?.id,
1118
1929
  chatId,
1119
1930
  trustedWorkingDirectory: resolvedDirectory,
1120
- onStdout: world ? (chunk) => {
1121
- // Publish streaming events to world event system
1122
- publishSSE(world, {
1123
- type: 'tool-stream',
1124
- toolName: 'shell_cmd',
1125
- content: chunk,
1126
- stream: 'stdout',
1127
- messageId: currentMessageId,
1128
- agentName: 'shell_cmd'
1129
- });
1931
+ onStdout: hasToolStreamContext ? (chunk) => {
1932
+ emitStdoutToolStreamChunk(chunk);
1130
1933
  } : undefined,
1131
1934
  onStderr: world ? (chunk) => {
1132
- // Publish streaming events to world event system
1133
- publishSSE(world, {
1134
- type: 'tool-stream',
1135
- toolName: 'shell_cmd',
1136
- content: chunk,
1137
- stream: 'stderr',
1138
- messageId: currentMessageId,
1139
- agentName: 'shell_cmd'
1140
- });
1935
+ emitStderrToolStreamChunk(chunk);
1141
1936
  } : undefined
1142
1937
  });
1143
1938
  if (isCommandExecutionCanceled(result)) {
1144
1939
  throw new DOMException('Shell command execution canceled by user', 'AbortError');
1145
1940
  }
1941
+ // Emit SSE end only. Durable completion state now comes from the final tool result.
1942
+ if (hasToolStreamContext && stdoutMessageId && stdoutStartEmitted) {
1943
+ publishSSE(world, {
1944
+ type: 'end',
1945
+ toolName: 'shell_cmd',
1946
+ messageId: stdoutMessageId,
1947
+ agentName: streamAgentName,
1948
+ chatId
1949
+ });
1950
+ }
1146
1951
  const validatedArtifactPaths = Array.isArray(artifactPaths)
1147
1952
  ? artifactPaths.filter((artifactPath) => typeof artifactPath === 'string')
1148
1953
  : [];
1149
- const artifacts = await collectCommandArtifacts(validatedArtifactPaths, resolvedDirectory);
1150
- if (outputFormat === 'json') {
1151
- return JSON.stringify(formatStructuredResult(result, artifacts, { detail: outputDetail }), null, 2);
1152
- }
1153
- return formatResultForLLM(result, { detail: outputDetail });
1954
+ const artifacts = llmResultMode === 'minimal'
1955
+ ? []
1956
+ : await collectCommandArtifacts(validatedArtifactPaths, resolvedDirectory);
1957
+ return formatShellToolReturnContent(result, {
1958
+ llmResultMode,
1959
+ outputFormat,
1960
+ outputDetail,
1961
+ toolCallId: typeof context?.toolCallId === 'string' ? context.toolCallId : undefined,
1962
+ persistToolEnvelope,
1963
+ artifacts,
1964
+ worldId: typeof context?.world?.id === 'string' ? context.world.id : undefined,
1965
+ });
1154
1966
  }
1155
1967
  };
1156
1968
  }