gobby 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. gobby/__init__.py +1 -1
  2. gobby/adapters/claude_code.py +99 -61
  3. gobby/adapters/gemini.py +140 -38
  4. gobby/agents/isolation.py +130 -0
  5. gobby/agents/registry.py +11 -0
  6. gobby/agents/session.py +1 -0
  7. gobby/agents/spawn_executor.py +43 -13
  8. gobby/agents/spawners/macos.py +26 -1
  9. gobby/app_context.py +59 -0
  10. gobby/cli/__init__.py +0 -2
  11. gobby/cli/memory.py +185 -0
  12. gobby/cli/utils.py +5 -17
  13. gobby/clones/git.py +177 -0
  14. gobby/config/features.py +0 -20
  15. gobby/config/skills.py +31 -0
  16. gobby/config/tasks.py +4 -0
  17. gobby/hooks/event_handlers/__init__.py +155 -0
  18. gobby/hooks/event_handlers/_agent.py +175 -0
  19. gobby/hooks/event_handlers/_base.py +87 -0
  20. gobby/hooks/event_handlers/_misc.py +66 -0
  21. gobby/hooks/event_handlers/_session.py +573 -0
  22. gobby/hooks/event_handlers/_tool.py +196 -0
  23. gobby/hooks/hook_manager.py +21 -1
  24. gobby/install/gemini/hooks/hook_dispatcher.py +74 -15
  25. gobby/llm/claude.py +377 -42
  26. gobby/mcp_proxy/importer.py +4 -41
  27. gobby/mcp_proxy/instructions.py +2 -2
  28. gobby/mcp_proxy/manager.py +13 -3
  29. gobby/mcp_proxy/registries.py +35 -4
  30. gobby/mcp_proxy/services/recommendation.py +2 -28
  31. gobby/mcp_proxy/tools/agent_messaging.py +93 -44
  32. gobby/mcp_proxy/tools/agents.py +45 -9
  33. gobby/mcp_proxy/tools/artifacts.py +46 -12
  34. gobby/mcp_proxy/tools/sessions/_commits.py +31 -24
  35. gobby/mcp_proxy/tools/sessions/_crud.py +5 -5
  36. gobby/mcp_proxy/tools/sessions/_handoff.py +45 -41
  37. gobby/mcp_proxy/tools/sessions/_messages.py +35 -7
  38. gobby/mcp_proxy/tools/spawn_agent.py +44 -6
  39. gobby/mcp_proxy/tools/task_readiness.py +27 -4
  40. gobby/mcp_proxy/tools/tasks/_context.py +18 -0
  41. gobby/mcp_proxy/tools/tasks/_crud.py +13 -6
  42. gobby/mcp_proxy/tools/tasks/_lifecycle.py +29 -14
  43. gobby/mcp_proxy/tools/tasks/_session.py +22 -7
  44. gobby/mcp_proxy/tools/workflows/__init__.py +266 -0
  45. gobby/mcp_proxy/tools/workflows/_artifacts.py +225 -0
  46. gobby/mcp_proxy/tools/workflows/_import.py +112 -0
  47. gobby/mcp_proxy/tools/workflows/_lifecycle.py +321 -0
  48. gobby/mcp_proxy/tools/workflows/_query.py +207 -0
  49. gobby/mcp_proxy/tools/workflows/_resolution.py +78 -0
  50. gobby/mcp_proxy/tools/workflows/_terminal.py +139 -0
  51. gobby/mcp_proxy/tools/worktrees.py +32 -7
  52. gobby/memory/components/__init__.py +0 -0
  53. gobby/memory/components/ingestion.py +98 -0
  54. gobby/memory/components/search.py +108 -0
  55. gobby/memory/extractor.py +15 -1
  56. gobby/memory/manager.py +16 -25
  57. gobby/paths.py +51 -0
  58. gobby/prompts/loader.py +1 -35
  59. gobby/runner.py +36 -10
  60. gobby/servers/http.py +186 -149
  61. gobby/servers/routes/admin.py +12 -0
  62. gobby/servers/routes/mcp/endpoints/execution.py +15 -7
  63. gobby/servers/routes/mcp/endpoints/registry.py +8 -8
  64. gobby/servers/routes/mcp/hooks.py +50 -3
  65. gobby/servers/websocket.py +57 -1
  66. gobby/sessions/analyzer.py +4 -4
  67. gobby/sessions/manager.py +9 -0
  68. gobby/sessions/transcripts/gemini.py +100 -34
  69. gobby/skills/parser.py +23 -0
  70. gobby/skills/sync.py +5 -4
  71. gobby/storage/artifacts.py +19 -0
  72. gobby/storage/database.py +9 -2
  73. gobby/storage/memories.py +32 -21
  74. gobby/storage/migrations.py +46 -4
  75. gobby/storage/sessions.py +4 -2
  76. gobby/storage/skills.py +87 -7
  77. gobby/tasks/external_validator.py +4 -17
  78. gobby/tasks/validation.py +13 -87
  79. gobby/tools/summarizer.py +18 -51
  80. gobby/utils/status.py +13 -0
  81. gobby/workflows/actions.py +5 -0
  82. gobby/workflows/context_actions.py +21 -24
  83. gobby/workflows/detection_helpers.py +38 -24
  84. gobby/workflows/enforcement/__init__.py +11 -1
  85. gobby/workflows/enforcement/blocking.py +109 -1
  86. gobby/workflows/enforcement/handlers.py +35 -1
  87. gobby/workflows/engine.py +96 -0
  88. gobby/workflows/evaluator.py +110 -0
  89. gobby/workflows/hooks.py +41 -0
  90. gobby/workflows/lifecycle_evaluator.py +2 -1
  91. gobby/workflows/memory_actions.py +11 -0
  92. gobby/workflows/safe_evaluator.py +8 -0
  93. gobby/workflows/summary_actions.py +123 -50
  94. {gobby-0.2.7.dist-info → gobby-0.2.9.dist-info}/METADATA +1 -1
  95. {gobby-0.2.7.dist-info → gobby-0.2.9.dist-info}/RECORD +99 -107
  96. gobby/cli/tui.py +0 -34
  97. gobby/hooks/event_handlers.py +0 -909
  98. gobby/mcp_proxy/tools/workflows.py +0 -973
  99. gobby/tui/__init__.py +0 -5
  100. gobby/tui/api_client.py +0 -278
  101. gobby/tui/app.py +0 -329
  102. gobby/tui/screens/__init__.py +0 -25
  103. gobby/tui/screens/agents.py +0 -333
  104. gobby/tui/screens/chat.py +0 -450
  105. gobby/tui/screens/dashboard.py +0 -377
  106. gobby/tui/screens/memory.py +0 -305
  107. gobby/tui/screens/metrics.py +0 -231
  108. gobby/tui/screens/orchestrator.py +0 -903
  109. gobby/tui/screens/sessions.py +0 -412
  110. gobby/tui/screens/tasks.py +0 -440
  111. gobby/tui/screens/workflows.py +0 -289
  112. gobby/tui/screens/worktrees.py +0 -174
  113. gobby/tui/widgets/__init__.py +0 -21
  114. gobby/tui/widgets/chat.py +0 -210
  115. gobby/tui/widgets/conductor.py +0 -104
  116. gobby/tui/widgets/menu.py +0 -132
  117. gobby/tui/widgets/message_panel.py +0 -160
  118. gobby/tui/widgets/review_gate.py +0 -224
  119. gobby/tui/widgets/task_tree.py +0 -99
  120. gobby/tui/widgets/token_budget.py +0 -166
  121. gobby/tui/ws_client.py +0 -258
  122. {gobby-0.2.7.dist-info → gobby-0.2.9.dist-info}/WHEEL +0 -0
  123. {gobby-0.2.7.dist-info → gobby-0.2.9.dist-info}/entry_points.txt +0 -0
  124. {gobby-0.2.7.dist-info → gobby-0.2.9.dist-info}/licenses/LICENSE.md +0 -0
  125. {gobby-0.2.7.dist-info → gobby-0.2.9.dist-info}/top_level.txt +0 -0
gobby/tools/summarizer.py CHANGED
@@ -23,25 +23,6 @@ MAX_DESCRIPTION_LENGTH = 200
23
23
  _config: ToolSummarizerConfig | None = None
24
24
  _loader: PromptLoader | None = None
25
25
 
26
- DEFAULT_SUMMARY_PROMPT = """Summarize this MCP tool description in 180 characters or less.
27
- Keep it to three sentences or less. Be concise and preserve the key functionality.
28
- Do not add quotes, extra formatting, or code examples.
29
-
30
- Description: {description}
31
-
32
- Summary:"""
33
-
34
- DEFAULT_SUMMARY_SYSTEM_PROMPT = "You are a technical summarizer. Create concise tool descriptions."
35
-
36
- DEFAULT_SERVER_DESC_PROMPT = """Write a single concise sentence describing what the '{server_name}' MCP server does based on its tools.
37
-
38
- Tools:
39
- {tools_list}
40
-
41
- Description (1 sentence, try to keep under 100 characters):"""
42
-
43
- DEFAULT_SERVER_DESC_SYSTEM_PROMPT = "You write concise technical descriptions."
44
-
45
26
 
46
27
  def init_summarizer_config(config: ToolSummarizerConfig, project_dir: str | None = None) -> None:
47
28
  """Initialize the summarizer with configuration."""
@@ -50,13 +31,6 @@ def init_summarizer_config(config: ToolSummarizerConfig, project_dir: str | None
50
31
  global _config, _loader
51
32
  _config = config
52
33
  _loader = PromptLoader(project_dir=Path(project_dir) if project_dir else None)
53
- # Register fallbacks
54
- _loader.register_fallback("features/tool_summary", lambda: DEFAULT_SUMMARY_PROMPT)
55
- _loader.register_fallback("features/tool_summary_system", lambda: DEFAULT_SUMMARY_SYSTEM_PROMPT)
56
- _loader.register_fallback("features/server_description", lambda: DEFAULT_SERVER_DESC_PROMPT)
57
- _loader.register_fallback(
58
- "features/server_description_system", lambda: DEFAULT_SERVER_DESC_SYSTEM_PROMPT
59
- )
60
34
 
61
35
 
62
36
  def _get_config() -> ToolSummarizerConfig:
@@ -96,9 +70,9 @@ async def _summarize_description_with_claude(description: str) -> str:
96
70
  if _loader is None:
97
71
  raise RuntimeError("Summarizer not initialized")
98
72
  prompt = _loader.render(prompt_path, {"description": description})
99
- except (FileNotFoundError, OSError, KeyError, ValueError, RuntimeError) as e:
100
- logger.debug(f"Failed to load prompt from {prompt_path}: {e}, using default")
101
- prompt = DEFAULT_SUMMARY_PROMPT.format(description=description)
73
+ except (OSError, KeyError, ValueError, RuntimeError) as e:
74
+ logger.debug(f"Failed to load prompt from {prompt_path}: {e}")
75
+ raise
102
76
 
103
77
  # Get system prompt
104
78
  sys_prompt_path = config.system_prompt_path or "features/tool_summary_system"
@@ -106,9 +80,9 @@ async def _summarize_description_with_claude(description: str) -> str:
106
80
  if _loader is None:
107
81
  raise RuntimeError("Summarizer not initialized")
108
82
  system_prompt = _loader.render(sys_prompt_path, {})
109
- except (FileNotFoundError, OSError, KeyError, ValueError, RuntimeError) as e:
110
- logger.debug(f"Failed to load system prompt from {sys_prompt_path}: {e}, using default")
111
- system_prompt = DEFAULT_SUMMARY_SYSTEM_PROMPT
83
+ except (OSError, KeyError, ValueError, RuntimeError) as e:
84
+ logger.debug(f"Failed to load system prompt from {sys_prompt_path}: {e}")
85
+ system_prompt = "You are a technical summarizer."
112
86
 
113
87
  # Configure for single-turn completion
114
88
  options = ClaudeAgentOptions(
@@ -198,30 +172,23 @@ async def generate_server_description(
198
172
  "server_name": server_name,
199
173
  "tools_list": tools_list,
200
174
  }
201
- try:
202
- if _loader is None:
203
- _get_config() # force init
204
- if _loader is None:
205
- # Still None after _get_config, use default
206
- prompt = DEFAULT_SERVER_DESC_PROMPT.format(**context)
207
- else:
208
- prompt = _loader.render(prompt_path, context)
209
- except (FileNotFoundError, OSError, KeyError, ValueError, RuntimeError) as e:
210
- logger.debug(f"Failed to load prompt from {prompt_path}: {e}, using default")
211
- prompt = DEFAULT_SERVER_DESC_PROMPT.format(**context)
175
+ if _loader is None:
176
+ _get_config() # force init
177
+ if _loader is None:
178
+ # Still None after _get_config, use default
179
+ raise RuntimeError("Summarizer not initialized")
180
+ else:
181
+ prompt = _loader.render(prompt_path, context)
212
182
 
213
183
  # Get system prompt
214
184
  sys_prompt_path = (
215
185
  config.server_description_system_prompt_path or "features/server_description_system"
216
186
  )
217
- try:
218
- if _loader is None:
219
- system_prompt = DEFAULT_SERVER_DESC_SYSTEM_PROMPT
220
- else:
221
- system_prompt = _loader.render(sys_prompt_path, {})
222
- except (FileNotFoundError, OSError, KeyError, ValueError, RuntimeError) as e:
223
- logger.debug(f"Failed to load system prompt from {sys_prompt_path}: {e}, using default")
224
- system_prompt = DEFAULT_SERVER_DESC_SYSTEM_PROMPT
187
+
188
+ if _loader is None:
189
+ system_prompt = "You write concise technical descriptions."
190
+ else:
191
+ system_prompt = _loader.render(sys_prompt_path, {})
225
192
 
226
193
  # Configure for single-turn completion
227
194
  options = ClaudeAgentOptions(
gobby/utils/status.py CHANGED
@@ -84,6 +84,11 @@ def fetch_rich_status(http_port: int, timeout: float = 2.0) -> dict[str, Any]:
84
84
  if skills_data:
85
85
  status_kwargs["skills_total"] = skills_data.get("total", 0)
86
86
 
87
+ # Artifacts
88
+ artifacts_data = data.get("artifacts", {})
89
+ if artifacts_data and artifacts_data.get("count", 0) > 0:
90
+ status_kwargs["artifacts_count"] = artifacts_data.get("count", 0)
91
+
87
92
  except (httpx.ConnectError, httpx.TimeoutException):
88
93
  # Daemon not responding - return empty
89
94
  pass
@@ -124,6 +129,8 @@ def format_status_message(
124
129
  memories_avg_importance: float | None = None,
125
130
  # Skills
126
131
  skills_total: int | None = None,
132
+ # Artifacts
133
+ artifacts_count: int | None = None,
127
134
  **kwargs: Any,
128
135
  ) -> str:
129
136
  """
@@ -254,6 +261,12 @@ def format_status_message(
254
261
  lines.append(f" {mem_str}")
255
262
  lines.append("")
256
263
 
264
+ # Artifacts section (only show if we have data)
265
+ if artifacts_count is not None:
266
+ lines.append("Artifacts:")
267
+ lines.append(f" Captured: {artifacts_count}")
268
+ lines.append("")
269
+
257
270
  # Paths section (only when running)
258
271
  if running and (pid_file or log_files):
259
272
  lines.append("Paths:")
@@ -32,6 +32,7 @@ from gobby.workflows.enforcement import (
32
32
  handle_require_commit_before_stop,
33
33
  handle_require_task_complete,
34
34
  handle_require_task_review_or_close_before_stop,
35
+ handle_track_schema_lookup,
35
36
  handle_validate_session_task_scope,
36
37
  )
37
38
  from gobby.workflows.llm_actions import handle_call_llm
@@ -283,6 +284,9 @@ class ActionExecutor:
283
284
  async def capture_baseline(context: ActionContext, **kw: Any) -> dict[str, Any] | None:
284
285
  return await handle_capture_baseline_dirty_files(context, task_manager=tm, **kw)
285
286
 
287
+ async def track_schema(context: ActionContext, **kw: Any) -> dict[str, Any] | None:
288
+ return await handle_track_schema_lookup(context, task_manager=tm, **kw)
289
+
286
290
  self.register("block_tools", block_tools)
287
291
  self.register("require_active_task", require_active)
288
292
  self.register("require_task_complete", require_complete)
@@ -290,6 +294,7 @@ class ActionExecutor:
290
294
  self.register("require_task_review_or_close_before_stop", require_review)
291
295
  self.register("validate_session_task_scope", validate_scope)
292
296
  self.register("capture_baseline_dirty_files", capture_baseline)
297
+ self.register("track_schema_lookup", track_schema)
293
298
 
294
299
  def _register_webhook_action(self) -> None:
295
300
  """Register webhook action with config closure."""
@@ -308,16 +308,8 @@ def extract_handoff_context(
308
308
  except Exception as wt_err:
309
309
  logger.debug(f"Failed to get worktree context: {wt_err}")
310
310
 
311
- # Add active skills from HookSkillManager
312
- try:
313
- from gobby.hooks.skill_manager import HookSkillManager
314
-
315
- skill_manager = HookSkillManager()
316
- core_skills = skill_manager.discover_core_skills()
317
- always_apply_skills = [s.name for s in core_skills if s.is_always_apply()]
318
- handoff_ctx.active_skills = always_apply_skills
319
- except Exception as skill_err:
320
- logger.debug(f"Failed to get active skills: {skill_err}")
311
+ # Note: active_skills population removed - redundant with _build_skill_injection_context()
312
+ # which already handles skill restoration on session start
321
313
 
322
314
  # Format as markdown (like /clear stores formatted summary)
323
315
  markdown = format_handoff_as_markdown(handoff_ctx)
@@ -414,16 +406,24 @@ def format_handoff_as_markdown(ctx: Any, prompt_template: str | None = None) ->
414
406
  if ctx.git_status:
415
407
  sections.append(f"### Uncommitted Changes\n```\n{ctx.git_status}\n```")
416
408
 
417
- # Files modified section
418
- if ctx.files_modified:
419
- lines = ["### Files Being Modified"]
420
- for f in ctx.files_modified:
421
- lines.append(f"- {f}")
422
- sections.append("\n".join(lines))
423
-
424
- # Initial goal section
409
+ # Files modified section - only show files still dirty (not yet committed)
410
+ if ctx.files_modified and ctx.git_status:
411
+ # Filter to files that appear in git status (still uncommitted)
412
+ dirty_files = [f for f in ctx.files_modified if f in ctx.git_status]
413
+ if dirty_files:
414
+ lines = ["### Files Being Modified"]
415
+ for f in dirty_files:
416
+ lines.append(f"- {f}")
417
+ sections.append("\n".join(lines))
418
+
419
+ # Initial goal section - only if task is still active (not closed/completed)
425
420
  if ctx.initial_goal:
426
- sections.append(f"### Original Goal\n{ctx.initial_goal}")
421
+ task_status = None
422
+ if ctx.active_gobby_task:
423
+ task_status = ctx.active_gobby_task.get("status")
424
+ # Only include if no task or task is still open/in_progress
425
+ if task_status in (None, "open", "in_progress"):
426
+ sections.append(f"### Original Goal\n{ctx.initial_goal}")
427
427
 
428
428
  # Recent activity section
429
429
  if ctx.recent_activity:
@@ -432,11 +432,8 @@ def format_handoff_as_markdown(ctx: Any, prompt_template: str | None = None) ->
432
432
  lines.append(f"- {activity}")
433
433
  sections.append("\n".join(lines))
434
434
 
435
- # Active skills section
436
- if hasattr(ctx, "active_skills") and ctx.active_skills:
437
- lines = ["### Active Skills"]
438
- lines.append(f"Skills available: {', '.join(ctx.active_skills)}")
439
- sections.append("\n".join(lines))
435
+ # Note: Active Skills section removed - redundant with _build_skill_injection_context()
436
+ # which already handles skill restoration on session start
440
437
 
441
438
  return "\n\n".join(sections)
442
439
 
@@ -7,7 +7,7 @@ and update workflow state variables accordingly.
7
7
  """
8
8
 
9
9
  import logging
10
- from typing import TYPE_CHECKING
10
+ from typing import TYPE_CHECKING, Any
11
11
 
12
12
  if TYPE_CHECKING:
13
13
  from gobby.hooks.events import HookEvent
@@ -44,30 +44,24 @@ def detect_task_claim(
44
44
  if not event.data:
45
45
  return
46
46
 
47
- tool_name = event.data.get("tool_name", "")
48
47
  tool_input = event.data.get("tool_input", {}) or {}
49
- # Claude Code sends "tool_result", but we also check "tool_output" for compatibility
50
- tool_output = event.data.get("tool_result") or event.data.get("tool_output") or {}
51
-
52
- # Check if this is a gobby-tasks call via MCP proxy
53
- # Tool name could be "call_tool" (from legacy) or "mcp__gobby__call_tool" (direct)
54
- if tool_name not in ("call_tool", "mcp__gobby__call_tool"):
55
- return
48
+ # Use normalized tool_output (adapters normalize tool_result/tool_response)
49
+ tool_output = event.data.get("tool_output") or {}
56
50
 
57
- # Check server is gobby-tasks
58
- server_name = tool_input.get("server_name", "")
51
+ # Use normalized MCP fields from adapter layer
52
+ # Adapters extract these from CLI-specific formats
53
+ server_name = event.data.get("mcp_server", "")
59
54
  if server_name != "gobby-tasks":
60
55
  return
61
56
 
62
- # Check inner tool name
63
- inner_tool_name = tool_input.get("tool_name", "")
57
+ inner_tool_name = event.data.get("mcp_tool", "")
64
58
 
65
59
  # Handle close_task - clears task_claimed when task is closed
66
60
  # Note: Claude Code doesn't include tool_result in post-tool-use hooks, so for CC
67
61
  # the workflow state is updated directly in the MCP proxy's close_task function.
68
62
  # This detection provides a fallback for CLIs that do report tool results (Gemini/Codex).
69
63
  if inner_tool_name == "close_task":
70
- tool_output = event.data.get("tool_result") or event.data.get("tool_output") or {}
64
+ # tool_output already normalized at top of function
71
65
 
72
66
  # If no tool output, skip - can't verify success
73
67
  # The MCP proxy's close_task handles state clearing for successful closes
@@ -254,6 +248,11 @@ def detect_mcp_call(event: "HookEvent", state: "WorkflowState") -> None:
254
248
  This enables workflow conditions like:
255
249
  when: "mcp_called('gobby-memory', 'recall')"
256
250
 
251
+ Uses normalized fields from adapters:
252
+ - mcp_server: The MCP server name (normalized from both Claude and Gemini formats)
253
+ - mcp_tool: The tool name on the server (normalized from both formats)
254
+ - tool_output: The tool result (normalized from tool_result/tool_response)
255
+
257
256
  Args:
258
257
  event: The AFTER_TOOL hook event
259
258
  state: Current workflow state (modified in place)
@@ -261,21 +260,36 @@ def detect_mcp_call(event: "HookEvent", state: "WorkflowState") -> None:
261
260
  if not event.data:
262
261
  return
263
262
 
264
- tool_name = event.data.get("tool_name", "")
265
- tool_input = event.data.get("tool_input", {}) or {}
266
- # Claude Code sends "tool_result", but we also check "tool_output" for compatibility
267
- tool_output = event.data.get("tool_result") or event.data.get("tool_output") or {}
263
+ # Use normalized fields from adapter layer
264
+ # Adapters extract these from CLI-specific formats:
265
+ # - Claude: tool_input.server_name/tool_name → mcp_server/mcp_tool
266
+ # - Gemini: mcp_context.server_name/tool_name → mcp_server/mcp_tool
267
+ server_name = event.data.get("mcp_server", "")
268
+ inner_tool = event.data.get("mcp_tool", "")
268
269
 
269
- # Check for MCP proxy call
270
- if tool_name not in ("call_tool", "mcp__gobby__call_tool"):
270
+ if not server_name or not inner_tool:
271
271
  return
272
272
 
273
- server_name = tool_input.get("server_name", "")
274
- inner_tool = tool_input.get("tool_name", "")
273
+ # Use normalized tool_output (adapters normalize tool_result/tool_response)
274
+ tool_output = event.data.get("tool_output") or {}
275
275
 
276
- if not server_name or not inner_tool:
277
- return
276
+ _track_mcp_call(state, server_name, inner_tool, tool_output)
277
+
278
+
279
+ def _track_mcp_call(
280
+ state: "WorkflowState",
281
+ server_name: str,
282
+ inner_tool: str,
283
+ tool_output: dict[str, Any] | Any,
284
+ ) -> None:
285
+ """Track a successful MCP call in workflow state.
278
286
 
287
+ Args:
288
+ state: Current workflow state (modified in place)
289
+ server_name: MCP server name (e.g., "gobby-sessions")
290
+ inner_tool: Tool name on the server (e.g., "get_current_session")
291
+ tool_output: Tool output to check for errors
292
+ """
279
293
  # Check if call succeeded (skip tracking failed calls)
280
294
  if isinstance(tool_output, dict):
281
295
  if tool_output.get("error") or tool_output.get("status") == "error":
@@ -4,7 +4,12 @@ This package provides actions that enforce task tracking before allowing
4
4
  certain tools, and enforce task completion before allowing agent to stop.
5
5
  """
6
6
 
7
- from gobby.workflows.enforcement.blocking import block_tools
7
+ from gobby.workflows.enforcement.blocking import (
8
+ block_tools,
9
+ is_discovery_tool,
10
+ is_tool_unlocked,
11
+ track_schema_lookup,
12
+ )
8
13
  from gobby.workflows.enforcement.commit_policy import (
9
14
  capture_baseline_dirty_files,
10
15
  require_commit_before_stop,
@@ -17,6 +22,7 @@ from gobby.workflows.enforcement.handlers import (
17
22
  handle_require_commit_before_stop,
18
23
  handle_require_task_complete,
19
24
  handle_require_task_review_or_close_before_stop,
25
+ handle_track_schema_lookup,
20
26
  handle_validate_session_task_scope,
21
27
  )
22
28
  from gobby.workflows.enforcement.task_policy import (
@@ -28,6 +34,9 @@ from gobby.workflows.enforcement.task_policy import (
28
34
  __all__ = [
29
35
  # Blocking
30
36
  "block_tools",
37
+ "is_discovery_tool",
38
+ "is_tool_unlocked",
39
+ "track_schema_lookup",
31
40
  # Commit policy
32
41
  "capture_baseline_dirty_files",
33
42
  "require_commit_before_stop",
@@ -43,5 +52,6 @@ __all__ = [
43
52
  "handle_require_commit_before_stop",
44
53
  "handle_require_task_complete",
45
54
  "handle_require_task_review_or_close_before_stop",
55
+ "handle_track_schema_lookup",
46
56
  "handle_validate_session_task_scope",
47
57
  ]
@@ -5,12 +5,14 @@ Provides configurable tool blocking based on workflow state and conditions.
5
5
 
6
6
  from __future__ import annotations
7
7
 
8
+ import json
8
9
  import logging
9
10
  from collections.abc import Callable
10
11
  from typing import TYPE_CHECKING, Any
11
12
 
12
13
  from gobby.workflows.git_utils import get_dirty_files
13
14
  from gobby.workflows.safe_evaluator import LazyBool, SafeExpressionEvaluator
15
+ from gobby.workflows.templates import TemplateEngine
14
16
 
15
17
  if TYPE_CHECKING:
16
18
  from gobby.storage.tasks import LocalTaskManager
@@ -18,6 +20,89 @@ if TYPE_CHECKING:
18
20
 
19
21
  logger = logging.getLogger(__name__)
20
22
 
23
+ # MCP discovery tools that don't require prior schema lookup
24
+ DISCOVERY_TOOLS = {
25
+ "list_mcp_servers",
26
+ "list_tools",
27
+ "get_tool_schema",
28
+ "search_tools",
29
+ "recommend_tools",
30
+ "list_skills",
31
+ "get_skill",
32
+ "search_skills",
33
+ }
34
+
35
+
36
+ def is_discovery_tool(tool_name: str | None) -> bool:
37
+ """Check if the tool is a discovery/introspection tool.
38
+
39
+ These tools are allowed without prior schema lookup since they ARE
40
+ the discovery mechanism.
41
+
42
+ Args:
43
+ tool_name: The MCP tool name (from tool_input.tool_name)
44
+
45
+ Returns:
46
+ True if this is a discovery tool that doesn't need schema unlock
47
+ """
48
+ return tool_name in DISCOVERY_TOOLS if tool_name else False
49
+
50
+
51
+ def is_tool_unlocked(
52
+ tool_input: dict[str, Any],
53
+ variables: dict[str, Any],
54
+ ) -> bool:
55
+ """Check if a tool has been unlocked via prior get_tool_schema call.
56
+
57
+ Args:
58
+ tool_input: The tool input containing server_name and tool_name
59
+ variables: Workflow state variables containing unlocked_tools list
60
+
61
+ Returns:
62
+ True if the server:tool combo was previously unlocked via get_tool_schema
63
+ """
64
+ server = tool_input.get("server_name", "")
65
+ tool = tool_input.get("tool_name", "")
66
+ if not server or not tool:
67
+ return False
68
+ key = f"{server}:{tool}"
69
+ unlocked = variables.get("unlocked_tools", [])
70
+ return key in unlocked
71
+
72
+
73
+ def track_schema_lookup(
74
+ tool_input: dict[str, Any],
75
+ workflow_state: WorkflowState | None,
76
+ ) -> dict[str, Any] | None:
77
+ """Track a successful get_tool_schema call by adding to unlocked_tools.
78
+
79
+ Called from on_after_tool when tool_name is get_tool_schema and succeeded.
80
+
81
+ Args:
82
+ tool_input: The tool input containing server_name and tool_name
83
+ workflow_state: Workflow state to update
84
+
85
+ Returns:
86
+ Dict with tracking result or None
87
+ """
88
+ if not workflow_state:
89
+ return None
90
+
91
+ server = tool_input.get("server_name", "")
92
+ tool = tool_input.get("tool_name", "")
93
+ if not server or not tool:
94
+ return None
95
+
96
+ key = f"{server}:{tool}"
97
+ unlocked = workflow_state.variables.setdefault("unlocked_tools", [])
98
+
99
+ if key not in unlocked:
100
+ unlocked.append(key)
101
+ logger.debug(f"Unlocked tool schema: {key}")
102
+ return {"unlocked": key, "total_unlocked": len(unlocked)}
103
+
104
+ return {"already_unlocked": key}
105
+
21
106
 
22
107
  def _is_plan_file(file_path: str, source: str | None = None) -> bool:
23
108
  """Check if file path is a Claude Code plan file (platform-agnostic).
@@ -98,6 +183,8 @@ def _evaluate_block_condition(
98
183
  # Allowed functions for safe evaluation
99
184
  allowed_funcs: dict[str, Callable[..., Any]] = {
100
185
  "is_plan_file": _is_plan_file,
186
+ "is_discovery_tool": is_discovery_tool,
187
+ "is_tool_unlocked": lambda ti: is_tool_unlocked(ti, variables),
101
188
  "bool": bool,
102
189
  "str": str,
103
190
  "int": int,
@@ -241,7 +328,18 @@ async def block_tools(
241
328
  if mcp_key in mcp_tools:
242
329
  rule_matches = True
243
330
  # For MCP tools, the actual arguments are in tool_input.arguments
244
- mcp_tool_args = tool_input.get("arguments", {}) or {}
331
+ # Arguments may be a JSON string (Claude Code serialization) or dict
332
+ raw_args = tool_input.get("arguments")
333
+ if isinstance(raw_args, str):
334
+ try:
335
+ parsed = json.loads(raw_args)
336
+ mcp_tool_args = parsed if isinstance(parsed, dict) else {}
337
+ except (json.JSONDecodeError, TypeError):
338
+ mcp_tool_args = {}
339
+ elif isinstance(raw_args, dict):
340
+ mcp_tool_args = raw_args
341
+ else:
342
+ mcp_tool_args = {}
245
343
 
246
344
  if not rule_matches:
247
345
  continue
@@ -263,6 +361,16 @@ async def block_tools(
263
361
  continue
264
362
 
265
363
  reason = rule.get("reason", f"Tool '{tool_name}' is blocked.")
364
+
365
+ # Render Jinja2 template variables in reason message
366
+ if "{{" in reason:
367
+ try:
368
+ engine = TemplateEngine()
369
+ reason = engine.render(reason, {"tool_input": tool_input})
370
+ except Exception as e:
371
+ logger.warning(f"Failed to render reason template: {e}")
372
+ # Keep original reason on failure
373
+
266
374
  logger.info(f"block_tools: Blocking '{tool_name}' - {reason[:100]}")
267
375
  return {"decision": "block", "reason": reason}
268
376
 
@@ -9,7 +9,7 @@ from __future__ import annotations
9
9
  import logging
10
10
  from typing import TYPE_CHECKING, Any
11
11
 
12
- from gobby.workflows.enforcement.blocking import block_tools
12
+ from gobby.workflows.enforcement.blocking import block_tools, track_schema_lookup
13
13
  from gobby.workflows.enforcement.commit_policy import (
14
14
  capture_baseline_dirty_files,
15
15
  require_commit_before_stop,
@@ -33,6 +33,7 @@ __all__ = [
33
33
  "handle_require_commit_before_stop",
34
34
  "handle_require_task_complete",
35
35
  "handle_require_task_review_or_close_before_stop",
36
+ "handle_track_schema_lookup",
36
37
  "handle_validate_session_task_scope",
37
38
  ]
38
39
 
@@ -267,3 +268,36 @@ async def handle_require_task_complete(
267
268
  project_id=project_id,
268
269
  workflow_state=context.state,
269
270
  )
271
+
272
+
273
+ async def handle_track_schema_lookup(
274
+ context: Any,
275
+ task_manager: LocalTaskManager | None = None,
276
+ **kwargs: Any,
277
+ ) -> dict[str, Any] | None:
278
+ """ActionHandler wrapper for track_schema_lookup.
279
+
280
+ Tracks successful get_tool_schema calls to unlock tools for call_tool.
281
+ Should be triggered on on_after_tool when the tool is get_tool_schema.
282
+ """
283
+ if not context.event_data:
284
+ return None
285
+
286
+ tool_name = context.event_data.get("tool_name", "")
287
+ is_failure = context.event_data.get("is_failure", False)
288
+
289
+ # Only track successful get_tool_schema calls
290
+ # Handle both native MCP format and Gobby proxy format
291
+ if tool_name not in ("get_tool_schema", "mcp__gobby__get_tool_schema"):
292
+ return None
293
+
294
+ if is_failure:
295
+ return None
296
+
297
+ # Extract tool_input - for MCP proxy, it's in tool_input directly
298
+ tool_input = context.event_data.get("tool_input", {}) or {}
299
+
300
+ return track_schema_lookup(
301
+ tool_input=tool_input,
302
+ workflow_state=context.state,
303
+ )