gobby-0.2.5-py3-none-any.whl → gobby-0.2.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. gobby/adapters/claude_code.py +13 -4
  2. gobby/adapters/codex.py +43 -3
  3. gobby/agents/runner.py +8 -0
  4. gobby/cli/__init__.py +6 -0
  5. gobby/cli/clones.py +419 -0
  6. gobby/cli/conductor.py +266 -0
  7. gobby/cli/installers/antigravity.py +3 -9
  8. gobby/cli/installers/claude.py +9 -9
  9. gobby/cli/installers/codex.py +2 -8
  10. gobby/cli/installers/gemini.py +2 -8
  11. gobby/cli/installers/shared.py +71 -8
  12. gobby/cli/skills.py +858 -0
  13. gobby/cli/tasks/ai.py +0 -440
  14. gobby/cli/tasks/crud.py +44 -6
  15. gobby/cli/tasks/main.py +0 -4
  16. gobby/cli/tui.py +2 -2
  17. gobby/cli/utils.py +3 -3
  18. gobby/clones/__init__.py +13 -0
  19. gobby/clones/git.py +547 -0
  20. gobby/conductor/__init__.py +16 -0
  21. gobby/conductor/alerts.py +135 -0
  22. gobby/conductor/loop.py +164 -0
  23. gobby/conductor/monitors/__init__.py +11 -0
  24. gobby/conductor/monitors/agents.py +116 -0
  25. gobby/conductor/monitors/tasks.py +155 -0
  26. gobby/conductor/pricing.py +234 -0
  27. gobby/conductor/token_tracker.py +160 -0
  28. gobby/config/app.py +63 -1
  29. gobby/config/search.py +110 -0
  30. gobby/config/servers.py +1 -1
  31. gobby/config/skills.py +43 -0
  32. gobby/config/tasks.py +6 -14
  33. gobby/hooks/event_handlers.py +145 -2
  34. gobby/hooks/hook_manager.py +48 -2
  35. gobby/hooks/skill_manager.py +130 -0
  36. gobby/install/claude/hooks/hook_dispatcher.py +4 -4
  37. gobby/install/codex/hooks/hook_dispatcher.py +1 -1
  38. gobby/install/gemini/hooks/hook_dispatcher.py +87 -12
  39. gobby/llm/claude.py +22 -34
  40. gobby/llm/claude_executor.py +46 -256
  41. gobby/llm/codex_executor.py +59 -291
  42. gobby/llm/executor.py +21 -0
  43. gobby/llm/gemini.py +134 -110
  44. gobby/llm/litellm_executor.py +143 -6
  45. gobby/llm/resolver.py +95 -33
  46. gobby/mcp_proxy/instructions.py +54 -0
  47. gobby/mcp_proxy/models.py +15 -0
  48. gobby/mcp_proxy/registries.py +68 -5
  49. gobby/mcp_proxy/server.py +33 -3
  50. gobby/mcp_proxy/services/tool_proxy.py +81 -1
  51. gobby/mcp_proxy/stdio.py +2 -1
  52. gobby/mcp_proxy/tools/__init__.py +0 -2
  53. gobby/mcp_proxy/tools/agent_messaging.py +317 -0
  54. gobby/mcp_proxy/tools/clones.py +903 -0
  55. gobby/mcp_proxy/tools/memory.py +1 -24
  56. gobby/mcp_proxy/tools/metrics.py +65 -1
  57. gobby/mcp_proxy/tools/orchestration/__init__.py +3 -0
  58. gobby/mcp_proxy/tools/orchestration/cleanup.py +151 -0
  59. gobby/mcp_proxy/tools/orchestration/wait.py +467 -0
  60. gobby/mcp_proxy/tools/session_messages.py +1 -2
  61. gobby/mcp_proxy/tools/skills/__init__.py +631 -0
  62. gobby/mcp_proxy/tools/task_orchestration.py +7 -0
  63. gobby/mcp_proxy/tools/task_readiness.py +14 -0
  64. gobby/mcp_proxy/tools/task_sync.py +1 -1
  65. gobby/mcp_proxy/tools/tasks/_context.py +0 -20
  66. gobby/mcp_proxy/tools/tasks/_crud.py +91 -4
  67. gobby/mcp_proxy/tools/tasks/_expansion.py +348 -0
  68. gobby/mcp_proxy/tools/tasks/_factory.py +6 -16
  69. gobby/mcp_proxy/tools/tasks/_lifecycle.py +60 -29
  70. gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +18 -29
  71. gobby/mcp_proxy/tools/workflows.py +1 -1
  72. gobby/mcp_proxy/tools/worktrees.py +5 -0
  73. gobby/memory/backends/__init__.py +6 -1
  74. gobby/memory/backends/mem0.py +6 -1
  75. gobby/memory/extractor.py +477 -0
  76. gobby/memory/manager.py +11 -2
  77. gobby/prompts/defaults/handoff/compact.md +63 -0
  78. gobby/prompts/defaults/handoff/session_end.md +57 -0
  79. gobby/prompts/defaults/memory/extract.md +61 -0
  80. gobby/runner.py +37 -16
  81. gobby/search/__init__.py +48 -6
  82. gobby/search/backends/__init__.py +159 -0
  83. gobby/search/backends/embedding.py +225 -0
  84. gobby/search/embeddings.py +238 -0
  85. gobby/search/models.py +148 -0
  86. gobby/search/unified.py +496 -0
  87. gobby/servers/http.py +23 -8
  88. gobby/servers/routes/admin.py +280 -0
  89. gobby/servers/routes/mcp/tools.py +241 -52
  90. gobby/servers/websocket.py +2 -2
  91. gobby/sessions/analyzer.py +2 -0
  92. gobby/sessions/transcripts/base.py +1 -0
  93. gobby/sessions/transcripts/claude.py +64 -5
  94. gobby/skills/__init__.py +91 -0
  95. gobby/skills/loader.py +685 -0
  96. gobby/skills/manager.py +384 -0
  97. gobby/skills/parser.py +258 -0
  98. gobby/skills/search.py +463 -0
  99. gobby/skills/sync.py +119 -0
  100. gobby/skills/updater.py +385 -0
  101. gobby/skills/validator.py +368 -0
  102. gobby/storage/clones.py +378 -0
  103. gobby/storage/database.py +1 -1
  104. gobby/storage/memories.py +43 -13
  105. gobby/storage/migrations.py +180 -6
  106. gobby/storage/sessions.py +73 -0
  107. gobby/storage/skills.py +749 -0
  108. gobby/storage/tasks/_crud.py +4 -4
  109. gobby/storage/tasks/_lifecycle.py +41 -6
  110. gobby/storage/tasks/_manager.py +14 -5
  111. gobby/storage/tasks/_models.py +8 -3
  112. gobby/sync/memories.py +39 -4
  113. gobby/sync/tasks.py +83 -6
  114. gobby/tasks/__init__.py +1 -2
  115. gobby/tasks/validation.py +24 -15
  116. gobby/tui/api_client.py +4 -7
  117. gobby/tui/app.py +5 -3
  118. gobby/tui/screens/orchestrator.py +1 -2
  119. gobby/tui/screens/tasks.py +2 -4
  120. gobby/tui/ws_client.py +1 -1
  121. gobby/utils/daemon_client.py +2 -2
  122. gobby/workflows/actions.py +84 -2
  123. gobby/workflows/context_actions.py +43 -0
  124. gobby/workflows/detection_helpers.py +115 -31
  125. gobby/workflows/engine.py +13 -2
  126. gobby/workflows/lifecycle_evaluator.py +29 -1
  127. gobby/workflows/loader.py +19 -6
  128. gobby/workflows/memory_actions.py +74 -0
  129. gobby/workflows/summary_actions.py +17 -0
  130. gobby/workflows/task_enforcement_actions.py +448 -6
  131. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/METADATA +82 -21
  132. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/RECORD +136 -107
  133. gobby/install/codex/prompts/forget.md +0 -7
  134. gobby/install/codex/prompts/memories.md +0 -7
  135. gobby/install/codex/prompts/recall.md +0 -7
  136. gobby/install/codex/prompts/remember.md +0 -13
  137. gobby/llm/gemini_executor.py +0 -339
  138. gobby/mcp_proxy/tools/task_expansion.py +0 -591
  139. gobby/tasks/context.py +0 -747
  140. gobby/tasks/criteria.py +0 -342
  141. gobby/tasks/expansion.py +0 -626
  142. gobby/tasks/prompts/expand.py +0 -327
  143. gobby/tasks/research.py +0 -421
  144. gobby/tasks/tdd.py +0 -352
  145. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/WHEEL +0 -0
  146. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/entry_points.txt +0 -0
  147. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/licenses/LICENSE.md +0 -0
  148. {gobby-0.2.5.dist-info → gobby-0.2.6.dist-info}/top_level.txt +0 -0
gobby/workflows/actions.py CHANGED
@@ -28,6 +28,7 @@ from gobby.workflows.git_utils import get_file_changes, get_git_status, get_rece
 from gobby.workflows.llm_actions import call_llm
 from gobby.workflows.mcp_actions import call_mcp_tool
 from gobby.workflows.memory_actions import (
+    memory_extract,
     memory_recall_relevant,
     memory_save,
     memory_sync_export,
@@ -58,6 +59,7 @@ from gobby.workflows.summary_actions import (
     synthesize_title,
 )
 from gobby.workflows.task_enforcement_actions import (
+    block_tools,
     capture_baseline_dirty_files,
     require_active_task,
     require_commit_before_stop,
@@ -226,6 +228,7 @@ class ActionExecutor:
         self.register("memory_recall_relevant", self._handle_memory_recall_relevant)
         self.register("memory_sync_import", self._handle_memory_sync_import)
         self.register("memory_sync_export", self._handle_memory_sync_export)
+        self.register("memory_extract", self._handle_memory_extract)
         self.register(
             "reset_memory_injection_tracking", self._handle_reset_memory_injection_tracking
         )
@@ -236,6 +239,7 @@ class ActionExecutor:
         self.register("start_new_session", self._handle_start_new_session)
         self.register("mark_loop_complete", self._handle_mark_loop_complete)
         # Task enforcement
+        self.register("block_tools", self._handle_block_tools)
         self.register("require_active_task", self._handle_require_active_task)
         self.register("require_commit_before_stop", self._handle_require_commit_before_stop)
         self.register(
@@ -658,6 +662,8 @@ class ActionExecutor:

         For compact mode, fetches the current session's existing summary_markdown
         as previous_summary for cumulative compression.
+
+        Supports loading prompts from the prompts collection via the 'prompt' parameter.
         """
         # Detect mode from kwargs or event data
         mode = kwargs.get("mode", "clear")
@@ -683,12 +689,27 @@ class ActionExecutor:
                 f"for cumulative compression"
             )

+        # Load template from prompts collection if 'prompt' parameter provided
+        template = kwargs.get("template")
+        prompt_path = kwargs.get("prompt")
+        if prompt_path and not template:
+            try:
+                from gobby.prompts.loader import PromptLoader
+
+                loader = PromptLoader()
+                prompt_template = loader.load(prompt_path)
+                template = prompt_template.content
+                logger.debug(f"Loaded prompt template from: {prompt_path}")
+            except Exception as e:
+                logger.warning(f"Failed to load prompt from {prompt_path}: {e}")
+                # Fall back to inline template or default
+
         return await generate_handoff(
             session_manager=context.session_manager,
             session_id=context.session_id,
             llm_service=context.llm_service,
             transcript_processor=context.transcript_processor,
-            template=kwargs.get("template"),
+            template=template,
             previous_summary=previous_summary,
             mode=mode,
         )
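
The fallback chain in _handle_generate_handoff is worth seeing in isolation. A minimal sketch of the resolution order, assuming only the PromptLoader API shown above (the helper name is illustrative):

    # Precedence: 1) inline 'template' kwarg wins; 2) else load 'prompt' from the
    # prompts collection; 3) on any failure, fall through to the built-in default.
    def resolve_template(kwargs: dict) -> str | None:
        template = kwargs.get("template")
        prompt_path = kwargs.get("prompt")
        if prompt_path and not template:
            try:
                from gobby.prompts.loader import PromptLoader
                template = PromptLoader().load(prompt_path).content
            except Exception:
                pass  # generate_handoff(template=None) falls back to its default
        return template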
@@ -780,6 +801,30 @@ class ActionExecutor:
         """Reset memory injection tracking to allow re-injection after context loss."""
         return reset_memory_injection_tracking(state=context.state)

+    async def _handle_memory_extract(
+        self, context: ActionContext, **kwargs: Any
+    ) -> dict[str, Any] | None:
+        """Extract memories from the current session.
+
+        Args (via kwargs):
+            min_importance: Minimum importance threshold (default: 0.7)
+            max_memories: Maximum memories to extract (default: 5)
+            dry_run: If True, don't store memories (default: False)
+
+        Returns:
+            Dict with extracted_count and optional memory details
+        """
+        return await memory_extract(
+            session_manager=context.session_manager,
+            session_id=context.session_id,
+            llm_service=context.llm_service,
+            memory_manager=context.memory_manager,
+            transcript_processor=context.transcript_processor,
+            min_importance=kwargs.get("min_importance", 0.7),
+            max_memories=kwargs.get("max_memories", 5),
+            dry_run=kwargs.get("dry_run", False),
+        )
+
     async def _handle_mark_session_status(
         self, context: ActionContext, **kwargs: Any
     ) -> dict[str, Any] | None:
@@ -840,10 +885,47 @@ class ActionExecutor:
             project_path=project_path,
         )

+    async def _handle_block_tools(
+        self, context: ActionContext, **kwargs: Any
+    ) -> dict[str, Any] | None:
+        """Block tools based on configurable rules.
+
+        This is the unified tool blocking action that replaces require_active_task
+        for CC native task blocking while also supporting task-before-edit enforcement.
+
+        For MCP tool blocking (mcp_tools rules), also passes:
+        - project_path: for checking dirty files in git status
+        - task_manager: for checking if claimed task has commits
+        - source: CLI source for is_plan_file checks
+        """
+        # Get project_path for git dirty file checks
+        project_path = kwargs.get("project_path")
+        if not project_path and context.event_data:
+            project_path = context.event_data.get("cwd")
+
+        # Get source from session for is_plan_file checks
+        source = None
+        current_session = context.session_manager.get(context.session_id)
+        if current_session:
+            source = current_session.source
+
+        return await block_tools(
+            rules=kwargs.get("rules"),
+            event_data=context.event_data,
+            workflow_state=context.state,
+            project_path=project_path,
+            task_manager=self.task_manager,
+            source=source,
+        )
+
     async def _handle_require_active_task(
         self, context: ActionContext, **kwargs: Any
     ) -> dict[str, Any] | None:
-        """Check for active task before allowing protected tools."""
+        """Check for active task before allowing protected tools.
+
+        DEPRECATED: Use block_tools action with rules instead.
+        Kept for backward compatibility with existing workflows.
+        """
         # Get project_id from session for project-scoped task filtering
         current_session = context.session_manager.get(context.session_id)
         project_id = current_session.project_id if current_session else None
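
The handler above forwards everything block_tools needs from the action context. A hedged sketch of an equivalent direct call; the keyword arguments come from the handler, but the rule dict is purely illustrative, since this diff does not show the rules schema:

    # Hypothetical rule shape -- only the call signature comes from the diff.
    decision = await block_tools(
        rules=[{"tools": ["Edit", "Write"], "require": "active_task"}],
        event_data=event.data,
        workflow_state=state,
        project_path="/path/to/repo",
        task_manager=task_manager,
        source="claude",
    )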
gobby/workflows/context_actions.py CHANGED
@@ -304,6 +304,17 @@ def extract_handoff_context(
     except Exception as wt_err:
         logger.debug(f"Failed to get worktree context: {wt_err}")

+    # Add active skills from HookSkillManager
+    try:
+        from gobby.hooks.skill_manager import HookSkillManager
+
+        skill_manager = HookSkillManager()
+        core_skills = skill_manager.discover_core_skills()
+        always_apply_skills = [s.name for s in core_skills if s.is_always_apply()]
+        handoff_ctx.active_skills = always_apply_skills
+    except Exception as skill_err:
+        logger.debug(f"Failed to get active skills: {skill_err}")
+
     # Format as markdown (like /clear stores formatted summary)
     markdown = format_handoff_as_markdown(handoff_ctx)

@@ -320,6 +331,32 @@ def extract_handoff_context(
         return {"error": str(e)}


+def recommend_skills_for_task(task: dict[str, Any] | None) -> list[str]:
+    """Recommend relevant skills based on task category.
+
+    Uses HookSkillManager to get skill recommendations based on the task's
+    category field. Returns always-apply skills if no category is set.
+
+    Args:
+        task: Task dict with optional 'category' field, or None.
+
+    Returns:
+        List of recommended skill names for this task.
+    """
+    if task is None:
+        return []
+
+    try:
+        from gobby.hooks.skill_manager import HookSkillManager
+
+        manager = HookSkillManager()
+        category = task.get("category")
+        return manager.recommend_skills(category=category)
+    except Exception as e:
+        logger.debug(f"Failed to recommend skills: {e}")
+        return []
+
+
 def format_handoff_as_markdown(ctx: Any, prompt_template: str | None = None) -> str:
     """Format HandoffContext as markdown for storage.

@@ -391,4 +428,10 @@ def format_handoff_as_markdown(ctx: Any, prompt_template: str | None = None) ->
             lines.append(f"- {activity}")
         sections.append("\n".join(lines))

+    # Active skills section
+    if hasattr(ctx, "active_skills") and ctx.active_skills:
+        lines = ["### Active Skills"]
+        lines.append(f"Skills available: {', '.join(ctx.active_skills)}")
+        sections.append("\n".join(lines))
+
     return "\n\n".join(sections)
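
A short usage sketch for the new helper. The function and its None handling come from the diff; the task dict and the returned skill names are invented for illustration:

    task = {"id": "1f3a...", "title": "Add OAuth login", "category": "backend"}
    recommend_skills_for_task(task)   # -> e.g. ["api-design", "security-review"]
    recommend_skills_for_task(None)   # -> [] (no task to recommend for)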
gobby/workflows/detection_helpers.py CHANGED
@@ -11,6 +11,7 @@ from typing import TYPE_CHECKING

 if TYPE_CHECKING:
     from gobby.hooks.events import HookEvent
+    from gobby.storage.tasks import LocalTaskManager
     from gobby.tasks.session_tasks import SessionTaskManager

 from .definitions import WorkflowState
@@ -22,6 +23,7 @@ def detect_task_claim(
     event: "HookEvent",
     state: "WorkflowState",
     session_task_manager: "SessionTaskManager | None" = None,
+    task_manager: "LocalTaskManager | None" = None,
 ) -> None:
     """Detect gobby-tasks calls that claim or release a task for this session.

@@ -44,7 +46,8 @@

     tool_name = event.data.get("tool_name", "")
     tool_input = event.data.get("tool_input", {}) or {}
-    tool_output = event.data.get("tool_output", {}) or {}
+    # Claude Code sends "tool_result", but we also check "tool_output" for compatibility
+    tool_output = event.data.get("tool_result") or event.data.get("tool_output") or {}

     # Check if this is a gobby-tasks call via MCP proxy
     # Tool name could be "call_tool" (from legacy) or "mcp__gobby__call_tool" (direct)
@@ -58,7 +61,34 @@

     # Check inner tool name
     inner_tool_name = tool_input.get("tool_name", "")
-    if inner_tool_name not in ("create_task", "update_task", "close_task"):
+
+    # Handle close_task - clears task_claimed when task is closed
+    # Note: Claude Code doesn't include tool_result in post-tool-use hooks, so for CC
+    # the workflow state is updated directly in the MCP proxy's close_task function.
+    # This detection provides a fallback for CLIs that do report tool results (Gemini/Codex).
+    if inner_tool_name == "close_task":
+        tool_output = event.data.get("tool_result") or event.data.get("tool_output") or {}
+
+        # If no tool output, skip - can't verify success
+        # The MCP proxy's close_task handles state clearing for successful closes
+        if not tool_output:
+            return
+
+        # Check if close succeeded (not an error)
+        if isinstance(tool_output, dict):
+            if tool_output.get("error") or tool_output.get("status") == "error":
+                return
+            result = tool_output.get("result", {})
+            if isinstance(result, dict) and result.get("error"):
+                return
+
+        # Clear task_claimed on successful close
+        state.variables["task_claimed"] = False
+        state.variables["claimed_task_id"] = None
+        logger.info(f"Session {state.session_id}: task_claimed=False (detected close_task success)")
+        return
+
+    if inner_tool_name not in ("create_task", "update_task", "claim_task"):
         return

     # For update_task, only count if status is being set to in_progress
@@ -66,11 +96,9 @@
         arguments = tool_input.get("arguments", {}) or {}
         if arguments.get("status") != "in_progress":
             return
+    # claim_task always counts (it sets status to in_progress internally)

-    # For close_task, we'll clear task_claimed after success check
-    is_close_task = inner_tool_name == "close_task"
-
-    # Check if the call succeeded (not an error)
+    # Check if the call succeeded (not an error) - for non-close_task operations
     # tool_output structure varies, but errors typically have "error" key
     # or the MCP response has "status": "error"
     if isinstance(tool_output, dict):
@@ -81,35 +109,26 @@
         if isinstance(result, dict) and result.get("error"):
             return

-    # Handle close_task - clear the claim only if closing the claimed task
-    if is_close_task:
-        arguments = tool_input.get("arguments", {}) or {}
-        closed_task_id = arguments.get("task_id")
-        claimed_task_id = state.variables.get("claimed_task_id")
-
-        # Only clear task_claimed if we're closing the task that was claimed
-        if closed_task_id and claimed_task_id and closed_task_id == claimed_task_id:
-            state.variables["task_claimed"] = False
-            state.variables["claimed_task_id"] = None
-            logger.info(
-                f"Session {state.session_id}: task_claimed=False "
-                f"(claimed task {closed_task_id} closed via close_task)"
-            )
-        else:
-            logger.debug(
-                f"Session {state.session_id}: close_task for {closed_task_id} "
-                f"(claimed: {claimed_task_id}) - not clearing task_claimed"
-            )
-        return
-
     # Extract task_id based on tool type
     arguments = tool_input.get("arguments", {}) or {}
-    if inner_tool_name == "update_task":
+    if inner_tool_name in ("update_task", "claim_task"):
         task_id = arguments.get("task_id")
+        # Resolve to UUID for consistent comparison with close_task
+        if task_id and task_manager:
+            try:
+                task = task_manager.get_task(task_id)
+                if task:
+                    task_id = task.id  # Use UUID
+            except Exception:  # nosec B110 - best effort resolution, keep original if fails
+                pass
     elif inner_tool_name == "create_task":
         # For create_task, the id is in the result
         result = tool_output.get("result", {}) if isinstance(tool_output, dict) else {}
         task_id = result.get("id") if isinstance(result, dict) else None
+        # Skip if we can't get the task ID (e.g., Claude Code doesn't include tool results)
+        # The MCP tool itself handles state updates in this case via _crud.py
+        if not task_id:
+            return
     else:
         task_id = None

@@ -121,8 +140,8 @@
         f"(via {inner_tool_name})"
     )

-    # Auto-link task to session when status is set to in_progress
-    if inner_tool_name == "update_task":
+    # Auto-link task to session when claiming a task
+    if inner_tool_name in ("update_task", "claim_task"):
         arguments = tool_input.get("arguments", {}) or {}
         task_id = arguments.get("task_id")
         if task_id and session_task_manager:
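
The hunks above switch the detector to read Claude Code's "tool_result" key and fall back to the legacy "tool_output" key. A concrete sketch of the payload being normalized; the keys follow the diff, the values are invented:

    event_data = {
        "tool_name": "mcp__gobby__call_tool",
        "tool_input": {"tool_name": "claim_task", "arguments": {"task_id": "T-1"}},
        "tool_result": {"result": {"id": "550e8400-..."}},  # Claude Code key
        # Gemini/Codex report the same payload under "tool_output" instead
    }
    tool_output = event_data.get("tool_result") or event_data.get("tool_output") or {}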
@@ -159,6 +178,70 @@ def detect_plan_mode(event: "HookEvent", state: "WorkflowState") -> None:
         logger.info(f"Session {state.session_id}: plan_mode=False (exited plan mode)")


+def detect_plan_mode_from_context(event: "HookEvent", state: "WorkflowState") -> None:
+    """Detect plan mode from system reminders injected by Claude Code.
+
+    Claude Code injects system reminders like "Plan mode is active" when the user
+    enters plan mode via the UI (not via the EnterPlanMode tool). This function
+    detects those reminders and sets the plan_mode variable accordingly.
+
+    IMPORTANT: Only matches indicators within <system-reminder> tags to avoid
+    false positives from handoff context or user messages that mention plan mode.
+
+    This complements detect_plan_mode() which only catches programmatic tool calls.
+
+    Args:
+        event: The BEFORE_AGENT hook event (contains user prompt with system reminders)
+        state: Current workflow state (modified in place)
+    """
+    if not event.data:
+        return
+
+    # Check for plan mode system reminder in the prompt
+    prompt = event.data.get("prompt", "") or ""
+
+    # Extract only content within <system-reminder> tags to avoid false positives
+    # from handoff context or user messages mentioning plan mode
+    import re
+
+    system_reminders = re.findall(r"<system-reminder>(.*?)</system-reminder>", prompt, re.DOTALL)
+    reminder_text = " ".join(system_reminders)
+
+    # Claude Code injects these phrases in system reminders when plan mode is active
+    plan_mode_indicators = [
+        "Plan mode is active",
+        "Plan mode still active",
+        "You are in plan mode",
+    ]
+
+    # Check if plan mode is indicated in system reminders only
+    for indicator in plan_mode_indicators:
+        if indicator in reminder_text:
+            if not state.variables.get("plan_mode"):
+                state.variables["plan_mode"] = True
+                logger.info(
+                    f"Session {state.session_id}: plan_mode=True "
+                    f"(detected from system reminder: '{indicator}')"
+                )
+            return
+
+    # Detect exit from plan mode (also only in system reminders)
+    exit_indicators = [
+        "Exited Plan Mode",
+        "Plan mode exited",
+    ]
+
+    for indicator in exit_indicators:
+        if indicator in reminder_text:
+            if state.variables.get("plan_mode"):
+                state.variables["plan_mode"] = False
+                logger.info(
+                    f"Session {state.session_id}: plan_mode=False "
+                    f"(detected from system reminder: '{indicator}')"
+                )
+            return
+
+
 def detect_mcp_call(event: "HookEvent", state: "WorkflowState") -> None:
     """Track MCP tool calls by server/tool for workflow conditions.

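The reminder-scoped matching above can be checked in isolation: only text inside <system-reminder> tags counts, so a user message that merely mentions plan mode does not flip the flag. The sample prompt below is invented:

    import re

    prompt = (
        "I updated my plan mode doc. "
        "<system-reminder>Plan mode is active. Do not make edits yet.</system-reminder>"
    )
    reminder_text = " ".join(
        re.findall(r"<system-reminder>(.*?)</system-reminder>", prompt, re.DOTALL)
    )
    assert "Plan mode is active" in reminder_text  # matched inside the tag
    assert "plan mode doc" not in reminder_text    # user text outside tags is ignored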
@@ -180,7 +263,8 @@ def detect_mcp_call(event: "HookEvent", state: "WorkflowState") -> None:

     tool_name = event.data.get("tool_name", "")
     tool_input = event.data.get("tool_input", {}) or {}
-    tool_output = event.data.get("tool_output", {}) or {}
+    # Claude Code sends "tool_result", but we also check "tool_output" for compatibility
+    tool_output = event.data.get("tool_result") or event.data.get("tool_output") or {}

     # Check for MCP proxy call
     if tool_name not in ("call_tool", "mcp__gobby__call_tool"):
gobby/workflows/engine.py CHANGED
@@ -15,7 +15,12 @@ from .audit_helpers import (
     log_transition,
 )
 from .definitions import WorkflowDefinition, WorkflowState
-from .detection_helpers import detect_mcp_call, detect_plan_mode, detect_task_claim
+from .detection_helpers import (
+    detect_mcp_call,
+    detect_plan_mode,
+    detect_plan_mode_from_context,
+    detect_task_claim,
+)
 from .evaluator import ConditionEvaluator
 from .lifecycle_evaluator import (
     evaluate_all_lifecycle_workflows as _evaluate_all_lifecycle_workflows,
@@ -375,6 +380,7 @@ class WorkflowEngine:
            evaluator=self.evaluator,
            detect_task_claim_fn=self._detect_task_claim,
            detect_plan_mode_fn=self._detect_plan_mode,
+           detect_plan_mode_from_context_fn=self._detect_plan_mode_from_context,
            check_premature_stop_fn=self._check_premature_stop,
            context_data=context_data,
        )
@@ -474,12 +480,17 @@ class WorkflowEngine:
     def _detect_task_claim(self, event: HookEvent, state: WorkflowState) -> None:
         """Detect gobby-tasks calls that claim or release a task for this session."""
         session_task_manager = getattr(self.action_executor, "session_task_manager", None)
-        detect_task_claim(event, state, session_task_manager)
+        task_manager = getattr(self.action_executor, "task_manager", None)
+        detect_task_claim(event, state, session_task_manager, task_manager)

     def _detect_plan_mode(self, event: HookEvent, state: WorkflowState) -> None:
         """Detect Claude Code plan mode entry/exit and set workflow variable."""
         detect_plan_mode(event, state)

+    def _detect_plan_mode_from_context(self, event: HookEvent, state: WorkflowState) -> None:
+        """Detect plan mode from system reminders in user prompt."""
+        detect_plan_mode_from_context(event, state)
+
     def _detect_mcp_call(self, event: HookEvent, state: WorkflowState) -> None:
         """Track MCP tool calls by server/tool for workflow conditions."""
         detect_mcp_call(event, state)
gobby/workflows/lifecycle_evaluator.py CHANGED
@@ -439,6 +439,7 @@ async def evaluate_all_lifecycle_workflows(
     detect_plan_mode_fn: Any,
     check_premature_stop_fn: Any,
     context_data: dict[str, Any] | None = None,
+    detect_plan_mode_from_context_fn: Any | None = None,
 ) -> HookResponse:
     """
     Discover and evaluate all lifecycle workflows for the given event.
@@ -453,9 +454,10 @@
         action_executor: Action executor for running actions
         evaluator: Condition evaluator
         detect_task_claim_fn: Function to detect task claims
-        detect_plan_mode_fn: Function to detect plan mode
+        detect_plan_mode_fn: Function to detect plan mode (from tool calls)
         check_premature_stop_fn: Async function to check premature stop
         context_data: Optional context data passed between actions
+        detect_plan_mode_from_context_fn: Function to detect plan mode from system reminders

     Returns:
         Merged HookResponse with combined context and first non-allow decision.
@@ -594,6 +596,21 @@
         detect_plan_mode_fn(event, state)
         state_manager.save_state(state)

+    # Detect plan mode from system reminders for BEFORE_AGENT events
+    # This catches plan mode when user enters via UI (not via EnterPlanMode tool)
+    if event.event_type == HookEventType.BEFORE_AGENT and detect_plan_mode_from_context_fn:
+        session_id = event.metadata.get("_platform_session_id")
+        if session_id:
+            state = state_manager.get_state(session_id)
+            if state is None:
+                state = WorkflowState(
+                    session_id=session_id,
+                    workflow_name="__lifecycle__",
+                    step="",
+                )
+            detect_plan_mode_from_context_fn(event, state)
+            state_manager.save_state(state)
+
     # Check for premature stop in active step workflows on STOP events
     if event.event_type == HookEventType.STOP:
         premature_response = await check_premature_stop_fn(event, context_data)
@@ -610,4 +627,15 @@
         reason=final_reason,
         context="\n\n".join(all_context) if all_context else None,
         system_message=final_system_message,
+        metadata={
+            "discovered_workflows": [
+                {
+                    "name": w.name,
+                    "priority": w.priority,
+                    "is_project": w.is_project,
+                    "path": str(w.path),
+                }
+                for w in workflows
+            ]
+        },
     )
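
The new metadata field makes workflow discovery observable to hook consumers: each entry records a workflow's name, priority, scope, and source path. A hedged sketch of reading it, assuming 'response' is the HookResponse returned above:

    for wf in response.metadata["discovered_workflows"]:
        scope = "project" if wf["is_project"] else "global"
        print(f"{wf['priority']:>3}  {scope:<7}  {wf['name']}  ({wf['path']})")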
gobby/workflows/loader.py CHANGED
@@ -193,6 +193,7 @@ class WorkflowLoader:
             return self._discovery_cache[cache_key]

         discovered: dict[str, DiscoveredWorkflow] = {}  # name -> workflow (for shadowing)
+        failed: dict[str, str] = {}  # name -> error message for failed workflows

         # 1. Scan global lifecycle directory first (will be shadowed by project)
         for global_dir in self.global_dirs:
@@ -201,7 +202,14 @@
         # 2. Scan project lifecycle directory (shadows global)
         if project_path:
             project_dir = Path(project_path) / ".gobby" / "workflows" / "lifecycle"
-            self._scan_directory(project_dir, is_project=True, discovered=discovered)
+            self._scan_directory(project_dir, is_project=True, discovered=discovered, failed=failed)
+
+            # Log errors when project workflow fails but global exists (failed shadowing)
+            for name, error in failed.items():
+                if name in discovered and not discovered[name].is_project:
+                    logger.error(
+                        f"Project workflow '{name}' failed to load, using global instead: {error}"
+                    )

         # 3. Filter to lifecycle workflows only
         lifecycle_workflows = [w for w in discovered.values() if w.definition.type == "lifecycle"]
@@ -225,6 +233,7 @@
         directory: Path,
         is_project: bool,
         discovered: dict[str, DiscoveredWorkflow],
+        failed: dict[str, str] | None = None,
     ) -> None:
         """
         Scan a directory for workflow YAML files and add to discovered dict.
@@ -233,6 +242,7 @@
             directory: Directory to scan
             is_project: Whether this is a project directory (for shadowing)
             discovered: Dict to update (name -> DiscoveredWorkflow)
+            failed: Optional dict to track failed workflows (name -> error message)
         """
         if not directory.exists():
             return
@@ -258,6 +268,8 @@
                     data = self._merge_workflows(parent.model_dump(), data)
                 except ValueError as e:
                     logger.warning(f"Skipping workflow {name}: {e}")
+                    if failed is not None:
+                        failed[name] = str(e)
                    continue

                definition = WorkflowDefinition(**data)
@@ -267,6 +279,10 @@
                if definition.settings and "priority" in definition.settings:
                    priority = definition.settings["priority"]

+               # Log successful shadowing when project workflow overrides global
+               if name in discovered and is_project and not discovered[name].is_project:
+                   logger.info(f"Project workflow '{name}' shadows global workflow")
+
                # Project workflows shadow global (overwrite in dict)
                # Global is scanned first, so project overwrites
                discovered[name] = DiscoveredWorkflow(
@@ -279,6 +295,8 @@

             except Exception as e:
                 logger.warning(f"Failed to load workflow from {yaml_path}: {e}")
+                if failed is not None:
+                    failed[name] = str(e)

     def clear_cache(self) -> None:
         """
@@ -288,11 +306,6 @@
         self._cache.clear()
         self._discovery_cache.clear()

-    def clear_discovery_cache(self) -> None:
-        """Clear the discovery cache. Call when workflows may have changed."""
-        # Deprecated: use clear_cache instead to clear everything
-        self.clear_cache()
-
     def validate_workflow_for_agent(
         self,
         workflow_name: str,
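
Callers of the removed clear_discovery_cache() should switch to clear_cache(), which clears both the definition and discovery caches. A minimal migration sketch, assuming a default-constructible loader:

    loader = WorkflowLoader()
    # loader.clear_discovery_cache()  # removed in 0.2.6; it only aliased clear_cache()
    loader.clear_cache()              # clears workflow and discovery caches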
gobby/workflows/memory_actions.py CHANGED
@@ -270,3 +270,77 @@ def reset_memory_injection_tracking(state: Any | None = None) -> dict[str, Any]:
     logger.info(f"reset_memory_injection_tracking: Cleared {cleared_count} injected memory IDs")

     return {"success": True, "cleared": cleared_count}
+
+
+async def memory_extract(
+    session_manager: Any,
+    session_id: str,
+    llm_service: Any,
+    memory_manager: Any,
+    transcript_processor: Any | None = None,
+    min_importance: float = 0.7,
+    max_memories: int = 5,
+    dry_run: bool = False,
+) -> dict[str, Any] | None:
+    """Extract memories from a session transcript.
+
+    Uses LLM analysis to identify high-value, reusable knowledge from
+    session transcripts and stores them as memories.
+
+    Args:
+        session_manager: The session manager instance
+        session_id: Current session ID
+        llm_service: LLM service for analysis
+        memory_manager: Memory manager for storage
+        transcript_processor: Optional transcript processor
+        min_importance: Minimum importance threshold (0.0-1.0)
+        max_memories: Maximum memories to extract
+        dry_run: If True, don't store memories
+
+    Returns:
+        Dict with extracted_count and memory details, or error
+    """
+    if not memory_manager:
+        return {"error": "Memory Manager not available"}
+
+    if not memory_manager.config.enabled:
+        logger.debug("memory_extract: Memory system disabled")
+        return None
+
+    if not llm_service:
+        return {"error": "LLM service not available"}
+
+    try:
+        from gobby.memory.extractor import SessionMemoryExtractor
+
+        extractor = SessionMemoryExtractor(
+            memory_manager=memory_manager,
+            session_manager=session_manager,
+            llm_service=llm_service,
+            transcript_processor=transcript_processor,
+        )
+
+        candidates = await extractor.extract(
+            session_id=session_id,
+            min_importance=min_importance,
+            max_memories=max_memories,
+            dry_run=dry_run,
+        )
+
+        if not candidates:
+            logger.debug(f"memory_extract: No memories extracted from session {session_id}")
+            return {"extracted_count": 0, "memories": []}
+
+        logger.info(
+            f"memory_extract: Extracted {len(candidates)} memories from session {session_id}"
+        )
+
+        return {
+            "extracted_count": len(candidates),
+            "memories": [c.to_dict() for c in candidates],
+            "dry_run": dry_run,
+        }
+
+    except Exception as e:
+        logger.error(f"memory_extract: Failed: {e}", exc_info=True)
+        return {"error": str(e)}
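
A hedged usage sketch for the new action function; the call signature and return shape come from the code above, while the manager and service objects would come from the running daemon in practice:

    result = await memory_extract(
        session_manager=session_manager,
        session_id="sess-123",
        llm_service=llm_service,
        memory_manager=memory_manager,
        min_importance=0.8,  # keep only high-importance candidates
        max_memories=3,
        dry_run=True,        # preview candidates without storing them
    )
    # e.g. {"extracted_count": 2, "memories": [...], "dry_run": True}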