gobby 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244)
  1. gobby/__init__.py +1 -1
  2. gobby/adapters/__init__.py +2 -1
  3. gobby/adapters/claude_code.py +13 -4
  4. gobby/adapters/codex_impl/__init__.py +28 -0
  5. gobby/adapters/codex_impl/adapter.py +722 -0
  6. gobby/adapters/codex_impl/client.py +679 -0
  7. gobby/adapters/codex_impl/protocol.py +20 -0
  8. gobby/adapters/codex_impl/types.py +68 -0
  9. gobby/agents/definitions.py +11 -1
  10. gobby/agents/isolation.py +395 -0
  11. gobby/agents/runner.py +8 -0
  12. gobby/agents/sandbox.py +261 -0
  13. gobby/agents/spawn.py +42 -287
  14. gobby/agents/spawn_executor.py +385 -0
  15. gobby/agents/spawners/__init__.py +24 -0
  16. gobby/agents/spawners/command_builder.py +189 -0
  17. gobby/agents/spawners/embedded.py +21 -2
  18. gobby/agents/spawners/headless.py +21 -2
  19. gobby/agents/spawners/prompt_manager.py +125 -0
  20. gobby/cli/__init__.py +6 -0
  21. gobby/cli/clones.py +419 -0
  22. gobby/cli/conductor.py +266 -0
  23. gobby/cli/install.py +4 -4
  24. gobby/cli/installers/antigravity.py +3 -9
  25. gobby/cli/installers/claude.py +15 -9
  26. gobby/cli/installers/codex.py +2 -8
  27. gobby/cli/installers/gemini.py +8 -8
  28. gobby/cli/installers/shared.py +175 -13
  29. gobby/cli/sessions.py +1 -1
  30. gobby/cli/skills.py +858 -0
  31. gobby/cli/tasks/ai.py +0 -440
  32. gobby/cli/tasks/crud.py +44 -6
  33. gobby/cli/tasks/main.py +0 -4
  34. gobby/cli/tui.py +2 -2
  35. gobby/cli/utils.py +12 -5
  36. gobby/clones/__init__.py +13 -0
  37. gobby/clones/git.py +547 -0
  38. gobby/conductor/__init__.py +16 -0
  39. gobby/conductor/alerts.py +135 -0
  40. gobby/conductor/loop.py +164 -0
  41. gobby/conductor/monitors/__init__.py +11 -0
  42. gobby/conductor/monitors/agents.py +116 -0
  43. gobby/conductor/monitors/tasks.py +155 -0
  44. gobby/conductor/pricing.py +234 -0
  45. gobby/conductor/token_tracker.py +160 -0
  46. gobby/config/__init__.py +12 -97
  47. gobby/config/app.py +69 -91
  48. gobby/config/extensions.py +2 -2
  49. gobby/config/features.py +7 -130
  50. gobby/config/search.py +110 -0
  51. gobby/config/servers.py +1 -1
  52. gobby/config/skills.py +43 -0
  53. gobby/config/tasks.py +9 -41
  54. gobby/hooks/__init__.py +0 -13
  55. gobby/hooks/event_handlers.py +188 -2
  56. gobby/hooks/hook_manager.py +50 -4
  57. gobby/hooks/plugins.py +1 -1
  58. gobby/hooks/skill_manager.py +130 -0
  59. gobby/hooks/webhooks.py +1 -1
  60. gobby/install/claude/hooks/hook_dispatcher.py +4 -4
  61. gobby/install/codex/hooks/hook_dispatcher.py +1 -1
  62. gobby/install/gemini/hooks/hook_dispatcher.py +87 -12
  63. gobby/llm/claude.py +22 -34
  64. gobby/llm/claude_executor.py +46 -256
  65. gobby/llm/codex_executor.py +59 -291
  66. gobby/llm/executor.py +21 -0
  67. gobby/llm/gemini.py +134 -110
  68. gobby/llm/litellm_executor.py +143 -6
  69. gobby/llm/resolver.py +98 -35
  70. gobby/mcp_proxy/importer.py +62 -4
  71. gobby/mcp_proxy/instructions.py +56 -0
  72. gobby/mcp_proxy/models.py +15 -0
  73. gobby/mcp_proxy/registries.py +68 -8
  74. gobby/mcp_proxy/server.py +33 -3
  75. gobby/mcp_proxy/services/recommendation.py +43 -11
  76. gobby/mcp_proxy/services/tool_proxy.py +81 -1
  77. gobby/mcp_proxy/stdio.py +2 -1
  78. gobby/mcp_proxy/tools/__init__.py +0 -2
  79. gobby/mcp_proxy/tools/agent_messaging.py +317 -0
  80. gobby/mcp_proxy/tools/agents.py +31 -731
  81. gobby/mcp_proxy/tools/clones.py +518 -0
  82. gobby/mcp_proxy/tools/memory.py +3 -26
  83. gobby/mcp_proxy/tools/metrics.py +65 -1
  84. gobby/mcp_proxy/tools/orchestration/__init__.py +3 -0
  85. gobby/mcp_proxy/tools/orchestration/cleanup.py +151 -0
  86. gobby/mcp_proxy/tools/orchestration/wait.py +467 -0
  87. gobby/mcp_proxy/tools/sessions/__init__.py +14 -0
  88. gobby/mcp_proxy/tools/sessions/_commits.py +232 -0
  89. gobby/mcp_proxy/tools/sessions/_crud.py +253 -0
  90. gobby/mcp_proxy/tools/sessions/_factory.py +63 -0
  91. gobby/mcp_proxy/tools/sessions/_handoff.py +499 -0
  92. gobby/mcp_proxy/tools/sessions/_messages.py +138 -0
  93. gobby/mcp_proxy/tools/skills/__init__.py +616 -0
  94. gobby/mcp_proxy/tools/spawn_agent.py +417 -0
  95. gobby/mcp_proxy/tools/task_orchestration.py +7 -0
  96. gobby/mcp_proxy/tools/task_readiness.py +14 -0
  97. gobby/mcp_proxy/tools/task_sync.py +1 -1
  98. gobby/mcp_proxy/tools/tasks/_context.py +0 -20
  99. gobby/mcp_proxy/tools/tasks/_crud.py +91 -4
  100. gobby/mcp_proxy/tools/tasks/_expansion.py +348 -0
  101. gobby/mcp_proxy/tools/tasks/_factory.py +6 -16
  102. gobby/mcp_proxy/tools/tasks/_lifecycle.py +110 -45
  103. gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +18 -29
  104. gobby/mcp_proxy/tools/workflows.py +1 -1
  105. gobby/mcp_proxy/tools/worktrees.py +0 -338
  106. gobby/memory/backends/__init__.py +6 -1
  107. gobby/memory/backends/mem0.py +6 -1
  108. gobby/memory/extractor.py +477 -0
  109. gobby/memory/ingestion/__init__.py +5 -0
  110. gobby/memory/ingestion/multimodal.py +221 -0
  111. gobby/memory/manager.py +73 -285
  112. gobby/memory/search/__init__.py +10 -0
  113. gobby/memory/search/coordinator.py +248 -0
  114. gobby/memory/services/__init__.py +5 -0
  115. gobby/memory/services/crossref.py +142 -0
  116. gobby/prompts/loader.py +5 -2
  117. gobby/runner.py +37 -16
  118. gobby/search/__init__.py +48 -6
  119. gobby/search/backends/__init__.py +159 -0
  120. gobby/search/backends/embedding.py +225 -0
  121. gobby/search/embeddings.py +238 -0
  122. gobby/search/models.py +148 -0
  123. gobby/search/unified.py +496 -0
  124. gobby/servers/http.py +24 -12
  125. gobby/servers/routes/admin.py +294 -0
  126. gobby/servers/routes/mcp/endpoints/__init__.py +61 -0
  127. gobby/servers/routes/mcp/endpoints/discovery.py +405 -0
  128. gobby/servers/routes/mcp/endpoints/execution.py +568 -0
  129. gobby/servers/routes/mcp/endpoints/registry.py +378 -0
  130. gobby/servers/routes/mcp/endpoints/server.py +304 -0
  131. gobby/servers/routes/mcp/hooks.py +1 -1
  132. gobby/servers/routes/mcp/tools.py +48 -1317
  133. gobby/servers/websocket.py +2 -2
  134. gobby/sessions/analyzer.py +2 -0
  135. gobby/sessions/lifecycle.py +1 -1
  136. gobby/sessions/processor.py +10 -0
  137. gobby/sessions/transcripts/base.py +2 -0
  138. gobby/sessions/transcripts/claude.py +79 -10
  139. gobby/skills/__init__.py +91 -0
  140. gobby/skills/loader.py +685 -0
  141. gobby/skills/manager.py +384 -0
  142. gobby/skills/parser.py +286 -0
  143. gobby/skills/search.py +463 -0
  144. gobby/skills/sync.py +119 -0
  145. gobby/skills/updater.py +385 -0
  146. gobby/skills/validator.py +368 -0
  147. gobby/storage/clones.py +378 -0
  148. gobby/storage/database.py +1 -1
  149. gobby/storage/memories.py +43 -13
  150. gobby/storage/migrations.py +162 -201
  151. gobby/storage/sessions.py +116 -7
  152. gobby/storage/skills.py +782 -0
  153. gobby/storage/tasks/_crud.py +4 -4
  154. gobby/storage/tasks/_lifecycle.py +57 -7
  155. gobby/storage/tasks/_manager.py +14 -5
  156. gobby/storage/tasks/_models.py +8 -3
  157. gobby/sync/memories.py +40 -5
  158. gobby/sync/tasks.py +83 -6
  159. gobby/tasks/__init__.py +1 -2
  160. gobby/tasks/external_validator.py +1 -1
  161. gobby/tasks/validation.py +46 -35
  162. gobby/tools/summarizer.py +91 -10
  163. gobby/tui/api_client.py +4 -7
  164. gobby/tui/app.py +5 -3
  165. gobby/tui/screens/orchestrator.py +1 -2
  166. gobby/tui/screens/tasks.py +2 -4
  167. gobby/tui/ws_client.py +1 -1
  168. gobby/utils/daemon_client.py +2 -2
  169. gobby/utils/project_context.py +2 -3
  170. gobby/utils/status.py +13 -0
  171. gobby/workflows/actions.py +221 -1135
  172. gobby/workflows/artifact_actions.py +31 -0
  173. gobby/workflows/autonomous_actions.py +11 -0
  174. gobby/workflows/context_actions.py +93 -1
  175. gobby/workflows/detection_helpers.py +115 -31
  176. gobby/workflows/enforcement/__init__.py +47 -0
  177. gobby/workflows/enforcement/blocking.py +269 -0
  178. gobby/workflows/enforcement/commit_policy.py +283 -0
  179. gobby/workflows/enforcement/handlers.py +269 -0
  180. gobby/workflows/{task_enforcement_actions.py → enforcement/task_policy.py} +29 -388
  181. gobby/workflows/engine.py +13 -2
  182. gobby/workflows/git_utils.py +106 -0
  183. gobby/workflows/lifecycle_evaluator.py +29 -1
  184. gobby/workflows/llm_actions.py +30 -0
  185. gobby/workflows/loader.py +19 -6
  186. gobby/workflows/mcp_actions.py +20 -1
  187. gobby/workflows/memory_actions.py +154 -0
  188. gobby/workflows/safe_evaluator.py +183 -0
  189. gobby/workflows/session_actions.py +44 -0
  190. gobby/workflows/state_actions.py +60 -1
  191. gobby/workflows/stop_signal_actions.py +55 -0
  192. gobby/workflows/summary_actions.py +111 -1
  193. gobby/workflows/task_sync_actions.py +347 -0
  194. gobby/workflows/todo_actions.py +34 -1
  195. gobby/workflows/webhook_actions.py +185 -0
  196. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/METADATA +87 -21
  197. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/RECORD +201 -172
  198. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/WHEEL +1 -1
  199. gobby/adapters/codex.py +0 -1292
  200. gobby/install/claude/commands/gobby/bug.md +0 -51
  201. gobby/install/claude/commands/gobby/chore.md +0 -51
  202. gobby/install/claude/commands/gobby/epic.md +0 -52
  203. gobby/install/claude/commands/gobby/eval.md +0 -235
  204. gobby/install/claude/commands/gobby/feat.md +0 -49
  205. gobby/install/claude/commands/gobby/nit.md +0 -52
  206. gobby/install/claude/commands/gobby/ref.md +0 -52
  207. gobby/install/codex/prompts/forget.md +0 -7
  208. gobby/install/codex/prompts/memories.md +0 -7
  209. gobby/install/codex/prompts/recall.md +0 -7
  210. gobby/install/codex/prompts/remember.md +0 -13
  211. gobby/llm/gemini_executor.py +0 -339
  212. gobby/mcp_proxy/tools/session_messages.py +0 -1056
  213. gobby/mcp_proxy/tools/task_expansion.py +0 -591
  214. gobby/prompts/defaults/expansion/system.md +0 -119
  215. gobby/prompts/defaults/expansion/user.md +0 -48
  216. gobby/prompts/defaults/external_validation/agent.md +0 -72
  217. gobby/prompts/defaults/external_validation/external.md +0 -63
  218. gobby/prompts/defaults/external_validation/spawn.md +0 -83
  219. gobby/prompts/defaults/external_validation/system.md +0 -6
  220. gobby/prompts/defaults/features/import_mcp.md +0 -22
  221. gobby/prompts/defaults/features/import_mcp_github.md +0 -17
  222. gobby/prompts/defaults/features/import_mcp_search.md +0 -16
  223. gobby/prompts/defaults/features/recommend_tools.md +0 -32
  224. gobby/prompts/defaults/features/recommend_tools_hybrid.md +0 -35
  225. gobby/prompts/defaults/features/recommend_tools_llm.md +0 -30
  226. gobby/prompts/defaults/features/server_description.md +0 -20
  227. gobby/prompts/defaults/features/server_description_system.md +0 -6
  228. gobby/prompts/defaults/features/task_description.md +0 -31
  229. gobby/prompts/defaults/features/task_description_system.md +0 -6
  230. gobby/prompts/defaults/features/tool_summary.md +0 -17
  231. gobby/prompts/defaults/features/tool_summary_system.md +0 -6
  232. gobby/prompts/defaults/research/step.md +0 -58
  233. gobby/prompts/defaults/validation/criteria.md +0 -47
  234. gobby/prompts/defaults/validation/validate.md +0 -38
  235. gobby/storage/migrations_legacy.py +0 -1359
  236. gobby/tasks/context.py +0 -747
  237. gobby/tasks/criteria.py +0 -342
  238. gobby/tasks/expansion.py +0 -626
  239. gobby/tasks/prompts/expand.py +0 -327
  240. gobby/tasks/research.py +0 -421
  241. gobby/tasks/tdd.py +0 -352
  242. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/entry_points.txt +0 -0
  243. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/licenses/LICENSE.md +0 -0
  244. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,477 @@
1
+ """
2
+ Session memory extractor.
3
+
4
+ Automatically extracts meaningful, reusable memories from session transcripts.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import logging
11
+ from dataclasses import dataclass
12
+ from pathlib import Path
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ from gobby.prompts.loader import PromptLoader
16
+ from gobby.workflows.summary_actions import format_turns_for_llm
17
+
18
+ if TYPE_CHECKING:
19
+ from gobby.llm.service import LLMService
20
+ from gobby.memory.manager import MemoryManager
21
+ from gobby.storage.sessions import LocalSessionManager
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+ # Prompt path in the prompts collection
26
+ EXTRACT_PROMPT_PATH = "memory/extract"
27
+
28
+
29
@dataclass
class MemoryCandidate:
    """A single memory candidate produced while analyzing a session."""

    # Natural-language body of the candidate memory.
    content: str
    # One of: fact, pattern, preference, context.
    memory_type: str
    # Importance score in [0.0, 1.0].
    importance: float
    # Free-form labels attached to the candidate.
    tags: list[str]

    def to_dict(self) -> dict[str, Any]:
        """Return the candidate as a plain dictionary."""
        field_names = ("content", "memory_type", "importance", "tags")
        return {name: getattr(self, name) for name in field_names}
46
+
47
+
48
@dataclass
class SessionContext:
    """Context extracted from a session for memory extraction."""

    # ID of the session the context was built from.
    session_id: str
    # Owning project ID, if the session belongs to a project.
    project_id: str | None
    # Human-readable project name ("Unknown Project" when unresolved).
    project_name: str
    # Comma-separated task IDs referenced via task tools, or "None".
    task_refs: str
    # Comma-separated file paths touched by edit tools, or "None".
    files_modified: str
    # "Tool(count)" summary of notable tool usage, or "None".
    tool_summary: str
    # Recent transcript turns formatted for LLM consumption ("" if no transcript).
    transcript_summary: str
59
+
60
+
61
class SessionMemoryExtractor:
    """Mine session transcripts for durable, reusable memories.

    Runs LLM analysis over a session's transcript to surface high-value
    knowledge and persists the results through the memory manager.
    """

    def __init__(
        self,
        memory_manager: MemoryManager,
        session_manager: LocalSessionManager,
        llm_service: LLMService,
        prompt_loader: PromptLoader | None = None,
        transcript_processor: Any | None = None,
    ):
        """Wire up the extractor's collaborators.

        Args:
            memory_manager: Destination store for extracted memories.
            session_manager: Source of session records.
            llm_service: LLM service used for transcript analysis.
            prompt_loader: Prompt loader; a default is constructed if omitted.
            transcript_processor: Optional helper used to parse transcripts.
        """
        self.memory_manager = memory_manager
        self.session_manager = session_manager
        self.llm_service = llm_service
        # Fall back to a freshly constructed loader when none is supplied.
        self.prompt_loader = prompt_loader or PromptLoader()
        self.transcript_processor = transcript_processor
90
+
91
+ async def extract(
92
+ self,
93
+ session_id: str,
94
+ min_importance: float = 0.7,
95
+ max_memories: int = 5,
96
+ dry_run: bool = False,
97
+ ) -> list[MemoryCandidate]:
98
+ """Extract memories from a session.
99
+
100
+ Args:
101
+ session_id: The session to extract memories from
102
+ min_importance: Minimum importance threshold (0.0-1.0)
103
+ max_memories: Maximum number of memories to extract
104
+ dry_run: If True, don't store memories, just return candidates
105
+
106
+ Returns:
107
+ List of extracted memory candidates
108
+ """
109
+ # 1. Get session context
110
+ context = await self._get_session_context(session_id)
111
+ if not context:
112
+ logger.warning(f"Could not get context for session {session_id}")
113
+ return []
114
+
115
+ # 2. Load and render prompt
116
+ prompt = self._render_prompt(
117
+ context=context,
118
+ min_importance=min_importance,
119
+ max_memories=max_memories,
120
+ )
121
+
122
+ # 3. LLM analysis
123
+ candidates = await self._analyze_with_llm(prompt)
124
+ if not candidates:
125
+ logger.debug(f"No memory candidates extracted from session {session_id}")
126
+ return []
127
+
128
+ # 4. Quality filter + deduplicate
129
+ filtered = await self._filter_and_dedupe(
130
+ candidates=candidates,
131
+ min_importance=min_importance,
132
+ project_id=context.project_id,
133
+ )
134
+
135
+ # 5. Store (unless dry_run)
136
+ if not dry_run and filtered:
137
+ await self._store_memories(
138
+ candidates=filtered,
139
+ session_id=session_id,
140
+ project_id=context.project_id,
141
+ )
142
+
143
+ return filtered
144
+
145
    async def _get_session_context(self, session_id: str) -> SessionContext | None:
        """Get context from the session for memory extraction.

        Walks the session's JSONL transcript (when present) to summarize
        recent turns, the files modified, task tool references, and tool
        usage counts. Missing transcripts yield empty-string fields.

        Args:
            session_id: The session ID

        Returns:
            SessionContext with extracted information, or None if not available
        """
        session = self.session_manager.get(session_id)
        if not session:
            return None

        # Get project info
        project_id = session.project_id
        project_name = "Unknown Project"

        if project_id:
            # Try to get project name from project manager; failures are
            # non-fatal (the default name above is used instead).
            try:
                from gobby.storage.projects import LocalProjectManager

                project_mgr = LocalProjectManager(self.memory_manager.db)
                project = project_mgr.get(project_id)
                if project and project.name:
                    project_name = project.name
            except Exception as e:
                logger.debug(f"Could not get project name: {e}")

        # Get transcript content; the session may have no jsonl_path attribute.
        transcript_path = getattr(session, "jsonl_path", None)
        transcript_summary = ""
        task_refs = ""
        files_modified = ""
        tool_summary_parts: list[str] = []

        if transcript_path and Path(transcript_path).exists():
            turns = self._load_transcript(transcript_path)

            # Extract turns since last clear (or all if no clear); without a
            # processor, keep at most the 50 most recent turns.
            if self.transcript_processor:
                recent_turns = self.transcript_processor.extract_turns_since_clear(
                    turns, max_turns=50
                )
            else:
                recent_turns = turns[-50:] if len(turns) > 50 else turns

            # Format for LLM
            transcript_summary = format_turns_for_llm(recent_turns)

            # Extract file modifications and tool usage from turns.
            # NOTE(review): assumes Claude-style turn dicts where
            # turn["message"]["content"] is a list of typed blocks.
            files_set: set[str] = set()
            task_set: set[str] = set()

            for turn in recent_turns:
                message = turn.get("message", {})
                content = message.get("content", [])

                if isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict) and block.get("type") == "tool_use":
                            tool_name = block.get("name", "")
                            tool_input = block.get("input", {})

                            # Track file modifications
                            if tool_name in ("Edit", "Write", "NotebookEdit"):
                                file_path = tool_input.get("file_path", "")
                                if file_path:
                                    files_set.add(file_path)

                            # Track task references
                            if tool_name in ("update_task", "create_task", "close_task"):
                                task_id = tool_input.get("task_id", "")
                                if task_id:
                                    task_set.add(task_id)

                            # Track key tool actions
                            if tool_name in ("Edit", "Write", "Bash", "Grep", "Glob"):
                                tool_summary_parts.append(tool_name)

            files_modified = ", ".join(sorted(files_set)) if files_set else "None"
            task_refs = ", ".join(sorted(task_set)) if task_set else "None"

        # Create tool summary (count of each tool type)
        tool_counts: dict[str, int] = {}
        for tool in tool_summary_parts:
            tool_counts[tool] = tool_counts.get(tool, 0) + 1
        tool_summary = ", ".join(f"{k}({v})" for k, v in sorted(tool_counts.items()))

        return SessionContext(
            session_id=session_id,
            project_id=project_id,
            project_name=project_name,
            task_refs=task_refs,
            files_modified=files_modified,
            tool_summary=tool_summary or "None",
            transcript_summary=transcript_summary,
        )
243
+
244
+ def _load_transcript(self, transcript_path: str) -> list[dict[str, Any]]:
245
+ """Load transcript turns from JSONL file.
246
+
247
+ Args:
248
+ transcript_path: Path to the transcript file
249
+
250
+ Returns:
251
+ List of turn dictionaries
252
+ """
253
+ turns: list[dict[str, Any]] = []
254
+ try:
255
+ with open(transcript_path, encoding="utf-8") as f:
256
+ for line in f:
257
+ if line.strip():
258
+ turns.append(json.loads(line))
259
+ except Exception as e:
260
+ logger.warning(f"Failed to load transcript: {e}")
261
+ return turns
262
+
263
+ def _render_prompt(
264
+ self,
265
+ context: SessionContext,
266
+ min_importance: float,
267
+ max_memories: int,
268
+ ) -> str:
269
+ """Render the extraction prompt with context.
270
+
271
+ Args:
272
+ context: Session context
273
+ min_importance: Minimum importance threshold
274
+ max_memories: Maximum memories to extract
275
+
276
+ Returns:
277
+ Rendered prompt string
278
+ """
279
+ return self.prompt_loader.render(
280
+ EXTRACT_PROMPT_PATH,
281
+ {
282
+ "project_name": context.project_name,
283
+ "task_refs": context.task_refs,
284
+ "files": context.files_modified,
285
+ "tool_summary": context.tool_summary,
286
+ "transcript_summary": context.transcript_summary,
287
+ "min_importance": min_importance,
288
+ "max_memories": max_memories,
289
+ },
290
+ )
291
+
292
+ async def _analyze_with_llm(self, prompt: str) -> list[MemoryCandidate]:
293
+ """Call LLM to analyze transcript and extract memories.
294
+
295
+ Args:
296
+ prompt: Rendered prompt for the LLM
297
+
298
+ Returns:
299
+ List of memory candidates extracted from LLM response
300
+ """
301
+ try:
302
+ provider = self.llm_service.get_default_provider()
303
+ response = await provider.generate_text(prompt)
304
+
305
+ # Parse JSON from response
306
+ candidates = self._parse_llm_response(response)
307
+ return candidates
308
+
309
+ except Exception as e:
310
+ logger.error(f"LLM analysis failed: {e}")
311
+ return []
312
+
313
+ def _parse_llm_response(self, response: str) -> list[MemoryCandidate]:
314
+ """Parse LLM response to extract memory candidates.
315
+
316
+ Args:
317
+ response: Raw LLM response text
318
+
319
+ Returns:
320
+ List of memory candidates
321
+ """
322
+ candidates: list[MemoryCandidate] = []
323
+
324
+ # Try to find JSON array in response
325
+ try:
326
+ # Look for JSON array markers
327
+ start_idx = response.find("[")
328
+ end_idx = response.rfind("]")
329
+
330
+ if start_idx == -1 or end_idx == -1:
331
+ logger.warning("No JSON array found in LLM response")
332
+ return []
333
+
334
+ json_str = response[start_idx : end_idx + 1]
335
+ data = json.loads(json_str)
336
+
337
+ if not isinstance(data, list):
338
+ logger.warning("LLM response is not a list")
339
+ return []
340
+
341
+ for item in data:
342
+ if not isinstance(item, dict):
343
+ continue
344
+
345
+ content = item.get("content", "").strip()
346
+ if not content:
347
+ continue
348
+
349
+ memory_type = item.get("memory_type", "fact")
350
+ if memory_type not in ("fact", "pattern", "preference", "context"):
351
+ memory_type = "fact"
352
+
353
+ raw_importance = item.get("importance", 0.7)
354
+ try:
355
+ importance = float(raw_importance)
356
+ except (ValueError, TypeError) as e:
357
+ logger.warning(
358
+ f"Invalid importance value '{raw_importance}' in memory item "
359
+ f"(content: {content[:50]}...): {e}. Using default 0.7"
360
+ )
361
+ importance = 0.7
362
+ importance = max(0.0, min(1.0, importance))
363
+
364
+ tags = item.get("tags", [])
365
+ if not isinstance(tags, list):
366
+ tags = []
367
+ tags = [str(t) for t in tags]
368
+
369
+ candidates.append(
370
+ MemoryCandidate(
371
+ content=content,
372
+ memory_type=memory_type,
373
+ importance=importance,
374
+ tags=tags,
375
+ )
376
+ )
377
+
378
+ except json.JSONDecodeError as e:
379
+ logger.warning(f"Failed to parse LLM response as JSON: {e}")
380
+ except Exception as e:
381
+ logger.warning(f"Error parsing LLM response: {e}")
382
+
383
+ return candidates
384
+
385
+ async def _filter_and_dedupe(
386
+ self,
387
+ candidates: list[MemoryCandidate],
388
+ min_importance: float,
389
+ project_id: str | None,
390
+ ) -> list[MemoryCandidate]:
391
+ """Filter candidates by importance and deduplicate against existing memories.
392
+
393
+ Args:
394
+ candidates: Raw candidates from LLM
395
+ min_importance: Minimum importance threshold
396
+ project_id: Project ID for deduplication
397
+
398
+ Returns:
399
+ Filtered and deduplicated candidates
400
+ """
401
+ filtered: list[MemoryCandidate] = []
402
+
403
+ for candidate in candidates:
404
+ # Skip low importance
405
+ if candidate.importance < min_importance:
406
+ continue
407
+
408
+ # Check for duplicates in existing memories
409
+ if self.memory_manager.content_exists(candidate.content, project_id):
410
+ logger.debug(f"Skipping duplicate memory: {candidate.content[:50]}...")
411
+ continue
412
+
413
+ # Check for near-duplicates in this batch
414
+ is_duplicate = False
415
+ for existing in filtered:
416
+ if self._is_similar(candidate.content, existing.content):
417
+ is_duplicate = True
418
+ break
419
+
420
+ if not is_duplicate:
421
+ filtered.append(candidate)
422
+
423
+ return filtered
424
+
425
+ def _is_similar(self, content1: str, content2: str, threshold: float = 0.8) -> bool:
426
+ """Check if two content strings are similar enough to be considered duplicates.
427
+
428
+ Uses a simple word overlap heuristic.
429
+
430
+ Args:
431
+ content1: First content string
432
+ content2: Second content string
433
+ threshold: Similarity threshold (0.0-1.0)
434
+
435
+ Returns:
436
+ True if contents are similar
437
+ """
438
+ words1 = set(content1.lower().split())
439
+ words2 = set(content2.lower().split())
440
+
441
+ if not words1 or not words2:
442
+ return False
443
+
444
+ # Jaccard similarity
445
+ intersection = len(words1 & words2)
446
+ union = len(words1 | words2)
447
+
448
+ similarity = intersection / union if union > 0 else 0
449
+ return similarity >= threshold
450
+
451
+ async def _store_memories(
452
+ self,
453
+ candidates: list[MemoryCandidate],
454
+ session_id: str,
455
+ project_id: str | None,
456
+ ) -> None:
457
+ """Store extracted memories.
458
+
459
+ Args:
460
+ candidates: Memory candidates to store
461
+ session_id: Source session ID
462
+ project_id: Project ID for the memories
463
+ """
464
+ for candidate in candidates:
465
+ try:
466
+ await self.memory_manager.remember(
467
+ content=candidate.content,
468
+ memory_type=candidate.memory_type,
469
+ importance=candidate.importance,
470
+ project_id=project_id,
471
+ source_type="session",
472
+ source_session_id=session_id,
473
+ tags=candidate.tags,
474
+ )
475
+ logger.debug(f"Stored memory: {candidate.content[:50]}...")
476
+ except Exception as e:
477
+ logger.warning(f"Failed to store memory: {e}")
@@ -0,0 +1,5 @@
1
+ """Memory ingestion components for multimodal content."""
2
+
3
+ from gobby.memory.ingestion.multimodal import MultimodalIngestor
4
+
5
+ __all__ = ["MultimodalIngestor"]
@@ -0,0 +1,221 @@
1
+ """Multimodal content ingestion for memory system."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import mimetypes
7
+ from pathlib import Path
8
+ from typing import TYPE_CHECKING
9
+
10
+ from gobby.memory.protocol import MediaAttachment
11
+ from gobby.storage.memories import Memory
12
+
13
+ if TYPE_CHECKING:
14
+ from gobby.llm.service import LLMService
15
+ from gobby.memory.protocol import MemoryBackendProtocol
16
+ from gobby.storage.memories import LocalMemoryManager
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class MultimodalIngestor:
    """
    Ingests multimodal content (images, screenshots) into the memory system.

    Splits image handling out of MemoryManager into a focused component
    that describes media via an LLM and persists it with attachments.
    """

    def __init__(
        self,
        storage: LocalMemoryManager,
        backend: MemoryBackendProtocol,
        llm_service: LLMService | None = None,
    ):
        """
        Wire up the ingestor's collaborators.

        Args:
            storage: Memory storage manager used for persistence lookups
            backend: Memory backend protocol used to create records
            llm_service: Optional LLM service used to describe images
        """
        self._storage = storage
        self._backend = backend
        self._llm_service = llm_service
46
+
47
    @property
    def llm_service(self) -> LLMService | None:
        """The LLM service used to generate image descriptions (may be None)."""
        return self._llm_service

    @llm_service.setter
    def llm_service(self, service: LLMService | None) -> None:
        """Replace the LLM service used for image description."""
        self._llm_service = service
56
+
57
    async def remember_with_image(
        self,
        image_path: str,
        context: str | None = None,
        memory_type: str = "fact",
        importance: float = 0.5,
        project_id: str | None = None,
        source_type: str = "user",
        source_session_id: str | None = None,
        tags: list[str] | None = None,
    ) -> Memory:
        """
        Store a memory with an image attachment.

        Uses the configured LLM provider to generate a description of the image,
        then stores the memory with the description as content and the image
        as a media attachment.

        Args:
            image_path: Path to the image file
            context: Optional context to guide the image description
            memory_type: Type of memory (fact, preference, etc)
            importance: 0.0-1.0 importance score
            project_id: Optional project context
            source_type: Origin of memory
            source_session_id: Origin session
            tags: Optional tags

        Returns:
            The created Memory object

        Raises:
            ValueError: If LLM service is not configured or image not found
        """
        path = Path(image_path)
        if not path.exists():
            raise ValueError(f"Image not found: {image_path}")

        # Get LLM provider for image description; without one the description
        # step below is impossible, so fail fast with guidance.
        if not self._llm_service:
            raise ValueError(
                "LLM service not configured. Pass llm_service to MemoryManager "
                "to enable remember_with_image."
            )

        provider = self._llm_service.get_default_provider()

        # Generate image description (used as the memory's text content).
        description = await provider.describe_image(image_path, context=context)

        # Determine MIME type; fall back to the generic binary type.
        mime_type, _ = mimetypes.guess_type(str(path))
        if not mime_type:
            mime_type = "application/octet-stream"

        # Create media attachment referencing the image on disk.
        media = MediaAttachment(
            media_type="image",
            content_path=str(path.absolute()),
            mime_type=mime_type,
            description=description,
            description_model=provider.provider_name,
        )

        # Store memory with media attachment via backend
        record = await self._backend.create(
            content=description,
            memory_type=memory_type,
            importance=importance,
            project_id=project_id,
            source_type=source_type,
            source_session_id=source_session_id,
            tags=tags,
            media=[media],
        )

        # Return as Memory object for backward compatibility
        # Note: The backend returns MemoryRecord, but we need Memory
        memory = self._storage.get_memory(record.id)
        if memory is not None:
            return memory

        # Fallback: construct Memory from MemoryRecord if storage lookup fails
        # This can happen with synthetic records from failed backend calls
        return Memory(
            id=record.id,
            content=record.content,
            memory_type=record.memory_type,
            created_at=record.created_at.isoformat(),
            updated_at=record.updated_at.isoformat()
            if record.updated_at
            else record.created_at.isoformat(),
            project_id=record.project_id,
            source_type=record.source_type,
            source_session_id=record.source_session_id,
            importance=record.importance,
            tags=record.tags,
        )
155
+
156
+ async def remember_screenshot(
157
+ self,
158
+ screenshot_bytes: bytes,
159
+ context: str | None = None,
160
+ memory_type: str = "observation",
161
+ importance: float = 0.5,
162
+ project_id: str | None = None,
163
+ source_type: str = "user",
164
+ source_session_id: str | None = None,
165
+ tags: list[str] | None = None,
166
+ ) -> Memory:
167
+ """
168
+ Store a memory from raw screenshot bytes.
169
+
170
+ Saves the screenshot to .gobby/resources/ with a timestamp-based filename,
171
+ then delegates to remember_with_image() for LLM description and storage.
172
+
173
+ Args:
174
+ screenshot_bytes: Raw PNG screenshot bytes (from Playwright/Puppeteer)
175
+ context: Optional context to guide the image description
176
+ memory_type: Type of memory (default: "observation")
177
+ importance: 0.0-1.0 importance score
178
+ project_id: Optional project context
179
+ source_type: Origin of memory
180
+ source_session_id: Origin session
181
+ tags: Optional tags
182
+
183
+ Returns:
184
+ The created Memory object
185
+
186
+ Raises:
187
+ ValueError: If LLM service is not configured or screenshot bytes are empty
188
+ """
189
+ if not screenshot_bytes:
190
+ raise ValueError("Screenshot bytes cannot be empty")
191
+
192
+ # Determine resources directory using centralized utility
193
+ from datetime import datetime as dt
194
+
195
+ from gobby.cli.utils import get_resources_dir
196
+ from gobby.utils.project_context import get_project_context
197
+
198
+ ctx = get_project_context()
199
+ project_path = ctx.get("path") if ctx else None
200
+ resources_dir = get_resources_dir(project_path)
201
+
202
+ # Generate timestamp-based filename
203
+ timestamp = dt.now().strftime("%Y%m%d_%H%M%S_%f")
204
+ filename = f"screenshot_{timestamp}.png"
205
+ filepath = resources_dir / filename
206
+
207
+ # Write screenshot to file
208
+ filepath.write_bytes(screenshot_bytes)
209
+ logger.debug(f"Saved screenshot to {filepath}")
210
+
211
+ # Delegate to remember_with_image
212
+ return await self.remember_with_image(
213
+ image_path=str(filepath),
214
+ context=context,
215
+ memory_type=memory_type,
216
+ importance=importance,
217
+ project_id=project_id,
218
+ source_type=source_type,
219
+ source_session_id=source_session_id,
220
+ tags=tags,
221
+ )