gobby 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gobby/__init__.py +1 -1
- gobby/adapters/__init__.py +2 -1
- gobby/adapters/claude_code.py +13 -4
- gobby/adapters/codex_impl/__init__.py +28 -0
- gobby/adapters/codex_impl/adapter.py +722 -0
- gobby/adapters/codex_impl/client.py +679 -0
- gobby/adapters/codex_impl/protocol.py +20 -0
- gobby/adapters/codex_impl/types.py +68 -0
- gobby/agents/definitions.py +11 -1
- gobby/agents/isolation.py +395 -0
- gobby/agents/runner.py +8 -0
- gobby/agents/sandbox.py +261 -0
- gobby/agents/spawn.py +42 -287
- gobby/agents/spawn_executor.py +385 -0
- gobby/agents/spawners/__init__.py +24 -0
- gobby/agents/spawners/command_builder.py +189 -0
- gobby/agents/spawners/embedded.py +21 -2
- gobby/agents/spawners/headless.py +21 -2
- gobby/agents/spawners/prompt_manager.py +125 -0
- gobby/cli/__init__.py +6 -0
- gobby/cli/clones.py +419 -0
- gobby/cli/conductor.py +266 -0
- gobby/cli/install.py +4 -4
- gobby/cli/installers/antigravity.py +3 -9
- gobby/cli/installers/claude.py +15 -9
- gobby/cli/installers/codex.py +2 -8
- gobby/cli/installers/gemini.py +8 -8
- gobby/cli/installers/shared.py +175 -13
- gobby/cli/sessions.py +1 -1
- gobby/cli/skills.py +858 -0
- gobby/cli/tasks/ai.py +0 -440
- gobby/cli/tasks/crud.py +44 -6
- gobby/cli/tasks/main.py +0 -4
- gobby/cli/tui.py +2 -2
- gobby/cli/utils.py +12 -5
- gobby/clones/__init__.py +13 -0
- gobby/clones/git.py +547 -0
- gobby/conductor/__init__.py +16 -0
- gobby/conductor/alerts.py +135 -0
- gobby/conductor/loop.py +164 -0
- gobby/conductor/monitors/__init__.py +11 -0
- gobby/conductor/monitors/agents.py +116 -0
- gobby/conductor/monitors/tasks.py +155 -0
- gobby/conductor/pricing.py +234 -0
- gobby/conductor/token_tracker.py +160 -0
- gobby/config/__init__.py +12 -97
- gobby/config/app.py +69 -91
- gobby/config/extensions.py +2 -2
- gobby/config/features.py +7 -130
- gobby/config/search.py +110 -0
- gobby/config/servers.py +1 -1
- gobby/config/skills.py +43 -0
- gobby/config/tasks.py +9 -41
- gobby/hooks/__init__.py +0 -13
- gobby/hooks/event_handlers.py +188 -2
- gobby/hooks/hook_manager.py +50 -4
- gobby/hooks/plugins.py +1 -1
- gobby/hooks/skill_manager.py +130 -0
- gobby/hooks/webhooks.py +1 -1
- gobby/install/claude/hooks/hook_dispatcher.py +4 -4
- gobby/install/codex/hooks/hook_dispatcher.py +1 -1
- gobby/install/gemini/hooks/hook_dispatcher.py +87 -12
- gobby/llm/claude.py +22 -34
- gobby/llm/claude_executor.py +46 -256
- gobby/llm/codex_executor.py +59 -291
- gobby/llm/executor.py +21 -0
- gobby/llm/gemini.py +134 -110
- gobby/llm/litellm_executor.py +143 -6
- gobby/llm/resolver.py +98 -35
- gobby/mcp_proxy/importer.py +62 -4
- gobby/mcp_proxy/instructions.py +56 -0
- gobby/mcp_proxy/models.py +15 -0
- gobby/mcp_proxy/registries.py +68 -8
- gobby/mcp_proxy/server.py +33 -3
- gobby/mcp_proxy/services/recommendation.py +43 -11
- gobby/mcp_proxy/services/tool_proxy.py +81 -1
- gobby/mcp_proxy/stdio.py +2 -1
- gobby/mcp_proxy/tools/__init__.py +0 -2
- gobby/mcp_proxy/tools/agent_messaging.py +317 -0
- gobby/mcp_proxy/tools/agents.py +31 -731
- gobby/mcp_proxy/tools/clones.py +518 -0
- gobby/mcp_proxy/tools/memory.py +3 -26
- gobby/mcp_proxy/tools/metrics.py +65 -1
- gobby/mcp_proxy/tools/orchestration/__init__.py +3 -0
- gobby/mcp_proxy/tools/orchestration/cleanup.py +151 -0
- gobby/mcp_proxy/tools/orchestration/wait.py +467 -0
- gobby/mcp_proxy/tools/sessions/__init__.py +14 -0
- gobby/mcp_proxy/tools/sessions/_commits.py +232 -0
- gobby/mcp_proxy/tools/sessions/_crud.py +253 -0
- gobby/mcp_proxy/tools/sessions/_factory.py +63 -0
- gobby/mcp_proxy/tools/sessions/_handoff.py +499 -0
- gobby/mcp_proxy/tools/sessions/_messages.py +138 -0
- gobby/mcp_proxy/tools/skills/__init__.py +616 -0
- gobby/mcp_proxy/tools/spawn_agent.py +417 -0
- gobby/mcp_proxy/tools/task_orchestration.py +7 -0
- gobby/mcp_proxy/tools/task_readiness.py +14 -0
- gobby/mcp_proxy/tools/task_sync.py +1 -1
- gobby/mcp_proxy/tools/tasks/_context.py +0 -20
- gobby/mcp_proxy/tools/tasks/_crud.py +91 -4
- gobby/mcp_proxy/tools/tasks/_expansion.py +348 -0
- gobby/mcp_proxy/tools/tasks/_factory.py +6 -16
- gobby/mcp_proxy/tools/tasks/_lifecycle.py +110 -45
- gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +18 -29
- gobby/mcp_proxy/tools/workflows.py +1 -1
- gobby/mcp_proxy/tools/worktrees.py +0 -338
- gobby/memory/backends/__init__.py +6 -1
- gobby/memory/backends/mem0.py +6 -1
- gobby/memory/extractor.py +477 -0
- gobby/memory/ingestion/__init__.py +5 -0
- gobby/memory/ingestion/multimodal.py +221 -0
- gobby/memory/manager.py +73 -285
- gobby/memory/search/__init__.py +10 -0
- gobby/memory/search/coordinator.py +248 -0
- gobby/memory/services/__init__.py +5 -0
- gobby/memory/services/crossref.py +142 -0
- gobby/prompts/loader.py +5 -2
- gobby/runner.py +37 -16
- gobby/search/__init__.py +48 -6
- gobby/search/backends/__init__.py +159 -0
- gobby/search/backends/embedding.py +225 -0
- gobby/search/embeddings.py +238 -0
- gobby/search/models.py +148 -0
- gobby/search/unified.py +496 -0
- gobby/servers/http.py +24 -12
- gobby/servers/routes/admin.py +294 -0
- gobby/servers/routes/mcp/endpoints/__init__.py +61 -0
- gobby/servers/routes/mcp/endpoints/discovery.py +405 -0
- gobby/servers/routes/mcp/endpoints/execution.py +568 -0
- gobby/servers/routes/mcp/endpoints/registry.py +378 -0
- gobby/servers/routes/mcp/endpoints/server.py +304 -0
- gobby/servers/routes/mcp/hooks.py +1 -1
- gobby/servers/routes/mcp/tools.py +48 -1317
- gobby/servers/websocket.py +2 -2
- gobby/sessions/analyzer.py +2 -0
- gobby/sessions/lifecycle.py +1 -1
- gobby/sessions/processor.py +10 -0
- gobby/sessions/transcripts/base.py +2 -0
- gobby/sessions/transcripts/claude.py +79 -10
- gobby/skills/__init__.py +91 -0
- gobby/skills/loader.py +685 -0
- gobby/skills/manager.py +384 -0
- gobby/skills/parser.py +286 -0
- gobby/skills/search.py +463 -0
- gobby/skills/sync.py +119 -0
- gobby/skills/updater.py +385 -0
- gobby/skills/validator.py +368 -0
- gobby/storage/clones.py +378 -0
- gobby/storage/database.py +1 -1
- gobby/storage/memories.py +43 -13
- gobby/storage/migrations.py +162 -201
- gobby/storage/sessions.py +116 -7
- gobby/storage/skills.py +782 -0
- gobby/storage/tasks/_crud.py +4 -4
- gobby/storage/tasks/_lifecycle.py +57 -7
- gobby/storage/tasks/_manager.py +14 -5
- gobby/storage/tasks/_models.py +8 -3
- gobby/sync/memories.py +40 -5
- gobby/sync/tasks.py +83 -6
- gobby/tasks/__init__.py +1 -2
- gobby/tasks/external_validator.py +1 -1
- gobby/tasks/validation.py +46 -35
- gobby/tools/summarizer.py +91 -10
- gobby/tui/api_client.py +4 -7
- gobby/tui/app.py +5 -3
- gobby/tui/screens/orchestrator.py +1 -2
- gobby/tui/screens/tasks.py +2 -4
- gobby/tui/ws_client.py +1 -1
- gobby/utils/daemon_client.py +2 -2
- gobby/utils/project_context.py +2 -3
- gobby/utils/status.py +13 -0
- gobby/workflows/actions.py +221 -1135
- gobby/workflows/artifact_actions.py +31 -0
- gobby/workflows/autonomous_actions.py +11 -0
- gobby/workflows/context_actions.py +93 -1
- gobby/workflows/detection_helpers.py +115 -31
- gobby/workflows/enforcement/__init__.py +47 -0
- gobby/workflows/enforcement/blocking.py +269 -0
- gobby/workflows/enforcement/commit_policy.py +283 -0
- gobby/workflows/enforcement/handlers.py +269 -0
- gobby/workflows/{task_enforcement_actions.py → enforcement/task_policy.py} +29 -388
- gobby/workflows/engine.py +13 -2
- gobby/workflows/git_utils.py +106 -0
- gobby/workflows/lifecycle_evaluator.py +29 -1
- gobby/workflows/llm_actions.py +30 -0
- gobby/workflows/loader.py +19 -6
- gobby/workflows/mcp_actions.py +20 -1
- gobby/workflows/memory_actions.py +154 -0
- gobby/workflows/safe_evaluator.py +183 -0
- gobby/workflows/session_actions.py +44 -0
- gobby/workflows/state_actions.py +60 -1
- gobby/workflows/stop_signal_actions.py +55 -0
- gobby/workflows/summary_actions.py +111 -1
- gobby/workflows/task_sync_actions.py +347 -0
- gobby/workflows/todo_actions.py +34 -1
- gobby/workflows/webhook_actions.py +185 -0
- {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/METADATA +87 -21
- {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/RECORD +201 -172
- {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/WHEEL +1 -1
- gobby/adapters/codex.py +0 -1292
- gobby/install/claude/commands/gobby/bug.md +0 -51
- gobby/install/claude/commands/gobby/chore.md +0 -51
- gobby/install/claude/commands/gobby/epic.md +0 -52
- gobby/install/claude/commands/gobby/eval.md +0 -235
- gobby/install/claude/commands/gobby/feat.md +0 -49
- gobby/install/claude/commands/gobby/nit.md +0 -52
- gobby/install/claude/commands/gobby/ref.md +0 -52
- gobby/install/codex/prompts/forget.md +0 -7
- gobby/install/codex/prompts/memories.md +0 -7
- gobby/install/codex/prompts/recall.md +0 -7
- gobby/install/codex/prompts/remember.md +0 -13
- gobby/llm/gemini_executor.py +0 -339
- gobby/mcp_proxy/tools/session_messages.py +0 -1056
- gobby/mcp_proxy/tools/task_expansion.py +0 -591
- gobby/prompts/defaults/expansion/system.md +0 -119
- gobby/prompts/defaults/expansion/user.md +0 -48
- gobby/prompts/defaults/external_validation/agent.md +0 -72
- gobby/prompts/defaults/external_validation/external.md +0 -63
- gobby/prompts/defaults/external_validation/spawn.md +0 -83
- gobby/prompts/defaults/external_validation/system.md +0 -6
- gobby/prompts/defaults/features/import_mcp.md +0 -22
- gobby/prompts/defaults/features/import_mcp_github.md +0 -17
- gobby/prompts/defaults/features/import_mcp_search.md +0 -16
- gobby/prompts/defaults/features/recommend_tools.md +0 -32
- gobby/prompts/defaults/features/recommend_tools_hybrid.md +0 -35
- gobby/prompts/defaults/features/recommend_tools_llm.md +0 -30
- gobby/prompts/defaults/features/server_description.md +0 -20
- gobby/prompts/defaults/features/server_description_system.md +0 -6
- gobby/prompts/defaults/features/task_description.md +0 -31
- gobby/prompts/defaults/features/task_description_system.md +0 -6
- gobby/prompts/defaults/features/tool_summary.md +0 -17
- gobby/prompts/defaults/features/tool_summary_system.md +0 -6
- gobby/prompts/defaults/research/step.md +0 -58
- gobby/prompts/defaults/validation/criteria.md +0 -47
- gobby/prompts/defaults/validation/validate.md +0 -38
- gobby/storage/migrations_legacy.py +0 -1359
- gobby/tasks/context.py +0 -747
- gobby/tasks/criteria.py +0 -342
- gobby/tasks/expansion.py +0 -626
- gobby/tasks/prompts/expand.py +0 -327
- gobby/tasks/research.py +0 -421
- gobby/tasks/tdd.py +0 -352
- {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/entry_points.txt +0 -0
- {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/licenses/LICENSE.md +0 -0
- {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session memory extractor.
|
|
3
|
+
|
|
4
|
+
Automatically extracts meaningful, reusable memories from session transcripts.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import logging
|
|
11
|
+
from dataclasses import dataclass
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
from gobby.prompts.loader import PromptLoader
|
|
16
|
+
from gobby.workflows.summary_actions import format_turns_for_llm
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from gobby.llm.service import LLMService
|
|
20
|
+
from gobby.memory.manager import MemoryManager
|
|
21
|
+
from gobby.storage.sessions import LocalSessionManager
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)

# Template path within the prompts collection; passed to PromptLoader.render()
# when building the extraction prompt.
EXTRACT_PROMPT_PATH = "memory/extract"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@dataclass
class MemoryCandidate:
    """A single memory proposal extracted from a session transcript."""

    # The memory text itself.
    content: str
    # One of: fact, pattern, preference, context.
    memory_type: str
    # Importance score in [0.0, 1.0].
    importance: float
    # Free-form tag labels attached to the memory.
    tags: list[str]

    def to_dict(self) -> dict[str, Any]:
        """Serialize this candidate as a plain dictionary."""
        return dict(
            content=self.content,
            memory_type=self.memory_type,
            importance=self.importance,
            tags=self.tags,
        )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class SessionContext:
    """Context extracted from a session for memory extraction."""

    # Identifier of the source session.
    session_id: str
    # Owning project ID, if the session belongs to one.
    project_id: str | None
    # Human-readable project name ("Unknown Project" when unresolvable).
    project_name: str
    # Comma-separated task IDs referenced by task tools ("None" if empty).
    task_refs: str
    # Comma-separated file paths touched by Edit/Write tools ("None" if empty).
    files_modified: str
    # Per-tool usage counts formatted as "Name(count), ..." ("None" if empty).
    tool_summary: str
    # Recent transcript turns rendered for LLM consumption.
    transcript_summary: str
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class SessionMemoryExtractor:
    """Extract meaningful memories from session transcripts.

    Uses LLM analysis to identify high-value, reusable knowledge from
    session transcripts and stores them as memories.
    """

    def __init__(
        self,
        memory_manager: MemoryManager,
        session_manager: LocalSessionManager,
        llm_service: LLMService,
        prompt_loader: PromptLoader | None = None,
        transcript_processor: Any | None = None,
    ):
        """Initialize the extractor.

        Args:
            memory_manager: Manager for storing extracted memories
            session_manager: Manager for session data
            llm_service: LLM service for analysis
            prompt_loader: Optional custom prompt loader (a default one is
                created when omitted)
            transcript_processor: Optional transcript processor for parsing
        """
        self.memory_manager = memory_manager
        self.session_manager = session_manager
        self.llm_service = llm_service
        self.prompt_loader = prompt_loader or PromptLoader()
        self.transcript_processor = transcript_processor

    async def extract(
        self,
        session_id: str,
        min_importance: float = 0.7,
        max_memories: int = 5,
        dry_run: bool = False,
    ) -> list[MemoryCandidate]:
        """Extract memories from a session.

        Args:
            session_id: The session to extract memories from
            min_importance: Minimum importance threshold (0.0-1.0)
            max_memories: Maximum number of memories to extract
            dry_run: If True, don't store memories, just return candidates

        Returns:
            List of extracted memory candidates (after filtering/deduping)
        """
        # 1. Get session context
        context = await self._get_session_context(session_id)
        if not context:
            logger.warning(f"Could not get context for session {session_id}")
            return []

        # 2. Load and render prompt
        prompt = self._render_prompt(
            context=context,
            min_importance=min_importance,
            max_memories=max_memories,
        )

        # 3. LLM analysis
        candidates = await self._analyze_with_llm(prompt)
        if not candidates:
            logger.debug(f"No memory candidates extracted from session {session_id}")
            return []

        # 4. Quality filter + deduplicate
        filtered = await self._filter_and_dedupe(
            candidates=candidates,
            min_importance=min_importance,
            project_id=context.project_id,
        )

        # 5. Store (unless dry_run)
        if not dry_run and filtered:
            await self._store_memories(
                candidates=filtered,
                session_id=session_id,
                project_id=context.project_id,
            )

        return filtered

    async def _get_session_context(self, session_id: str) -> SessionContext | None:
        """Get context from the session for memory extraction.

        Args:
            session_id: The session ID

        Returns:
            SessionContext with extracted information, or None if the session
            is unknown
        """
        session = self.session_manager.get(session_id)
        if not session:
            return None

        # Get project info
        project_id = session.project_id
        project_name = "Unknown Project"

        if project_id:
            # Best-effort lookup of a human-readable project name; any failure
            # falls back to the placeholder instead of aborting extraction.
            try:
                from gobby.storage.projects import LocalProjectManager

                project_mgr = LocalProjectManager(self.memory_manager.db)
                project = project_mgr.get(project_id)
                if project and project.name:
                    project_name = project.name
            except Exception as e:
                logger.debug(f"Could not get project name: {e}")

        # Get transcript content
        transcript_path = getattr(session, "jsonl_path", None)
        transcript_summary = ""
        task_refs = ""
        files_modified = ""
        tool_summary_parts: list[str] = []

        if transcript_path and Path(transcript_path).exists():
            turns = self._load_transcript(transcript_path)

            # Extract turns since last clear (or the last 50 turns when no
            # transcript processor is configured).
            if self.transcript_processor:
                recent_turns = self.transcript_processor.extract_turns_since_clear(
                    turns, max_turns=50
                )
            else:
                recent_turns = turns[-50:] if len(turns) > 50 else turns

            # Format for LLM
            transcript_summary = format_turns_for_llm(recent_turns)

            # Extract file modifications and tool usage from tool_use blocks
            files_set: set[str] = set()
            task_set: set[str] = set()

            for turn in recent_turns:
                message = turn.get("message", {})
                content = message.get("content", [])

                if isinstance(content, list):
                    for block in content:
                        if isinstance(block, dict) and block.get("type") == "tool_use":
                            tool_name = block.get("name", "")
                            tool_input = block.get("input", {})

                            # Track file modifications
                            if tool_name in ("Edit", "Write", "NotebookEdit"):
                                file_path = tool_input.get("file_path", "")
                                if file_path:
                                    files_set.add(file_path)

                            # Track task references
                            if tool_name in ("update_task", "create_task", "close_task"):
                                task_id = tool_input.get("task_id", "")
                                if task_id:
                                    task_set.add(task_id)

                            # Track key tool actions
                            if tool_name in ("Edit", "Write", "Bash", "Grep", "Glob"):
                                tool_summary_parts.append(tool_name)

            files_modified = ", ".join(sorted(files_set)) if files_set else "None"
            task_refs = ", ".join(sorted(task_set)) if task_set else "None"

        # Create tool summary (count of each tool type)
        tool_counts: dict[str, int] = {}
        for tool in tool_summary_parts:
            tool_counts[tool] = tool_counts.get(tool, 0) + 1
        tool_summary = ", ".join(f"{k}({v})" for k, v in sorted(tool_counts.items()))

        return SessionContext(
            session_id=session_id,
            project_id=project_id,
            project_name=project_name,
            task_refs=task_refs,
            files_modified=files_modified,
            tool_summary=tool_summary or "None",
            transcript_summary=transcript_summary,
        )

    def _load_transcript(self, transcript_path: str) -> list[dict[str, Any]]:
        """Load transcript turns from a JSONL file.

        Args:
            transcript_path: Path to the transcript file

        Returns:
            List of turn dictionaries (possibly partial if reading fails
            midway; blank lines are skipped)
        """
        turns: list[dict[str, Any]] = []
        try:
            with open(transcript_path, encoding="utf-8") as f:
                for line in f:
                    if line.strip():
                        turns.append(json.loads(line))
        except Exception as e:
            logger.warning(f"Failed to load transcript: {e}")
        return turns

    def _render_prompt(
        self,
        context: SessionContext,
        min_importance: float,
        max_memories: int,
    ) -> str:
        """Render the extraction prompt with context.

        Args:
            context: Session context
            min_importance: Minimum importance threshold
            max_memories: Maximum memories to extract

        Returns:
            Rendered prompt string
        """
        return self.prompt_loader.render(
            EXTRACT_PROMPT_PATH,
            {
                "project_name": context.project_name,
                "task_refs": context.task_refs,
                "files": context.files_modified,
                "tool_summary": context.tool_summary,
                "transcript_summary": context.transcript_summary,
                "min_importance": min_importance,
                "max_memories": max_memories,
            },
        )

    async def _analyze_with_llm(self, prompt: str) -> list[MemoryCandidate]:
        """Call LLM to analyze transcript and extract memories.

        Args:
            prompt: Rendered prompt for the LLM

        Returns:
            List of memory candidates extracted from the LLM response, or an
            empty list on any provider failure
        """
        try:
            provider = self.llm_service.get_default_provider()
            response = await provider.generate_text(prompt)

            # Parse JSON from response
            candidates = self._parse_llm_response(response)
            return candidates

        except Exception as e:
            logger.error(f"LLM analysis failed: {e}")
            return []

    def _parse_llm_response(self, response: str) -> list[MemoryCandidate]:
        """Parse LLM response to extract memory candidates.

        Invalid items are skipped individually so one malformed entry cannot
        abort parsing of the remaining candidates.

        Args:
            response: Raw LLM response text

        Returns:
            List of memory candidates
        """
        candidates: list[MemoryCandidate] = []

        # Try to find JSON array in response
        try:
            # Look for JSON array markers
            start_idx = response.find("[")
            end_idx = response.rfind("]")

            # Also reject "]...[" orderings where no well-formed array exists.
            if start_idx == -1 or end_idx == -1 or end_idx < start_idx:
                logger.warning("No JSON array found in LLM response")
                return []

            json_str = response[start_idx : end_idx + 1]
            data = json.loads(json_str)

            if not isinstance(data, list):
                logger.warning("LLM response is not a list")
                return []

            for item in data:
                if not isinstance(item, dict):
                    continue

                # A non-string "content" (null, nested object, number, ...)
                # previously raised AttributeError on .strip() and aborted
                # parsing of all remaining items; skip such items instead.
                raw_content = item.get("content", "")
                if not isinstance(raw_content, str):
                    continue
                content = raw_content.strip()
                if not content:
                    continue

                memory_type = item.get("memory_type", "fact")
                if memory_type not in ("fact", "pattern", "preference", "context"):
                    memory_type = "fact"

                raw_importance = item.get("importance", 0.7)
                try:
                    importance = float(raw_importance)
                except (ValueError, TypeError) as e:
                    logger.warning(
                        f"Invalid importance value '{raw_importance}' in memory item "
                        f"(content: {content[:50]}...): {e}. Using default 0.7"
                    )
                    importance = 0.7
                # Clamp into the valid [0.0, 1.0] range.
                importance = max(0.0, min(1.0, importance))

                tags = item.get("tags", [])
                if not isinstance(tags, list):
                    tags = []
                tags = [str(t) for t in tags]

                candidates.append(
                    MemoryCandidate(
                        content=content,
                        memory_type=memory_type,
                        importance=importance,
                        tags=tags,
                    )
                )

        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse LLM response as JSON: {e}")
        except Exception as e:
            logger.warning(f"Error parsing LLM response: {e}")

        return candidates

    async def _filter_and_dedupe(
        self,
        candidates: list[MemoryCandidate],
        min_importance: float,
        project_id: str | None,
    ) -> list[MemoryCandidate]:
        """Filter candidates by importance and deduplicate against existing memories.

        Args:
            candidates: Raw candidates from LLM
            min_importance: Minimum importance threshold
            project_id: Project ID for deduplication

        Returns:
            Filtered and deduplicated candidates
        """
        filtered: list[MemoryCandidate] = []

        for candidate in candidates:
            # Skip low importance
            if candidate.importance < min_importance:
                continue

            # Check for duplicates in existing memories
            if self.memory_manager.content_exists(candidate.content, project_id):
                logger.debug(f"Skipping duplicate memory: {candidate.content[:50]}...")
                continue

            # Check for near-duplicates in this batch
            is_duplicate = False
            for existing in filtered:
                if self._is_similar(candidate.content, existing.content):
                    is_duplicate = True
                    break

            if not is_duplicate:
                filtered.append(candidate)

        return filtered

    def _is_similar(self, content1: str, content2: str, threshold: float = 0.8) -> bool:
        """Check if two content strings are similar enough to be considered duplicates.

        Uses a simple word overlap (Jaccard similarity) heuristic.

        Args:
            content1: First content string
            content2: Second content string
            threshold: Similarity threshold (0.0-1.0)

        Returns:
            True if contents are similar
        """
        words1 = set(content1.lower().split())
        words2 = set(content2.lower().split())

        # Empty content can never be "similar" to anything.
        if not words1 or not words2:
            return False

        # Jaccard similarity
        intersection = len(words1 & words2)
        union = len(words1 | words2)

        similarity = intersection / union if union > 0 else 0
        return similarity >= threshold

    async def _store_memories(
        self,
        candidates: list[MemoryCandidate],
        session_id: str,
        project_id: str | None,
    ) -> None:
        """Store extracted memories.

        Failures are logged per candidate so one bad memory does not block
        storage of the rest.

        Args:
            candidates: Memory candidates to store
            session_id: Source session ID
            project_id: Project ID for the memories
        """
        for candidate in candidates:
            try:
                await self.memory_manager.remember(
                    content=candidate.content,
                    memory_type=candidate.memory_type,
                    importance=candidate.importance,
                    project_id=project_id,
                    source_type="session",
                    source_session_id=session_id,
                    tags=candidate.tags,
                )
                logger.debug(f"Stored memory: {candidate.content[:50]}...")
            except Exception as e:
                logger.warning(f"Failed to store memory: {e}")
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""Multimodal content ingestion for memory system."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import mimetypes
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from gobby.memory.protocol import MediaAttachment
|
|
11
|
+
from gobby.storage.memories import Memory
|
|
12
|
+
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from gobby.llm.service import LLMService
|
|
15
|
+
from gobby.memory.protocol import MemoryBackendProtocol
|
|
16
|
+
from gobby.storage.memories import LocalMemoryManager
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class MultimodalIngestor:
|
|
22
|
+
"""
|
|
23
|
+
Handles ingestion of multimodal content (images, screenshots) into memory.
|
|
24
|
+
|
|
25
|
+
Extracts image handling from MemoryManager to provide focused
|
|
26
|
+
multimodal processing capabilities.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
    self,
    storage: LocalMemoryManager,
    backend: MemoryBackendProtocol,
    llm_service: LLMService | None = None,
):
    """
    Create an ingestor bound to the given storage and backend.

    Args:
        storage: Memory storage manager for persistence
        backend: Memory backend protocol for creating records
        llm_service: LLM service for image description
    """
    self._llm_service = llm_service
    self._backend = backend
    self._storage = storage
|
46
|
+
|
|
47
|
+
@property
def llm_service(self) -> LLMService | None:
    """Get the LLM service for image description (None when unconfigured)."""
    return self._llm_service
|
|
51
|
+
|
|
52
|
+
@llm_service.setter
def llm_service(self, service: LLMService | None) -> None:
    """Set (or clear, by passing None) the LLM service for image description."""
    self._llm_service = service
|
|
56
|
+
|
|
57
|
+
async def remember_with_image(
|
|
58
|
+
self,
|
|
59
|
+
image_path: str,
|
|
60
|
+
context: str | None = None,
|
|
61
|
+
memory_type: str = "fact",
|
|
62
|
+
importance: float = 0.5,
|
|
63
|
+
project_id: str | None = None,
|
|
64
|
+
source_type: str = "user",
|
|
65
|
+
source_session_id: str | None = None,
|
|
66
|
+
tags: list[str] | None = None,
|
|
67
|
+
) -> Memory:
|
|
68
|
+
"""
|
|
69
|
+
Store a memory with an image attachment.
|
|
70
|
+
|
|
71
|
+
Uses the configured LLM provider to generate a description of the image,
|
|
72
|
+
then stores the memory with the description as content and the image
|
|
73
|
+
as a media attachment.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
image_path: Path to the image file
|
|
77
|
+
context: Optional context to guide the image description
|
|
78
|
+
memory_type: Type of memory (fact, preference, etc)
|
|
79
|
+
importance: 0.0-1.0 importance score
|
|
80
|
+
project_id: Optional project context
|
|
81
|
+
source_type: Origin of memory
|
|
82
|
+
source_session_id: Origin session
|
|
83
|
+
tags: Optional tags
|
|
84
|
+
|
|
85
|
+
Returns:
|
|
86
|
+
The created Memory object
|
|
87
|
+
|
|
88
|
+
Raises:
|
|
89
|
+
ValueError: If LLM service is not configured or image not found
|
|
90
|
+
"""
|
|
91
|
+
path = Path(image_path)
|
|
92
|
+
if not path.exists():
|
|
93
|
+
raise ValueError(f"Image not found: {image_path}")
|
|
94
|
+
|
|
95
|
+
# Get LLM provider for image description
|
|
96
|
+
if not self._llm_service:
|
|
97
|
+
raise ValueError(
|
|
98
|
+
"LLM service not configured. Pass llm_service to MemoryManager "
|
|
99
|
+
"to enable remember_with_image."
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
provider = self._llm_service.get_default_provider()
|
|
103
|
+
|
|
104
|
+
# Generate image description
|
|
105
|
+
description = await provider.describe_image(image_path, context=context)
|
|
106
|
+
|
|
107
|
+
# Determine MIME type
|
|
108
|
+
mime_type, _ = mimetypes.guess_type(str(path))
|
|
109
|
+
if not mime_type:
|
|
110
|
+
mime_type = "application/octet-stream"
|
|
111
|
+
|
|
112
|
+
# Create media attachment
|
|
113
|
+
media = MediaAttachment(
|
|
114
|
+
media_type="image",
|
|
115
|
+
content_path=str(path.absolute()),
|
|
116
|
+
mime_type=mime_type,
|
|
117
|
+
description=description,
|
|
118
|
+
description_model=provider.provider_name,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
# Store memory with media attachment via backend
|
|
122
|
+
record = await self._backend.create(
|
|
123
|
+
content=description,
|
|
124
|
+
memory_type=memory_type,
|
|
125
|
+
importance=importance,
|
|
126
|
+
project_id=project_id,
|
|
127
|
+
source_type=source_type,
|
|
128
|
+
source_session_id=source_session_id,
|
|
129
|
+
tags=tags,
|
|
130
|
+
media=[media],
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# Return as Memory object for backward compatibility
|
|
134
|
+
# Note: The backend returns MemoryRecord, but we need Memory
|
|
135
|
+
memory = self._storage.get_memory(record.id)
|
|
136
|
+
if memory is not None:
|
|
137
|
+
return memory
|
|
138
|
+
|
|
139
|
+
# Fallback: construct Memory from MemoryRecord if storage lookup fails
|
|
140
|
+
# This can happen with synthetic records from failed backend calls
|
|
141
|
+
return Memory(
|
|
142
|
+
id=record.id,
|
|
143
|
+
content=record.content,
|
|
144
|
+
memory_type=record.memory_type,
|
|
145
|
+
created_at=record.created_at.isoformat(),
|
|
146
|
+
updated_at=record.updated_at.isoformat()
|
|
147
|
+
if record.updated_at
|
|
148
|
+
else record.created_at.isoformat(),
|
|
149
|
+
project_id=record.project_id,
|
|
150
|
+
source_type=record.source_type,
|
|
151
|
+
source_session_id=record.source_session_id,
|
|
152
|
+
importance=record.importance,
|
|
153
|
+
tags=record.tags,
|
|
154
|
+
)
|
|
155
|
+
|
|
156
|
+
async def remember_screenshot(
|
|
157
|
+
self,
|
|
158
|
+
screenshot_bytes: bytes,
|
|
159
|
+
context: str | None = None,
|
|
160
|
+
memory_type: str = "observation",
|
|
161
|
+
importance: float = 0.5,
|
|
162
|
+
project_id: str | None = None,
|
|
163
|
+
source_type: str = "user",
|
|
164
|
+
source_session_id: str | None = None,
|
|
165
|
+
tags: list[str] | None = None,
|
|
166
|
+
) -> Memory:
|
|
167
|
+
"""
|
|
168
|
+
Store a memory from raw screenshot bytes.
|
|
169
|
+
|
|
170
|
+
Saves the screenshot to .gobby/resources/ with a timestamp-based filename,
|
|
171
|
+
then delegates to remember_with_image() for LLM description and storage.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
screenshot_bytes: Raw PNG screenshot bytes (from Playwright/Puppeteer)
|
|
175
|
+
context: Optional context to guide the image description
|
|
176
|
+
memory_type: Type of memory (default: "observation")
|
|
177
|
+
importance: 0.0-1.0 importance score
|
|
178
|
+
project_id: Optional project context
|
|
179
|
+
source_type: Origin of memory
|
|
180
|
+
source_session_id: Origin session
|
|
181
|
+
tags: Optional tags
|
|
182
|
+
|
|
183
|
+
Returns:
|
|
184
|
+
The created Memory object
|
|
185
|
+
|
|
186
|
+
Raises:
|
|
187
|
+
ValueError: If LLM service is not configured or screenshot bytes are empty
|
|
188
|
+
"""
|
|
189
|
+
if not screenshot_bytes:
|
|
190
|
+
raise ValueError("Screenshot bytes cannot be empty")
|
|
191
|
+
|
|
192
|
+
# Determine resources directory using centralized utility
|
|
193
|
+
from datetime import datetime as dt
|
|
194
|
+
|
|
195
|
+
from gobby.cli.utils import get_resources_dir
|
|
196
|
+
from gobby.utils.project_context import get_project_context
|
|
197
|
+
|
|
198
|
+
ctx = get_project_context()
|
|
199
|
+
project_path = ctx.get("path") if ctx else None
|
|
200
|
+
resources_dir = get_resources_dir(project_path)
|
|
201
|
+
|
|
202
|
+
# Generate timestamp-based filename
|
|
203
|
+
timestamp = dt.now().strftime("%Y%m%d_%H%M%S_%f")
|
|
204
|
+
filename = f"screenshot_{timestamp}.png"
|
|
205
|
+
filepath = resources_dir / filename
|
|
206
|
+
|
|
207
|
+
# Write screenshot to file
|
|
208
|
+
filepath.write_bytes(screenshot_bytes)
|
|
209
|
+
logger.debug(f"Saved screenshot to {filepath}")
|
|
210
|
+
|
|
211
|
+
# Delegate to remember_with_image
|
|
212
|
+
return await self.remember_with_image(
|
|
213
|
+
image_path=str(filepath),
|
|
214
|
+
context=context,
|
|
215
|
+
memory_type=memory_type,
|
|
216
|
+
importance=importance,
|
|
217
|
+
project_id=project_id,
|
|
218
|
+
source_type=source_type,
|
|
219
|
+
source_session_id=source_session_id,
|
|
220
|
+
tags=tags,
|
|
221
|
+
)
|