gobby 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gobby/__init__.py +3 -0
- gobby/adapters/__init__.py +30 -0
- gobby/adapters/base.py +93 -0
- gobby/adapters/claude_code.py +276 -0
- gobby/adapters/codex.py +1292 -0
- gobby/adapters/gemini.py +343 -0
- gobby/agents/__init__.py +37 -0
- gobby/agents/codex_session.py +120 -0
- gobby/agents/constants.py +112 -0
- gobby/agents/context.py +362 -0
- gobby/agents/definitions.py +133 -0
- gobby/agents/gemini_session.py +111 -0
- gobby/agents/registry.py +618 -0
- gobby/agents/runner.py +968 -0
- gobby/agents/session.py +259 -0
- gobby/agents/spawn.py +916 -0
- gobby/agents/spawners/__init__.py +77 -0
- gobby/agents/spawners/base.py +142 -0
- gobby/agents/spawners/cross_platform.py +266 -0
- gobby/agents/spawners/embedded.py +225 -0
- gobby/agents/spawners/headless.py +226 -0
- gobby/agents/spawners/linux.py +125 -0
- gobby/agents/spawners/macos.py +277 -0
- gobby/agents/spawners/windows.py +308 -0
- gobby/agents/tty_config.py +319 -0
- gobby/autonomous/__init__.py +32 -0
- gobby/autonomous/progress_tracker.py +447 -0
- gobby/autonomous/stop_registry.py +269 -0
- gobby/autonomous/stuck_detector.py +383 -0
- gobby/cli/__init__.py +67 -0
- gobby/cli/__main__.py +8 -0
- gobby/cli/agents.py +529 -0
- gobby/cli/artifacts.py +266 -0
- gobby/cli/daemon.py +329 -0
- gobby/cli/extensions.py +526 -0
- gobby/cli/github.py +263 -0
- gobby/cli/init.py +53 -0
- gobby/cli/install.py +614 -0
- gobby/cli/installers/__init__.py +37 -0
- gobby/cli/installers/antigravity.py +65 -0
- gobby/cli/installers/claude.py +363 -0
- gobby/cli/installers/codex.py +192 -0
- gobby/cli/installers/gemini.py +294 -0
- gobby/cli/installers/git_hooks.py +377 -0
- gobby/cli/installers/shared.py +737 -0
- gobby/cli/linear.py +250 -0
- gobby/cli/mcp.py +30 -0
- gobby/cli/mcp_proxy.py +698 -0
- gobby/cli/memory.py +304 -0
- gobby/cli/merge.py +384 -0
- gobby/cli/projects.py +79 -0
- gobby/cli/sessions.py +622 -0
- gobby/cli/tasks/__init__.py +30 -0
- gobby/cli/tasks/_utils.py +658 -0
- gobby/cli/tasks/ai.py +1025 -0
- gobby/cli/tasks/commits.py +169 -0
- gobby/cli/tasks/crud.py +685 -0
- gobby/cli/tasks/deps.py +135 -0
- gobby/cli/tasks/labels.py +63 -0
- gobby/cli/tasks/main.py +273 -0
- gobby/cli/tasks/search.py +178 -0
- gobby/cli/tui.py +34 -0
- gobby/cli/utils.py +513 -0
- gobby/cli/workflows.py +927 -0
- gobby/cli/worktrees.py +481 -0
- gobby/config/__init__.py +129 -0
- gobby/config/app.py +551 -0
- gobby/config/extensions.py +167 -0
- gobby/config/features.py +472 -0
- gobby/config/llm_providers.py +98 -0
- gobby/config/logging.py +66 -0
- gobby/config/mcp.py +346 -0
- gobby/config/persistence.py +247 -0
- gobby/config/servers.py +141 -0
- gobby/config/sessions.py +250 -0
- gobby/config/tasks.py +784 -0
- gobby/hooks/__init__.py +104 -0
- gobby/hooks/artifact_capture.py +213 -0
- gobby/hooks/broadcaster.py +243 -0
- gobby/hooks/event_handlers.py +723 -0
- gobby/hooks/events.py +218 -0
- gobby/hooks/git.py +169 -0
- gobby/hooks/health_monitor.py +171 -0
- gobby/hooks/hook_manager.py +856 -0
- gobby/hooks/hook_types.py +575 -0
- gobby/hooks/plugins.py +813 -0
- gobby/hooks/session_coordinator.py +396 -0
- gobby/hooks/verification_runner.py +268 -0
- gobby/hooks/webhooks.py +339 -0
- gobby/install/claude/commands/gobby/bug.md +51 -0
- gobby/install/claude/commands/gobby/chore.md +51 -0
- gobby/install/claude/commands/gobby/epic.md +52 -0
- gobby/install/claude/commands/gobby/eval.md +235 -0
- gobby/install/claude/commands/gobby/feat.md +49 -0
- gobby/install/claude/commands/gobby/nit.md +52 -0
- gobby/install/claude/commands/gobby/ref.md +52 -0
- gobby/install/claude/hooks/HOOK_SCHEMAS.md +632 -0
- gobby/install/claude/hooks/hook_dispatcher.py +364 -0
- gobby/install/claude/hooks/validate_settings.py +102 -0
- gobby/install/claude/hooks-template.json +118 -0
- gobby/install/codex/hooks/hook_dispatcher.py +153 -0
- gobby/install/codex/prompts/forget.md +7 -0
- gobby/install/codex/prompts/memories.md +7 -0
- gobby/install/codex/prompts/recall.md +7 -0
- gobby/install/codex/prompts/remember.md +13 -0
- gobby/install/gemini/hooks/hook_dispatcher.py +268 -0
- gobby/install/gemini/hooks-template.json +138 -0
- gobby/install/shared/plugins/code_guardian.py +456 -0
- gobby/install/shared/plugins/example_notify.py +331 -0
- gobby/integrations/__init__.py +10 -0
- gobby/integrations/github.py +145 -0
- gobby/integrations/linear.py +145 -0
- gobby/llm/__init__.py +40 -0
- gobby/llm/base.py +120 -0
- gobby/llm/claude.py +578 -0
- gobby/llm/claude_executor.py +503 -0
- gobby/llm/codex.py +322 -0
- gobby/llm/codex_executor.py +513 -0
- gobby/llm/executor.py +316 -0
- gobby/llm/factory.py +34 -0
- gobby/llm/gemini.py +258 -0
- gobby/llm/gemini_executor.py +339 -0
- gobby/llm/litellm.py +287 -0
- gobby/llm/litellm_executor.py +303 -0
- gobby/llm/resolver.py +499 -0
- gobby/llm/service.py +236 -0
- gobby/mcp_proxy/__init__.py +29 -0
- gobby/mcp_proxy/actions.py +175 -0
- gobby/mcp_proxy/daemon_control.py +198 -0
- gobby/mcp_proxy/importer.py +436 -0
- gobby/mcp_proxy/lazy.py +325 -0
- gobby/mcp_proxy/manager.py +798 -0
- gobby/mcp_proxy/metrics.py +609 -0
- gobby/mcp_proxy/models.py +139 -0
- gobby/mcp_proxy/registries.py +215 -0
- gobby/mcp_proxy/schema_hash.py +381 -0
- gobby/mcp_proxy/semantic_search.py +706 -0
- gobby/mcp_proxy/server.py +549 -0
- gobby/mcp_proxy/services/__init__.py +0 -0
- gobby/mcp_proxy/services/fallback.py +306 -0
- gobby/mcp_proxy/services/recommendation.py +224 -0
- gobby/mcp_proxy/services/server_mgmt.py +214 -0
- gobby/mcp_proxy/services/system.py +72 -0
- gobby/mcp_proxy/services/tool_filter.py +231 -0
- gobby/mcp_proxy/services/tool_proxy.py +309 -0
- gobby/mcp_proxy/stdio.py +565 -0
- gobby/mcp_proxy/tools/__init__.py +27 -0
- gobby/mcp_proxy/tools/agents.py +1103 -0
- gobby/mcp_proxy/tools/artifacts.py +207 -0
- gobby/mcp_proxy/tools/hub.py +335 -0
- gobby/mcp_proxy/tools/internal.py +337 -0
- gobby/mcp_proxy/tools/memory.py +543 -0
- gobby/mcp_proxy/tools/merge.py +422 -0
- gobby/mcp_proxy/tools/metrics.py +283 -0
- gobby/mcp_proxy/tools/orchestration/__init__.py +23 -0
- gobby/mcp_proxy/tools/orchestration/cleanup.py +619 -0
- gobby/mcp_proxy/tools/orchestration/monitor.py +380 -0
- gobby/mcp_proxy/tools/orchestration/orchestrate.py +746 -0
- gobby/mcp_proxy/tools/orchestration/review.py +736 -0
- gobby/mcp_proxy/tools/orchestration/utils.py +16 -0
- gobby/mcp_proxy/tools/session_messages.py +1056 -0
- gobby/mcp_proxy/tools/task_dependencies.py +219 -0
- gobby/mcp_proxy/tools/task_expansion.py +591 -0
- gobby/mcp_proxy/tools/task_github.py +393 -0
- gobby/mcp_proxy/tools/task_linear.py +379 -0
- gobby/mcp_proxy/tools/task_orchestration.py +77 -0
- gobby/mcp_proxy/tools/task_readiness.py +522 -0
- gobby/mcp_proxy/tools/task_sync.py +351 -0
- gobby/mcp_proxy/tools/task_validation.py +843 -0
- gobby/mcp_proxy/tools/tasks/__init__.py +25 -0
- gobby/mcp_proxy/tools/tasks/_context.py +112 -0
- gobby/mcp_proxy/tools/tasks/_crud.py +516 -0
- gobby/mcp_proxy/tools/tasks/_factory.py +176 -0
- gobby/mcp_proxy/tools/tasks/_helpers.py +129 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle.py +517 -0
- gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +301 -0
- gobby/mcp_proxy/tools/tasks/_resolution.py +55 -0
- gobby/mcp_proxy/tools/tasks/_search.py +215 -0
- gobby/mcp_proxy/tools/tasks/_session.py +125 -0
- gobby/mcp_proxy/tools/workflows.py +973 -0
- gobby/mcp_proxy/tools/worktrees.py +1264 -0
- gobby/mcp_proxy/transports/__init__.py +0 -0
- gobby/mcp_proxy/transports/base.py +95 -0
- gobby/mcp_proxy/transports/factory.py +44 -0
- gobby/mcp_proxy/transports/http.py +139 -0
- gobby/mcp_proxy/transports/stdio.py +213 -0
- gobby/mcp_proxy/transports/websocket.py +136 -0
- gobby/memory/backends/__init__.py +116 -0
- gobby/memory/backends/mem0.py +408 -0
- gobby/memory/backends/memu.py +485 -0
- gobby/memory/backends/null.py +111 -0
- gobby/memory/backends/openmemory.py +537 -0
- gobby/memory/backends/sqlite.py +304 -0
- gobby/memory/context.py +87 -0
- gobby/memory/manager.py +1001 -0
- gobby/memory/protocol.py +451 -0
- gobby/memory/search/__init__.py +66 -0
- gobby/memory/search/text.py +127 -0
- gobby/memory/viz.py +258 -0
- gobby/prompts/__init__.py +13 -0
- gobby/prompts/defaults/expansion/system.md +119 -0
- gobby/prompts/defaults/expansion/user.md +48 -0
- gobby/prompts/defaults/external_validation/agent.md +72 -0
- gobby/prompts/defaults/external_validation/external.md +63 -0
- gobby/prompts/defaults/external_validation/spawn.md +83 -0
- gobby/prompts/defaults/external_validation/system.md +6 -0
- gobby/prompts/defaults/features/import_mcp.md +22 -0
- gobby/prompts/defaults/features/import_mcp_github.md +17 -0
- gobby/prompts/defaults/features/import_mcp_search.md +16 -0
- gobby/prompts/defaults/features/recommend_tools.md +32 -0
- gobby/prompts/defaults/features/recommend_tools_hybrid.md +35 -0
- gobby/prompts/defaults/features/recommend_tools_llm.md +30 -0
- gobby/prompts/defaults/features/server_description.md +20 -0
- gobby/prompts/defaults/features/server_description_system.md +6 -0
- gobby/prompts/defaults/features/task_description.md +31 -0
- gobby/prompts/defaults/features/task_description_system.md +6 -0
- gobby/prompts/defaults/features/tool_summary.md +17 -0
- gobby/prompts/defaults/features/tool_summary_system.md +6 -0
- gobby/prompts/defaults/research/step.md +58 -0
- gobby/prompts/defaults/validation/criteria.md +47 -0
- gobby/prompts/defaults/validation/validate.md +38 -0
- gobby/prompts/loader.py +346 -0
- gobby/prompts/models.py +113 -0
- gobby/py.typed +0 -0
- gobby/runner.py +488 -0
- gobby/search/__init__.py +23 -0
- gobby/search/protocol.py +104 -0
- gobby/search/tfidf.py +232 -0
- gobby/servers/__init__.py +7 -0
- gobby/servers/http.py +636 -0
- gobby/servers/models.py +31 -0
- gobby/servers/routes/__init__.py +23 -0
- gobby/servers/routes/admin.py +416 -0
- gobby/servers/routes/dependencies.py +118 -0
- gobby/servers/routes/mcp/__init__.py +24 -0
- gobby/servers/routes/mcp/hooks.py +135 -0
- gobby/servers/routes/mcp/plugins.py +121 -0
- gobby/servers/routes/mcp/tools.py +1337 -0
- gobby/servers/routes/mcp/webhooks.py +159 -0
- gobby/servers/routes/sessions.py +582 -0
- gobby/servers/websocket.py +766 -0
- gobby/sessions/__init__.py +13 -0
- gobby/sessions/analyzer.py +322 -0
- gobby/sessions/lifecycle.py +240 -0
- gobby/sessions/manager.py +563 -0
- gobby/sessions/processor.py +225 -0
- gobby/sessions/summary.py +532 -0
- gobby/sessions/transcripts/__init__.py +41 -0
- gobby/sessions/transcripts/base.py +125 -0
- gobby/sessions/transcripts/claude.py +386 -0
- gobby/sessions/transcripts/codex.py +143 -0
- gobby/sessions/transcripts/gemini.py +195 -0
- gobby/storage/__init__.py +21 -0
- gobby/storage/agents.py +409 -0
- gobby/storage/artifact_classifier.py +341 -0
- gobby/storage/artifacts.py +285 -0
- gobby/storage/compaction.py +67 -0
- gobby/storage/database.py +357 -0
- gobby/storage/inter_session_messages.py +194 -0
- gobby/storage/mcp.py +680 -0
- gobby/storage/memories.py +562 -0
- gobby/storage/merge_resolutions.py +550 -0
- gobby/storage/migrations.py +860 -0
- gobby/storage/migrations_legacy.py +1359 -0
- gobby/storage/projects.py +166 -0
- gobby/storage/session_messages.py +251 -0
- gobby/storage/session_tasks.py +97 -0
- gobby/storage/sessions.py +817 -0
- gobby/storage/task_dependencies.py +223 -0
- gobby/storage/tasks/__init__.py +42 -0
- gobby/storage/tasks/_aggregates.py +180 -0
- gobby/storage/tasks/_crud.py +449 -0
- gobby/storage/tasks/_id.py +104 -0
- gobby/storage/tasks/_lifecycle.py +311 -0
- gobby/storage/tasks/_manager.py +889 -0
- gobby/storage/tasks/_models.py +300 -0
- gobby/storage/tasks/_ordering.py +119 -0
- gobby/storage/tasks/_path_cache.py +110 -0
- gobby/storage/tasks/_queries.py +343 -0
- gobby/storage/tasks/_search.py +143 -0
- gobby/storage/workflow_audit.py +393 -0
- gobby/storage/worktrees.py +547 -0
- gobby/sync/__init__.py +29 -0
- gobby/sync/github.py +333 -0
- gobby/sync/linear.py +304 -0
- gobby/sync/memories.py +284 -0
- gobby/sync/tasks.py +641 -0
- gobby/tasks/__init__.py +8 -0
- gobby/tasks/build_verification.py +193 -0
- gobby/tasks/commits.py +633 -0
- gobby/tasks/context.py +747 -0
- gobby/tasks/criteria.py +342 -0
- gobby/tasks/enhanced_validator.py +226 -0
- gobby/tasks/escalation.py +263 -0
- gobby/tasks/expansion.py +626 -0
- gobby/tasks/external_validator.py +764 -0
- gobby/tasks/issue_extraction.py +171 -0
- gobby/tasks/prompts/expand.py +327 -0
- gobby/tasks/research.py +421 -0
- gobby/tasks/tdd.py +352 -0
- gobby/tasks/tree_builder.py +263 -0
- gobby/tasks/validation.py +712 -0
- gobby/tasks/validation_history.py +357 -0
- gobby/tasks/validation_models.py +89 -0
- gobby/tools/__init__.py +0 -0
- gobby/tools/summarizer.py +170 -0
- gobby/tui/__init__.py +5 -0
- gobby/tui/api_client.py +281 -0
- gobby/tui/app.py +327 -0
- gobby/tui/screens/__init__.py +25 -0
- gobby/tui/screens/agents.py +333 -0
- gobby/tui/screens/chat.py +450 -0
- gobby/tui/screens/dashboard.py +377 -0
- gobby/tui/screens/memory.py +305 -0
- gobby/tui/screens/metrics.py +231 -0
- gobby/tui/screens/orchestrator.py +904 -0
- gobby/tui/screens/sessions.py +412 -0
- gobby/tui/screens/tasks.py +442 -0
- gobby/tui/screens/workflows.py +289 -0
- gobby/tui/screens/worktrees.py +174 -0
- gobby/tui/widgets/__init__.py +21 -0
- gobby/tui/widgets/chat.py +210 -0
- gobby/tui/widgets/conductor.py +104 -0
- gobby/tui/widgets/menu.py +132 -0
- gobby/tui/widgets/message_panel.py +160 -0
- gobby/tui/widgets/review_gate.py +224 -0
- gobby/tui/widgets/task_tree.py +99 -0
- gobby/tui/widgets/token_budget.py +166 -0
- gobby/tui/ws_client.py +258 -0
- gobby/utils/__init__.py +3 -0
- gobby/utils/daemon_client.py +235 -0
- gobby/utils/git.py +222 -0
- gobby/utils/id.py +38 -0
- gobby/utils/json_helpers.py +161 -0
- gobby/utils/logging.py +376 -0
- gobby/utils/machine_id.py +135 -0
- gobby/utils/metrics.py +589 -0
- gobby/utils/project_context.py +182 -0
- gobby/utils/project_init.py +263 -0
- gobby/utils/status.py +256 -0
- gobby/utils/validation.py +80 -0
- gobby/utils/version.py +23 -0
- gobby/workflows/__init__.py +4 -0
- gobby/workflows/actions.py +1310 -0
- gobby/workflows/approval_flow.py +138 -0
- gobby/workflows/artifact_actions.py +103 -0
- gobby/workflows/audit_helpers.py +110 -0
- gobby/workflows/autonomous_actions.py +286 -0
- gobby/workflows/context_actions.py +394 -0
- gobby/workflows/definitions.py +130 -0
- gobby/workflows/detection_helpers.py +208 -0
- gobby/workflows/engine.py +485 -0
- gobby/workflows/evaluator.py +669 -0
- gobby/workflows/git_utils.py +96 -0
- gobby/workflows/hooks.py +169 -0
- gobby/workflows/lifecycle_evaluator.py +613 -0
- gobby/workflows/llm_actions.py +70 -0
- gobby/workflows/loader.py +333 -0
- gobby/workflows/mcp_actions.py +60 -0
- gobby/workflows/memory_actions.py +272 -0
- gobby/workflows/premature_stop.py +164 -0
- gobby/workflows/session_actions.py +139 -0
- gobby/workflows/state_actions.py +123 -0
- gobby/workflows/state_manager.py +104 -0
- gobby/workflows/stop_signal_actions.py +163 -0
- gobby/workflows/summary_actions.py +344 -0
- gobby/workflows/task_actions.py +249 -0
- gobby/workflows/task_enforcement_actions.py +901 -0
- gobby/workflows/templates.py +52 -0
- gobby/workflows/todo_actions.py +84 -0
- gobby/workflows/webhook.py +223 -0
- gobby/workflows/webhook_executor.py +399 -0
- gobby/worktrees/__init__.py +5 -0
- gobby/worktrees/git.py +690 -0
- gobby/worktrees/merge/__init__.py +20 -0
- gobby/worktrees/merge/conflict_parser.py +177 -0
- gobby/worktrees/merge/resolver.py +485 -0
- gobby-0.2.5.dist-info/METADATA +351 -0
- gobby-0.2.5.dist-info/RECORD +383 -0
- gobby-0.2.5.dist-info/WHEEL +5 -0
- gobby-0.2.5.dist-info/entry_points.txt +2 -0
- gobby-0.2.5.dist-info/licenses/LICENSE.md +193 -0
- gobby-0.2.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,706 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Semantic tool search using embeddings.
|
|
3
|
+
|
|
4
|
+
Provides infrastructure for embedding-based tool discovery:
|
|
5
|
+
- Tool embedding storage and retrieval
|
|
6
|
+
- Cosine similarity search
|
|
7
|
+
- Integration with OpenAI text-embedding-3-small model
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import hashlib
|
|
11
|
+
import logging
|
|
12
|
+
import math
|
|
13
|
+
import struct
|
|
14
|
+
from dataclasses import dataclass
|
|
15
|
+
from datetime import UTC, datetime
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from gobby.storage.database import DatabaseProtocol
|
|
19
|
+
|
|
20
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default embedding model
# NOTE(review): the default dimension is presumably the output size of the
# default model above — confirm against the provider documentation before
# changing either value independently.
DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
DEFAULT_EMBEDDING_DIM = 1536
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
|
|
28
|
+
"""
|
|
29
|
+
Compute cosine similarity between two vectors.
|
|
30
|
+
|
|
31
|
+
Args:
|
|
32
|
+
vec1: First vector
|
|
33
|
+
vec2: Second vector
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
Cosine similarity score between -1 and 1
|
|
37
|
+
"""
|
|
38
|
+
if len(vec1) != len(vec2):
|
|
39
|
+
raise ValueError(f"Vector dimension mismatch: {len(vec1)} vs {len(vec2)}")
|
|
40
|
+
|
|
41
|
+
dot_product = sum(a * b for a, b in zip(vec1, vec2, strict=True))
|
|
42
|
+
norm1 = math.sqrt(sum(a * a for a in vec1))
|
|
43
|
+
norm2 = math.sqrt(sum(b * b for b in vec2))
|
|
44
|
+
|
|
45
|
+
if norm1 == 0 or norm2 == 0:
|
|
46
|
+
return 0.0
|
|
47
|
+
|
|
48
|
+
return dot_product / (norm1 * norm2)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
|
|
52
|
+
class SearchResult:
|
|
53
|
+
"""Represents a tool search result with similarity score."""
|
|
54
|
+
|
|
55
|
+
tool_id: str
|
|
56
|
+
server_name: str
|
|
57
|
+
tool_name: str
|
|
58
|
+
description: str | None
|
|
59
|
+
similarity: float
|
|
60
|
+
embedding_id: int
|
|
61
|
+
|
|
62
|
+
def to_dict(self) -> dict[str, Any]:
|
|
63
|
+
"""Convert to dictionary."""
|
|
64
|
+
return {
|
|
65
|
+
"tool_id": self.tool_id,
|
|
66
|
+
"server_name": self.server_name,
|
|
67
|
+
"tool_name": self.tool_name,
|
|
68
|
+
"description": self.description,
|
|
69
|
+
"similarity": round(self.similarity, 4),
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class ToolEmbedding:
    """A stored embedding vector for one tool, together with its bookkeeping columns."""

    id: int
    tool_id: str
    server_name: str
    project_id: str
    embedding: list[float]
    embedding_model: str
    embedding_dim: int
    text_hash: str
    created_at: str
    updated_at: str

    @classmethod
    def from_row(cls, row: Any) -> "ToolEmbedding":
        """
        Build a ToolEmbedding from a database row.

        Args:
            row: Row object that exposes the tool_embeddings columns by name

        Returns:
            ToolEmbedding with the BLOB column decoded into a list of floats
        """
        packed = row["embedding"]
        # The BLOB holds ``embedding_dim`` values in struct format "f".
        vector = list(struct.unpack(f"{row['embedding_dim']}f", packed))

        passthrough = (
            "id",
            "tool_id",
            "server_name",
            "project_id",
            "embedding_model",
            "embedding_dim",
            "text_hash",
            "created_at",
            "updated_at",
        )
        return cls(embedding=vector, **{column: row[column] for column in passthrough})

    def to_dict(self) -> dict[str, Any]:
        """Serialize the metadata to a dictionary; the embedding vector is left out."""
        exported = (
            "id",
            "tool_id",
            "server_name",
            "project_id",
            "embedding_model",
            "embedding_dim",
            "text_hash",
            "created_at",
            "updated_at",
        )
        return {attr: getattr(self, attr) for attr in exported}
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _embedding_to_blob(embedding: list[float]) -> bytes:
|
|
124
|
+
"""Convert embedding list to binary BLOB."""
|
|
125
|
+
return struct.pack(f"{len(embedding)}f", *embedding)
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _compute_text_hash(text: str) -> str:
|
|
129
|
+
"""Compute SHA-256 hash of text for change detection."""
|
|
130
|
+
return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def _build_tool_text(
|
|
134
|
+
name: str, description: str | None, input_schema: dict[str, Any] | None
|
|
135
|
+
) -> str:
|
|
136
|
+
"""
|
|
137
|
+
Build text representation of a tool for embedding.
|
|
138
|
+
|
|
139
|
+
Combines name, description, and parameter info into a single string
|
|
140
|
+
that captures the tool's semantic meaning.
|
|
141
|
+
"""
|
|
142
|
+
parts = [f"Tool: {name}"]
|
|
143
|
+
|
|
144
|
+
if description:
|
|
145
|
+
parts.append(f"Description: {description}")
|
|
146
|
+
|
|
147
|
+
if input_schema:
|
|
148
|
+
# Extract parameter names and descriptions
|
|
149
|
+
properties = input_schema.get("properties", {})
|
|
150
|
+
if properties:
|
|
151
|
+
param_parts = []
|
|
152
|
+
for param_name, param_def in properties.items():
|
|
153
|
+
param_desc = param_def.get("description", "")
|
|
154
|
+
param_type = param_def.get("type", "any")
|
|
155
|
+
if param_desc:
|
|
156
|
+
param_parts.append(f"{param_name} ({param_type}): {param_desc}")
|
|
157
|
+
else:
|
|
158
|
+
param_parts.append(f"{param_name} ({param_type})")
|
|
159
|
+
if param_parts:
|
|
160
|
+
parts.append("Parameters: " + ", ".join(param_parts))
|
|
161
|
+
|
|
162
|
+
return "\n".join(parts)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
class SemanticToolSearch:
|
|
166
|
+
"""
|
|
167
|
+
Manages semantic search over MCP tools using embeddings.
|
|
168
|
+
|
|
169
|
+
Provides:
|
|
170
|
+
- Embedding storage and retrieval (tool_embeddings table)
|
|
171
|
+
- Text hashing for change detection
|
|
172
|
+
- Cosine similarity search (to be implemented)
|
|
173
|
+
- Integration with embedding providers (to be implemented)
|
|
174
|
+
"""
|
|
175
|
+
|
|
176
|
+
def __init__(
    self,
    db: DatabaseProtocol,
    embedding_model: str = DEFAULT_EMBEDDING_MODEL,
    embedding_dim: int = DEFAULT_EMBEDDING_DIM,
    openai_api_key: str | None = None,
):
    """
    Set up the semantic search manager.

    Args:
        db: Database handle used for all tool_embeddings queries
        embedding_model: Name of the embedding model (default: text-embedding-3-small)
        embedding_dim: Size of the embedding vectors (default: 1536)
        openai_api_key: Optional OpenAI API key; kept privately for later
            embedding requests
    """
    self._openai_api_key = openai_api_key
    self.embedding_model, self.embedding_dim = embedding_model, embedding_dim
    self.db = db
|
|
196
|
+
|
|
197
|
+
def store_embedding(
    self,
    tool_id: str,
    server_name: str,
    project_id: str,
    embedding: list[float],
    text_hash: str,
) -> ToolEmbedding:
    """
    Insert a tool embedding, or overwrite the existing row for the same tool.

    On a ``tool_id`` conflict every column except ``created_at`` is replaced.
    The stored dimension is the length of ``embedding`` itself.

    Args:
        tool_id: ID of the tool in the tools table
        server_name: Name of the MCP server
        project_id: Project ID
        embedding: Embedding vector as list of floats
        text_hash: Hash of the text used to generate the embedding

    Returns:
        The row as read back from the database after the write

    Raises:
        RuntimeError: If the row cannot be read back after the write
    """
    timestamp = datetime.now(UTC).isoformat()
    packed_vector = _embedding_to_blob(embedding)

    upsert_sql = """
        INSERT INTO tool_embeddings (
            tool_id, server_name, project_id, embedding,
            embedding_model, embedding_dim, text_hash, created_at, updated_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(tool_id) DO UPDATE SET
            server_name = excluded.server_name,
            project_id = excluded.project_id,
            embedding = excluded.embedding,
            embedding_model = excluded.embedding_model,
            embedding_dim = excluded.embedding_dim,
            text_hash = excluded.text_hash,
            updated_at = excluded.updated_at
        """
    # The same timestamp fills both created_at and updated_at; on conflict
    # only updated_at is taken from it.
    bound_values = (
        tool_id,
        server_name,
        project_id,
        packed_vector,
        self.embedding_model,
        len(embedding),
        text_hash,
        timestamp,
        timestamp,
    )
    self.db.execute(upsert_sql, bound_values)

    stored = self.get_embedding(tool_id)
    if stored is not None:
        return stored
    raise RuntimeError(f"Failed to retrieve embedding for tool {tool_id} after store")
|
|
254
|
+
|
|
255
|
+
def get_embedding(self, tool_id: str) -> ToolEmbedding | None:
    """
    Look up the stored embedding for a single tool.

    Args:
        tool_id: Tool ID

    Returns:
        ToolEmbedding for the tool, or None when no row exists
    """
    lookup_sql = "SELECT * FROM tool_embeddings WHERE tool_id = ?"
    record = self.db.fetchone(lookup_sql, (tool_id,))
    if not record:
        return None
    return ToolEmbedding.from_row(record)
|
|
270
|
+
|
|
271
|
+
def get_embeddings_for_project(self, project_id: str) -> list[ToolEmbedding]:
    """
    Fetch every stored embedding that belongs to a project.

    Args:
        project_id: Project ID

    Returns:
        List of ToolEmbedding instances (empty when nothing is stored)
    """
    project_sql = "SELECT * FROM tool_embeddings WHERE project_id = ?"
    records = self.db.fetchall(project_sql, (project_id,))

    embeddings: list[ToolEmbedding] = []
    for record in records:
        embeddings.append(ToolEmbedding.from_row(record))
    return embeddings
|
|
286
|
+
|
|
287
|
+
def get_embeddings_for_server(self, server_name: str, project_id: str) -> list[ToolEmbedding]:
    """
    Fetch every stored embedding for one server within a project.

    Args:
        server_name: Server name
        project_id: Project ID

    Returns:
        List of ToolEmbedding instances (empty when nothing is stored)
    """
    server_sql = "SELECT * FROM tool_embeddings WHERE server_name = ? AND project_id = ?"
    filters = (server_name, project_id)
    records = self.db.fetchall(server_sql, filters)
    return [ToolEmbedding.from_row(record) for record in records]
|
|
303
|
+
|
|
304
|
+
def delete_embedding(self, tool_id: str) -> bool:
    """
    Remove the stored embedding for a single tool.

    Args:
        tool_id: Tool ID

    Returns:
        True when a row was removed, False when no row matched
    """
    delete_sql = "DELETE FROM tool_embeddings WHERE tool_id = ?"
    outcome = self.db.execute(delete_sql, (tool_id,))
    removed = outcome.rowcount
    return removed > 0
|
|
319
|
+
|
|
320
|
+
def delete_embeddings_for_server(self, server_name: str, project_id: str) -> int:
    """
    Remove every stored embedding for one server within a project.

    Args:
        server_name: Server name
        project_id: Project ID

    Returns:
        Row count reported by the delete statement
    """
    delete_sql = "DELETE FROM tool_embeddings WHERE server_name = ? AND project_id = ?"
    filters = (server_name, project_id)
    outcome = self.db.execute(delete_sql, filters)
    return outcome.rowcount
|
|
336
|
+
|
|
337
|
+
def needs_reembedding(
|
|
338
|
+
self,
|
|
339
|
+
tool_id: str,
|
|
340
|
+
name: str,
|
|
341
|
+
description: str | None,
|
|
342
|
+
input_schema: dict[str, Any] | None,
|
|
343
|
+
) -> bool:
|
|
344
|
+
"""
|
|
345
|
+
Check if a tool needs (re)embedding.
|
|
346
|
+
|
|
347
|
+
Computes hash of the tool's text representation and compares
|
|
348
|
+
to stored hash.
|
|
349
|
+
|
|
350
|
+
Args:
|
|
351
|
+
tool_id: Tool ID
|
|
352
|
+
name: Tool name
|
|
353
|
+
description: Tool description
|
|
354
|
+
input_schema: Tool input schema
|
|
355
|
+
|
|
356
|
+
Returns:
|
|
357
|
+
True if embedding is missing or outdated
|
|
358
|
+
"""
|
|
359
|
+
existing = self.get_embedding(tool_id)
|
|
360
|
+
if not existing:
|
|
361
|
+
return True
|
|
362
|
+
|
|
363
|
+
current_hash = _compute_text_hash(_build_tool_text(name, description, input_schema))
|
|
364
|
+
return existing.text_hash != current_hash
|
|
365
|
+
|
|
366
|
+
def get_embedding_stats(self, project_id: str | None = None) -> dict[str, Any]:
|
|
367
|
+
"""
|
|
368
|
+
Get statistics about stored embeddings.
|
|
369
|
+
|
|
370
|
+
Args:
|
|
371
|
+
project_id: Optional project filter
|
|
372
|
+
|
|
373
|
+
Returns:
|
|
374
|
+
Dict with count, servers, and model info
|
|
375
|
+
"""
|
|
376
|
+
if project_id:
|
|
377
|
+
count_row = self.db.fetchone(
|
|
378
|
+
"SELECT COUNT(*) as count FROM tool_embeddings WHERE project_id = ?",
|
|
379
|
+
(project_id,),
|
|
380
|
+
)
|
|
381
|
+
servers_rows = self.db.fetchall(
|
|
382
|
+
"""
|
|
383
|
+
SELECT server_name, COUNT(*) as count
|
|
384
|
+
FROM tool_embeddings
|
|
385
|
+
WHERE project_id = ?
|
|
386
|
+
GROUP BY server_name
|
|
387
|
+
""",
|
|
388
|
+
(project_id,),
|
|
389
|
+
)
|
|
390
|
+
else:
|
|
391
|
+
count_row = self.db.fetchone("SELECT COUNT(*) as count FROM tool_embeddings", ())
|
|
392
|
+
servers_rows = self.db.fetchall(
|
|
393
|
+
"""
|
|
394
|
+
SELECT server_name, COUNT(*) as count
|
|
395
|
+
FROM tool_embeddings
|
|
396
|
+
GROUP BY server_name
|
|
397
|
+
""",
|
|
398
|
+
(),
|
|
399
|
+
)
|
|
400
|
+
|
|
401
|
+
return {
|
|
402
|
+
"total_embeddings": count_row["count"] if count_row else 0,
|
|
403
|
+
"by_server": {row["server_name"]: row["count"] for row in servers_rows},
|
|
404
|
+
"embedding_model": self.embedding_model,
|
|
405
|
+
"embedding_dim": self.embedding_dim,
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
@staticmethod
def build_tool_text(
    name: str, description: str | None, input_schema: dict[str, Any] | None
) -> str:
    """
    Produce the text that represents a tool for embedding.

    Delegates to the module-level ``_build_tool_text`` helper.

    Args:
        name: Tool name
        description: Tool description
        input_schema: Tool input schema

    Returns:
        Text suitable for embedding
    """
    tool_text = _build_tool_text(name, description, input_schema)
    return tool_text
|
|
426
|
+
|
|
427
|
+
@staticmethod
def compute_text_hash(text: str) -> str:
    """
    Hash text so that later changes to it can be detected.

    Delegates to the module-level ``_compute_text_hash`` helper.

    Args:
        text: Text to hash

    Returns:
        16-character hex hash
    """
    digest = _compute_text_hash(text)
    return digest
|
|
441
|
+
|
|
442
|
+
async def embed_text(self, text: str) -> list[float]:
    """
    Generate an embedding vector for a piece of text.

    The API key passed to the constructor is used when present; otherwise
    the OPENAI_API_KEY environment variable is consulted.

    Args:
        text: Text to embed

    Returns:
        Embedding vector as list of floats

    Raises:
        RuntimeError: If no API key is available or embedding fails
    """
    import os

    api_key = self._openai_api_key
    if not api_key:
        api_key = os.environ.get("OPENAI_API_KEY")

    if api_key:
        return await self._embed_text_litellm(text, api_key=api_key)

    raise RuntimeError(
        "OPENAI_API_KEY not configured. Add it to llm_providers.api_keys in config.yaml"
    )
|
|
465
|
+
|
|
466
|
+
async def _embed_text_litellm(self, text: str, api_key: str) -> list[float]:
|
|
467
|
+
"""Generate embedding using LiteLLM (OpenAI API).
|
|
468
|
+
|
|
469
|
+
Args:
|
|
470
|
+
text: Text to embed
|
|
471
|
+
api_key: OpenAI API key (from Codex auth or environment)
|
|
472
|
+
|
|
473
|
+
Returns:
|
|
474
|
+
Embedding vector as list of floats
|
|
475
|
+
"""
|
|
476
|
+
try:
|
|
477
|
+
import litellm
|
|
478
|
+
except ImportError as e:
|
|
479
|
+
raise RuntimeError("litellm package not installed. Run: pip install litellm") from e
|
|
480
|
+
|
|
481
|
+
try:
|
|
482
|
+
response = await litellm.aembedding(
|
|
483
|
+
model=self.embedding_model,
|
|
484
|
+
input=[text],
|
|
485
|
+
api_key=api_key,
|
|
486
|
+
)
|
|
487
|
+
embedding: list[float] = response.data[0]["embedding"]
|
|
488
|
+
logger.debug(f"Generated embedding via LiteLLM with {len(embedding)} dimensions")
|
|
489
|
+
return embedding
|
|
490
|
+
except Exception as e:
|
|
491
|
+
logger.error(f"Failed to generate embedding with LiteLLM: {e}")
|
|
492
|
+
raise RuntimeError(f"Embedding generation failed: {e}") from e
|
|
493
|
+
|
|
494
|
+
async def embed_tool(
    self,
    tool_id: str,
    name: str,
    description: str | None,
    input_schema: dict[str, Any] | None,
    server_name: str,
    project_id: str,
    force: bool = False,
) -> ToolEmbedding | None:
    """
    Embed a single tool and persist the result.

    Unless ``force`` is set, ``needs_reembedding`` is consulted first and
    the tool is skipped when it reports that nothing needs to be done.

    Args:
        tool_id: Tool ID
        name: Tool name
        description: Tool description
        input_schema: Tool input schema
        server_name: MCP server name
        project_id: Project ID
        force: Embed without consulting ``needs_reembedding``

    Returns:
        The value returned by ``store_embedding`` when an embedding was
        generated, None when the tool was skipped
    """
    # With force=True the `or` short-circuits, so needs_reembedding is
    # never called in that case.
    if not (force or self.needs_reembedding(tool_id, name, description, input_schema)):
        logger.debug(f"Tool {name} embedding is up-to-date, skipping")
        return None

    # Text and hash are derived before the embedding request is made.
    tool_text = _build_tool_text(name, description, input_schema)
    digest = _compute_text_hash(tool_text)

    vector = await self.embed_text(tool_text)

    stored = self.store_embedding(
        tool_id=tool_id,
        server_name=server_name,
        project_id=project_id,
        embedding=vector,
        text_hash=digest,
    )
    return stored
|
|
540
|
+
|
|
541
|
+
async def embed_all_tools(
    self,
    project_id: str,
    mcp_manager: Any,
    force: bool = False,
) -> dict[str, Any]:
    """
    Embed every cached tool of every MCP server in a project.

    Servers are listed through ``mcp_manager.list_servers`` and their
    tools through ``mcp_manager.get_cached_tools``; each tool is handed to
    ``embed_tool``. A failure on one tool is recorded and does not stop
    the remaining tools from being processed.

    Args:
        project_id: Project ID
        mcp_manager: LocalMCPManager instance for accessing tools
        force: Forwarded to ``embed_tool`` for every tool

    Returns:
        Dict with the keys ``embedded``, ``skipped``, ``failed`` (counts),
        ``errors`` (list of "server/tool: error" strings) and
        ``by_server`` (server name -> per-server counts)

    Raises:
        TypeError: If ``mcp_manager`` is not a LocalMCPManager instance
    """
    from gobby.storage.mcp import LocalMCPManager

    if not isinstance(mcp_manager, LocalMCPManager):
        raise TypeError("mcp_manager must be a LocalMCPManager instance")

    totals: dict[str, Any] = {
        "embedded": 0,
        "skipped": 0,
        "failed": 0,
        "errors": [],
        "by_server": {},
    }

    # enabled_only=False is passed through as-is; presumably this includes
    # disabled servers - confirm against LocalMCPManager.list_servers.
    for server in mcp_manager.list_servers(project_id=project_id, enabled_only=False):
        per_server = dict.fromkeys(("embedded", "skipped", "failed"), 0)

        for tool in mcp_manager.get_cached_tools(server.name, project_id=project_id):
            try:
                outcome = await self.embed_tool(
                    tool_id=tool.id,
                    name=tool.name,
                    description=tool.description,
                    input_schema=tool.input_schema,
                    server_name=server.name,
                    project_id=project_id,
                    force=force,
                )

                # A falsy result from embed_tool counts as "skipped".
                if not outcome:
                    per_server["skipped"] += 1
                    totals["skipped"] += 1
                else:
                    per_server["embedded"] += 1
                    totals["embedded"] += 1
                    logger.info(f"Embedded tool: {server.name}/{tool.name}")

            except Exception as exc:
                # Best effort: record the failure and move on to the next tool.
                per_server["failed"] += 1
                totals["failed"] += 1
                failure = f"{server.name}/{tool.name}: {exc}"
                totals["errors"].append(failure)
                logger.error(f"Failed to embed tool {failure}")

        totals["by_server"][server.name] = per_server

    return totals
|
|
613
|
+
|
|
614
|
+
async def search_tools(
    self,
    query: str,
    project_id: str,
    top_k: int = 10,
    min_similarity: float = 0.0,
    server_filter: str | None = None,
) -> list[SearchResult]:
    """
    Search for tools semantically similar to a query.

    Embeds the query, scores it against the stored tool embeddings of the
    project (or of a single server when ``server_filter`` is given) with
    ``_cosine_similarity``, and returns the best matches.

    Args:
        query: Search query text
        project_id: Project ID to search within
        top_k: Maximum number of results to return. Values of zero or
            less return an empty list without embedding the query.
        min_similarity: Results scoring below this value are dropped
        server_filter: Optional server name to filter results

    Returns:
        List of SearchResult sorted by similarity (descending), at most
        ``top_k`` long. Tools with no metadata row are reported with
        the name "unknown" and no description.

    Raises:
        RuntimeError: Propagated from ``embed_text`` when the query cannot
            be embedded
    """
    # A non-positive limit can never yield results. Without this guard a
    # negative top_k would reach the final slice and silently drop items
    # from the END of the ranking (results[:-1]) instead of limiting it.
    # Returning early also avoids an embedding request that is never used.
    if top_k <= 0:
        return []

    # Embed the query
    query_embedding = await self.embed_text(query)

    # Get all embeddings for the project (or only for the filtered server)
    if server_filter:
        embeddings = self.get_embeddings_for_server(server_filter, project_id)
    else:
        embeddings = self.get_embeddings_for_project(project_id)

    if not embeddings:
        logger.debug(f"No embeddings found for project {project_id}")
        return []

    # Get tool metadata (name, description) keyed by tool id
    tool_info = self._get_tool_info_map(project_id, server_filter)

    # Compute similarities, keeping only those that meet the threshold
    results: list[SearchResult] = []
    for emb in embeddings:
        similarity = _cosine_similarity(query_embedding, emb.embedding)

        if similarity >= min_similarity:
            # An embedding whose tool row is missing still produces a result.
            tool_data = tool_info.get(emb.tool_id, {})
            results.append(
                SearchResult(
                    tool_id=emb.tool_id,
                    server_name=emb.server_name,
                    tool_name=tool_data.get("name", "unknown"),
                    description=tool_data.get("description"),
                    similarity=similarity,
                    embedding_id=emb.id,
                )
            )

    # Sort by similarity descending and limit
    results.sort(key=lambda x: x.similarity, reverse=True)
    return results[:top_k]
|
|
675
|
+
|
|
676
|
+
def _get_tool_info_map(
|
|
677
|
+
self, project_id: str, server_filter: str | None = None
|
|
678
|
+
) -> dict[str, dict[str, Any]]:
|
|
679
|
+
"""
|
|
680
|
+
Get tool metadata map for search results.
|
|
681
|
+
|
|
682
|
+
Args:
|
|
683
|
+
project_id: Project ID
|
|
684
|
+
server_filter: Optional server name filter
|
|
685
|
+
|
|
686
|
+
Returns:
|
|
687
|
+
Dict mapping tool_id to {name, description}
|
|
688
|
+
"""
|
|
689
|
+
if server_filter:
|
|
690
|
+
query = """
|
|
691
|
+
SELECT t.id, t.name, t.description
|
|
692
|
+
FROM tools t
|
|
693
|
+
JOIN mcp_servers s ON t.mcp_server_id = s.id
|
|
694
|
+
WHERE s.project_id = ? AND s.name = ?
|
|
695
|
+
"""
|
|
696
|
+
rows = self.db.fetchall(query, (project_id, server_filter))
|
|
697
|
+
else:
|
|
698
|
+
query = """
|
|
699
|
+
SELECT t.id, t.name, t.description
|
|
700
|
+
FROM tools t
|
|
701
|
+
JOIN mcp_servers s ON t.mcp_server_id = s.id
|
|
702
|
+
WHERE s.project_id = ?
|
|
703
|
+
"""
|
|
704
|
+
rows = self.db.fetchall(query, (project_id,))
|
|
705
|
+
|
|
706
|
+
return {row["id"]: {"name": row["name"], "description": row["description"]} for row in rows}
|