gobby 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (383) hide show
  1. gobby/__init__.py +3 -0
  2. gobby/adapters/__init__.py +30 -0
  3. gobby/adapters/base.py +93 -0
  4. gobby/adapters/claude_code.py +276 -0
  5. gobby/adapters/codex.py +1292 -0
  6. gobby/adapters/gemini.py +343 -0
  7. gobby/agents/__init__.py +37 -0
  8. gobby/agents/codex_session.py +120 -0
  9. gobby/agents/constants.py +112 -0
  10. gobby/agents/context.py +362 -0
  11. gobby/agents/definitions.py +133 -0
  12. gobby/agents/gemini_session.py +111 -0
  13. gobby/agents/registry.py +618 -0
  14. gobby/agents/runner.py +968 -0
  15. gobby/agents/session.py +259 -0
  16. gobby/agents/spawn.py +916 -0
  17. gobby/agents/spawners/__init__.py +77 -0
  18. gobby/agents/spawners/base.py +142 -0
  19. gobby/agents/spawners/cross_platform.py +266 -0
  20. gobby/agents/spawners/embedded.py +225 -0
  21. gobby/agents/spawners/headless.py +226 -0
  22. gobby/agents/spawners/linux.py +125 -0
  23. gobby/agents/spawners/macos.py +277 -0
  24. gobby/agents/spawners/windows.py +308 -0
  25. gobby/agents/tty_config.py +319 -0
  26. gobby/autonomous/__init__.py +32 -0
  27. gobby/autonomous/progress_tracker.py +447 -0
  28. gobby/autonomous/stop_registry.py +269 -0
  29. gobby/autonomous/stuck_detector.py +383 -0
  30. gobby/cli/__init__.py +67 -0
  31. gobby/cli/__main__.py +8 -0
  32. gobby/cli/agents.py +529 -0
  33. gobby/cli/artifacts.py +266 -0
  34. gobby/cli/daemon.py +329 -0
  35. gobby/cli/extensions.py +526 -0
  36. gobby/cli/github.py +263 -0
  37. gobby/cli/init.py +53 -0
  38. gobby/cli/install.py +614 -0
  39. gobby/cli/installers/__init__.py +37 -0
  40. gobby/cli/installers/antigravity.py +65 -0
  41. gobby/cli/installers/claude.py +363 -0
  42. gobby/cli/installers/codex.py +192 -0
  43. gobby/cli/installers/gemini.py +294 -0
  44. gobby/cli/installers/git_hooks.py +377 -0
  45. gobby/cli/installers/shared.py +737 -0
  46. gobby/cli/linear.py +250 -0
  47. gobby/cli/mcp.py +30 -0
  48. gobby/cli/mcp_proxy.py +698 -0
  49. gobby/cli/memory.py +304 -0
  50. gobby/cli/merge.py +384 -0
  51. gobby/cli/projects.py +79 -0
  52. gobby/cli/sessions.py +622 -0
  53. gobby/cli/tasks/__init__.py +30 -0
  54. gobby/cli/tasks/_utils.py +658 -0
  55. gobby/cli/tasks/ai.py +1025 -0
  56. gobby/cli/tasks/commits.py +169 -0
  57. gobby/cli/tasks/crud.py +685 -0
  58. gobby/cli/tasks/deps.py +135 -0
  59. gobby/cli/tasks/labels.py +63 -0
  60. gobby/cli/tasks/main.py +273 -0
  61. gobby/cli/tasks/search.py +178 -0
  62. gobby/cli/tui.py +34 -0
  63. gobby/cli/utils.py +513 -0
  64. gobby/cli/workflows.py +927 -0
  65. gobby/cli/worktrees.py +481 -0
  66. gobby/config/__init__.py +129 -0
  67. gobby/config/app.py +551 -0
  68. gobby/config/extensions.py +167 -0
  69. gobby/config/features.py +472 -0
  70. gobby/config/llm_providers.py +98 -0
  71. gobby/config/logging.py +66 -0
  72. gobby/config/mcp.py +346 -0
  73. gobby/config/persistence.py +247 -0
  74. gobby/config/servers.py +141 -0
  75. gobby/config/sessions.py +250 -0
  76. gobby/config/tasks.py +784 -0
  77. gobby/hooks/__init__.py +104 -0
  78. gobby/hooks/artifact_capture.py +213 -0
  79. gobby/hooks/broadcaster.py +243 -0
  80. gobby/hooks/event_handlers.py +723 -0
  81. gobby/hooks/events.py +218 -0
  82. gobby/hooks/git.py +169 -0
  83. gobby/hooks/health_monitor.py +171 -0
  84. gobby/hooks/hook_manager.py +856 -0
  85. gobby/hooks/hook_types.py +575 -0
  86. gobby/hooks/plugins.py +813 -0
  87. gobby/hooks/session_coordinator.py +396 -0
  88. gobby/hooks/verification_runner.py +268 -0
  89. gobby/hooks/webhooks.py +339 -0
  90. gobby/install/claude/commands/gobby/bug.md +51 -0
  91. gobby/install/claude/commands/gobby/chore.md +51 -0
  92. gobby/install/claude/commands/gobby/epic.md +52 -0
  93. gobby/install/claude/commands/gobby/eval.md +235 -0
  94. gobby/install/claude/commands/gobby/feat.md +49 -0
  95. gobby/install/claude/commands/gobby/nit.md +52 -0
  96. gobby/install/claude/commands/gobby/ref.md +52 -0
  97. gobby/install/claude/hooks/HOOK_SCHEMAS.md +632 -0
  98. gobby/install/claude/hooks/hook_dispatcher.py +364 -0
  99. gobby/install/claude/hooks/validate_settings.py +102 -0
  100. gobby/install/claude/hooks-template.json +118 -0
  101. gobby/install/codex/hooks/hook_dispatcher.py +153 -0
  102. gobby/install/codex/prompts/forget.md +7 -0
  103. gobby/install/codex/prompts/memories.md +7 -0
  104. gobby/install/codex/prompts/recall.md +7 -0
  105. gobby/install/codex/prompts/remember.md +13 -0
  106. gobby/install/gemini/hooks/hook_dispatcher.py +268 -0
  107. gobby/install/gemini/hooks-template.json +138 -0
  108. gobby/install/shared/plugins/code_guardian.py +456 -0
  109. gobby/install/shared/plugins/example_notify.py +331 -0
  110. gobby/integrations/__init__.py +10 -0
  111. gobby/integrations/github.py +145 -0
  112. gobby/integrations/linear.py +145 -0
  113. gobby/llm/__init__.py +40 -0
  114. gobby/llm/base.py +120 -0
  115. gobby/llm/claude.py +578 -0
  116. gobby/llm/claude_executor.py +503 -0
  117. gobby/llm/codex.py +322 -0
  118. gobby/llm/codex_executor.py +513 -0
  119. gobby/llm/executor.py +316 -0
  120. gobby/llm/factory.py +34 -0
  121. gobby/llm/gemini.py +258 -0
  122. gobby/llm/gemini_executor.py +339 -0
  123. gobby/llm/litellm.py +287 -0
  124. gobby/llm/litellm_executor.py +303 -0
  125. gobby/llm/resolver.py +499 -0
  126. gobby/llm/service.py +236 -0
  127. gobby/mcp_proxy/__init__.py +29 -0
  128. gobby/mcp_proxy/actions.py +175 -0
  129. gobby/mcp_proxy/daemon_control.py +198 -0
  130. gobby/mcp_proxy/importer.py +436 -0
  131. gobby/mcp_proxy/lazy.py +325 -0
  132. gobby/mcp_proxy/manager.py +798 -0
  133. gobby/mcp_proxy/metrics.py +609 -0
  134. gobby/mcp_proxy/models.py +139 -0
  135. gobby/mcp_proxy/registries.py +215 -0
  136. gobby/mcp_proxy/schema_hash.py +381 -0
  137. gobby/mcp_proxy/semantic_search.py +706 -0
  138. gobby/mcp_proxy/server.py +549 -0
  139. gobby/mcp_proxy/services/__init__.py +0 -0
  140. gobby/mcp_proxy/services/fallback.py +306 -0
  141. gobby/mcp_proxy/services/recommendation.py +224 -0
  142. gobby/mcp_proxy/services/server_mgmt.py +214 -0
  143. gobby/mcp_proxy/services/system.py +72 -0
  144. gobby/mcp_proxy/services/tool_filter.py +231 -0
  145. gobby/mcp_proxy/services/tool_proxy.py +309 -0
  146. gobby/mcp_proxy/stdio.py +565 -0
  147. gobby/mcp_proxy/tools/__init__.py +27 -0
  148. gobby/mcp_proxy/tools/agents.py +1103 -0
  149. gobby/mcp_proxy/tools/artifacts.py +207 -0
  150. gobby/mcp_proxy/tools/hub.py +335 -0
  151. gobby/mcp_proxy/tools/internal.py +337 -0
  152. gobby/mcp_proxy/tools/memory.py +543 -0
  153. gobby/mcp_proxy/tools/merge.py +422 -0
  154. gobby/mcp_proxy/tools/metrics.py +283 -0
  155. gobby/mcp_proxy/tools/orchestration/__init__.py +23 -0
  156. gobby/mcp_proxy/tools/orchestration/cleanup.py +619 -0
  157. gobby/mcp_proxy/tools/orchestration/monitor.py +380 -0
  158. gobby/mcp_proxy/tools/orchestration/orchestrate.py +746 -0
  159. gobby/mcp_proxy/tools/orchestration/review.py +736 -0
  160. gobby/mcp_proxy/tools/orchestration/utils.py +16 -0
  161. gobby/mcp_proxy/tools/session_messages.py +1056 -0
  162. gobby/mcp_proxy/tools/task_dependencies.py +219 -0
  163. gobby/mcp_proxy/tools/task_expansion.py +591 -0
  164. gobby/mcp_proxy/tools/task_github.py +393 -0
  165. gobby/mcp_proxy/tools/task_linear.py +379 -0
  166. gobby/mcp_proxy/tools/task_orchestration.py +77 -0
  167. gobby/mcp_proxy/tools/task_readiness.py +522 -0
  168. gobby/mcp_proxy/tools/task_sync.py +351 -0
  169. gobby/mcp_proxy/tools/task_validation.py +843 -0
  170. gobby/mcp_proxy/tools/tasks/__init__.py +25 -0
  171. gobby/mcp_proxy/tools/tasks/_context.py +112 -0
  172. gobby/mcp_proxy/tools/tasks/_crud.py +516 -0
  173. gobby/mcp_proxy/tools/tasks/_factory.py +176 -0
  174. gobby/mcp_proxy/tools/tasks/_helpers.py +129 -0
  175. gobby/mcp_proxy/tools/tasks/_lifecycle.py +517 -0
  176. gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +301 -0
  177. gobby/mcp_proxy/tools/tasks/_resolution.py +55 -0
  178. gobby/mcp_proxy/tools/tasks/_search.py +215 -0
  179. gobby/mcp_proxy/tools/tasks/_session.py +125 -0
  180. gobby/mcp_proxy/tools/workflows.py +973 -0
  181. gobby/mcp_proxy/tools/worktrees.py +1264 -0
  182. gobby/mcp_proxy/transports/__init__.py +0 -0
  183. gobby/mcp_proxy/transports/base.py +95 -0
  184. gobby/mcp_proxy/transports/factory.py +44 -0
  185. gobby/mcp_proxy/transports/http.py +139 -0
  186. gobby/mcp_proxy/transports/stdio.py +213 -0
  187. gobby/mcp_proxy/transports/websocket.py +136 -0
  188. gobby/memory/backends/__init__.py +116 -0
  189. gobby/memory/backends/mem0.py +408 -0
  190. gobby/memory/backends/memu.py +485 -0
  191. gobby/memory/backends/null.py +111 -0
  192. gobby/memory/backends/openmemory.py +537 -0
  193. gobby/memory/backends/sqlite.py +304 -0
  194. gobby/memory/context.py +87 -0
  195. gobby/memory/manager.py +1001 -0
  196. gobby/memory/protocol.py +451 -0
  197. gobby/memory/search/__init__.py +66 -0
  198. gobby/memory/search/text.py +127 -0
  199. gobby/memory/viz.py +258 -0
  200. gobby/prompts/__init__.py +13 -0
  201. gobby/prompts/defaults/expansion/system.md +119 -0
  202. gobby/prompts/defaults/expansion/user.md +48 -0
  203. gobby/prompts/defaults/external_validation/agent.md +72 -0
  204. gobby/prompts/defaults/external_validation/external.md +63 -0
  205. gobby/prompts/defaults/external_validation/spawn.md +83 -0
  206. gobby/prompts/defaults/external_validation/system.md +6 -0
  207. gobby/prompts/defaults/features/import_mcp.md +22 -0
  208. gobby/prompts/defaults/features/import_mcp_github.md +17 -0
  209. gobby/prompts/defaults/features/import_mcp_search.md +16 -0
  210. gobby/prompts/defaults/features/recommend_tools.md +32 -0
  211. gobby/prompts/defaults/features/recommend_tools_hybrid.md +35 -0
  212. gobby/prompts/defaults/features/recommend_tools_llm.md +30 -0
  213. gobby/prompts/defaults/features/server_description.md +20 -0
  214. gobby/prompts/defaults/features/server_description_system.md +6 -0
  215. gobby/prompts/defaults/features/task_description.md +31 -0
  216. gobby/prompts/defaults/features/task_description_system.md +6 -0
  217. gobby/prompts/defaults/features/tool_summary.md +17 -0
  218. gobby/prompts/defaults/features/tool_summary_system.md +6 -0
  219. gobby/prompts/defaults/research/step.md +58 -0
  220. gobby/prompts/defaults/validation/criteria.md +47 -0
  221. gobby/prompts/defaults/validation/validate.md +38 -0
  222. gobby/prompts/loader.py +346 -0
  223. gobby/prompts/models.py +113 -0
  224. gobby/py.typed +0 -0
  225. gobby/runner.py +488 -0
  226. gobby/search/__init__.py +23 -0
  227. gobby/search/protocol.py +104 -0
  228. gobby/search/tfidf.py +232 -0
  229. gobby/servers/__init__.py +7 -0
  230. gobby/servers/http.py +636 -0
  231. gobby/servers/models.py +31 -0
  232. gobby/servers/routes/__init__.py +23 -0
  233. gobby/servers/routes/admin.py +416 -0
  234. gobby/servers/routes/dependencies.py +118 -0
  235. gobby/servers/routes/mcp/__init__.py +24 -0
  236. gobby/servers/routes/mcp/hooks.py +135 -0
  237. gobby/servers/routes/mcp/plugins.py +121 -0
  238. gobby/servers/routes/mcp/tools.py +1337 -0
  239. gobby/servers/routes/mcp/webhooks.py +159 -0
  240. gobby/servers/routes/sessions.py +582 -0
  241. gobby/servers/websocket.py +766 -0
  242. gobby/sessions/__init__.py +13 -0
  243. gobby/sessions/analyzer.py +322 -0
  244. gobby/sessions/lifecycle.py +240 -0
  245. gobby/sessions/manager.py +563 -0
  246. gobby/sessions/processor.py +225 -0
  247. gobby/sessions/summary.py +532 -0
  248. gobby/sessions/transcripts/__init__.py +41 -0
  249. gobby/sessions/transcripts/base.py +125 -0
  250. gobby/sessions/transcripts/claude.py +386 -0
  251. gobby/sessions/transcripts/codex.py +143 -0
  252. gobby/sessions/transcripts/gemini.py +195 -0
  253. gobby/storage/__init__.py +21 -0
  254. gobby/storage/agents.py +409 -0
  255. gobby/storage/artifact_classifier.py +341 -0
  256. gobby/storage/artifacts.py +285 -0
  257. gobby/storage/compaction.py +67 -0
  258. gobby/storage/database.py +357 -0
  259. gobby/storage/inter_session_messages.py +194 -0
  260. gobby/storage/mcp.py +680 -0
  261. gobby/storage/memories.py +562 -0
  262. gobby/storage/merge_resolutions.py +550 -0
  263. gobby/storage/migrations.py +860 -0
  264. gobby/storage/migrations_legacy.py +1359 -0
  265. gobby/storage/projects.py +166 -0
  266. gobby/storage/session_messages.py +251 -0
  267. gobby/storage/session_tasks.py +97 -0
  268. gobby/storage/sessions.py +817 -0
  269. gobby/storage/task_dependencies.py +223 -0
  270. gobby/storage/tasks/__init__.py +42 -0
  271. gobby/storage/tasks/_aggregates.py +180 -0
  272. gobby/storage/tasks/_crud.py +449 -0
  273. gobby/storage/tasks/_id.py +104 -0
  274. gobby/storage/tasks/_lifecycle.py +311 -0
  275. gobby/storage/tasks/_manager.py +889 -0
  276. gobby/storage/tasks/_models.py +300 -0
  277. gobby/storage/tasks/_ordering.py +119 -0
  278. gobby/storage/tasks/_path_cache.py +110 -0
  279. gobby/storage/tasks/_queries.py +343 -0
  280. gobby/storage/tasks/_search.py +143 -0
  281. gobby/storage/workflow_audit.py +393 -0
  282. gobby/storage/worktrees.py +547 -0
  283. gobby/sync/__init__.py +29 -0
  284. gobby/sync/github.py +333 -0
  285. gobby/sync/linear.py +304 -0
  286. gobby/sync/memories.py +284 -0
  287. gobby/sync/tasks.py +641 -0
  288. gobby/tasks/__init__.py +8 -0
  289. gobby/tasks/build_verification.py +193 -0
  290. gobby/tasks/commits.py +633 -0
  291. gobby/tasks/context.py +747 -0
  292. gobby/tasks/criteria.py +342 -0
  293. gobby/tasks/enhanced_validator.py +226 -0
  294. gobby/tasks/escalation.py +263 -0
  295. gobby/tasks/expansion.py +626 -0
  296. gobby/tasks/external_validator.py +764 -0
  297. gobby/tasks/issue_extraction.py +171 -0
  298. gobby/tasks/prompts/expand.py +327 -0
  299. gobby/tasks/research.py +421 -0
  300. gobby/tasks/tdd.py +352 -0
  301. gobby/tasks/tree_builder.py +263 -0
  302. gobby/tasks/validation.py +712 -0
  303. gobby/tasks/validation_history.py +357 -0
  304. gobby/tasks/validation_models.py +89 -0
  305. gobby/tools/__init__.py +0 -0
  306. gobby/tools/summarizer.py +170 -0
  307. gobby/tui/__init__.py +5 -0
  308. gobby/tui/api_client.py +281 -0
  309. gobby/tui/app.py +327 -0
  310. gobby/tui/screens/__init__.py +25 -0
  311. gobby/tui/screens/agents.py +333 -0
  312. gobby/tui/screens/chat.py +450 -0
  313. gobby/tui/screens/dashboard.py +377 -0
  314. gobby/tui/screens/memory.py +305 -0
  315. gobby/tui/screens/metrics.py +231 -0
  316. gobby/tui/screens/orchestrator.py +904 -0
  317. gobby/tui/screens/sessions.py +412 -0
  318. gobby/tui/screens/tasks.py +442 -0
  319. gobby/tui/screens/workflows.py +289 -0
  320. gobby/tui/screens/worktrees.py +174 -0
  321. gobby/tui/widgets/__init__.py +21 -0
  322. gobby/tui/widgets/chat.py +210 -0
  323. gobby/tui/widgets/conductor.py +104 -0
  324. gobby/tui/widgets/menu.py +132 -0
  325. gobby/tui/widgets/message_panel.py +160 -0
  326. gobby/tui/widgets/review_gate.py +224 -0
  327. gobby/tui/widgets/task_tree.py +99 -0
  328. gobby/tui/widgets/token_budget.py +166 -0
  329. gobby/tui/ws_client.py +258 -0
  330. gobby/utils/__init__.py +3 -0
  331. gobby/utils/daemon_client.py +235 -0
  332. gobby/utils/git.py +222 -0
  333. gobby/utils/id.py +38 -0
  334. gobby/utils/json_helpers.py +161 -0
  335. gobby/utils/logging.py +376 -0
  336. gobby/utils/machine_id.py +135 -0
  337. gobby/utils/metrics.py +589 -0
  338. gobby/utils/project_context.py +182 -0
  339. gobby/utils/project_init.py +263 -0
  340. gobby/utils/status.py +256 -0
  341. gobby/utils/validation.py +80 -0
  342. gobby/utils/version.py +23 -0
  343. gobby/workflows/__init__.py +4 -0
  344. gobby/workflows/actions.py +1310 -0
  345. gobby/workflows/approval_flow.py +138 -0
  346. gobby/workflows/artifact_actions.py +103 -0
  347. gobby/workflows/audit_helpers.py +110 -0
  348. gobby/workflows/autonomous_actions.py +286 -0
  349. gobby/workflows/context_actions.py +394 -0
  350. gobby/workflows/definitions.py +130 -0
  351. gobby/workflows/detection_helpers.py +208 -0
  352. gobby/workflows/engine.py +485 -0
  353. gobby/workflows/evaluator.py +669 -0
  354. gobby/workflows/git_utils.py +96 -0
  355. gobby/workflows/hooks.py +169 -0
  356. gobby/workflows/lifecycle_evaluator.py +613 -0
  357. gobby/workflows/llm_actions.py +70 -0
  358. gobby/workflows/loader.py +333 -0
  359. gobby/workflows/mcp_actions.py +60 -0
  360. gobby/workflows/memory_actions.py +272 -0
  361. gobby/workflows/premature_stop.py +164 -0
  362. gobby/workflows/session_actions.py +139 -0
  363. gobby/workflows/state_actions.py +123 -0
  364. gobby/workflows/state_manager.py +104 -0
  365. gobby/workflows/stop_signal_actions.py +163 -0
  366. gobby/workflows/summary_actions.py +344 -0
  367. gobby/workflows/task_actions.py +249 -0
  368. gobby/workflows/task_enforcement_actions.py +901 -0
  369. gobby/workflows/templates.py +52 -0
  370. gobby/workflows/todo_actions.py +84 -0
  371. gobby/workflows/webhook.py +223 -0
  372. gobby/workflows/webhook_executor.py +399 -0
  373. gobby/worktrees/__init__.py +5 -0
  374. gobby/worktrees/git.py +690 -0
  375. gobby/worktrees/merge/__init__.py +20 -0
  376. gobby/worktrees/merge/conflict_parser.py +177 -0
  377. gobby/worktrees/merge/resolver.py +485 -0
  378. gobby-0.2.5.dist-info/METADATA +351 -0
  379. gobby-0.2.5.dist-info/RECORD +383 -0
  380. gobby-0.2.5.dist-info/WHEEL +5 -0
  381. gobby-0.2.5.dist-info/entry_points.txt +2 -0
  382. gobby-0.2.5.dist-info/licenses/LICENSE.md +193 -0
  383. gobby-0.2.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,706 @@
1
+ """
2
+ Semantic tool search using embeddings.
3
+
4
+ Provides infrastructure for embedding-based tool discovery:
5
+ - Tool embedding storage and retrieval
6
+ - Cosine similarity search
7
+ - Integration with OpenAI text-embedding-3-small model
8
+ """
9
+
10
+ import hashlib
11
+ import logging
12
+ import math
13
+ import struct
14
+ from dataclasses import dataclass
15
+ from datetime import UTC, datetime
16
+ from typing import Any
17
+
18
+ from gobby.storage.database import DatabaseProtocol
19
+
20
+ logger = logging.getLogger(__name__)
21
+
22
+ # Default embedding model
23
+ DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
24
+ DEFAULT_EMBEDDING_DIM = 1536
25
+
26
+
27
+ def _cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
28
+ """
29
+ Compute cosine similarity between two vectors.
30
+
31
+ Args:
32
+ vec1: First vector
33
+ vec2: Second vector
34
+
35
+ Returns:
36
+ Cosine similarity score between -1 and 1
37
+ """
38
+ if len(vec1) != len(vec2):
39
+ raise ValueError(f"Vector dimension mismatch: {len(vec1)} vs {len(vec2)}")
40
+
41
+ dot_product = sum(a * b for a, b in zip(vec1, vec2, strict=True))
42
+ norm1 = math.sqrt(sum(a * a for a in vec1))
43
+ norm2 = math.sqrt(sum(b * b for b in vec2))
44
+
45
+ if norm1 == 0 or norm2 == 0:
46
+ return 0.0
47
+
48
+ return dot_product / (norm1 * norm2)
49
+
50
+
51
+ @dataclass
52
+ class SearchResult:
53
+ """Represents a tool search result with similarity score."""
54
+
55
+ tool_id: str
56
+ server_name: str
57
+ tool_name: str
58
+ description: str | None
59
+ similarity: float
60
+ embedding_id: int
61
+
62
+ def to_dict(self) -> dict[str, Any]:
63
+ """Convert to dictionary."""
64
+ return {
65
+ "tool_id": self.tool_id,
66
+ "server_name": self.server_name,
67
+ "tool_name": self.tool_name,
68
+ "description": self.description,
69
+ "similarity": round(self.similarity, 4),
70
+ }
71
+
72
+
73
@dataclass
class ToolEmbedding:
    """A stored embedding vector for one tool, plus its bookkeeping columns."""

    id: int
    tool_id: str
    server_name: str
    project_id: str
    embedding: list[float]
    embedding_model: str
    embedding_dim: int
    text_hash: str
    created_at: str
    updated_at: str

    @classmethod
    def from_row(cls, row: Any) -> "ToolEmbedding":
        """Build an instance from a database row.

        The ``embedding`` column is a BLOB that is unpacked as
        ``embedding_dim`` C floats using the struct module's native format.
        """
        blob = row["embedding"]
        dim = row["embedding_dim"]
        vector = list(struct.unpack(f"{dim}f", blob))

        # Every remaining column maps one-to-one onto a dataclass field.
        columns = {
            column: row[column]
            for column in (
                "id",
                "tool_id",
                "server_name",
                "project_id",
                "embedding_model",
                "embedding_dim",
                "text_hash",
                "created_at",
                "updated_at",
            )
        }
        return cls(embedding=vector, **columns)

    def to_dict(self) -> dict[str, Any]:
        """Serialize the metadata to a dict; the embedding vector is left out."""
        exported = (
            "id",
            "tool_id",
            "server_name",
            "project_id",
            "embedding_model",
            "embedding_dim",
            "text_hash",
            "created_at",
            "updated_at",
        )
        return {field_name: getattr(self, field_name) for field_name in exported}
121
+
122
+
123
+ def _embedding_to_blob(embedding: list[float]) -> bytes:
124
+ """Convert embedding list to binary BLOB."""
125
+ return struct.pack(f"{len(embedding)}f", *embedding)
126
+
127
+
128
+ def _compute_text_hash(text: str) -> str:
129
+ """Compute SHA-256 hash of text for change detection."""
130
+ return hashlib.sha256(text.encode("utf-8")).hexdigest()[:16]
131
+
132
+
133
+ def _build_tool_text(
134
+ name: str, description: str | None, input_schema: dict[str, Any] | None
135
+ ) -> str:
136
+ """
137
+ Build text representation of a tool for embedding.
138
+
139
+ Combines name, description, and parameter info into a single string
140
+ that captures the tool's semantic meaning.
141
+ """
142
+ parts = [f"Tool: {name}"]
143
+
144
+ if description:
145
+ parts.append(f"Description: {description}")
146
+
147
+ if input_schema:
148
+ # Extract parameter names and descriptions
149
+ properties = input_schema.get("properties", {})
150
+ if properties:
151
+ param_parts = []
152
+ for param_name, param_def in properties.items():
153
+ param_desc = param_def.get("description", "")
154
+ param_type = param_def.get("type", "any")
155
+ if param_desc:
156
+ param_parts.append(f"{param_name} ({param_type}): {param_desc}")
157
+ else:
158
+ param_parts.append(f"{param_name} ({param_type})")
159
+ if param_parts:
160
+ parts.append("Parameters: " + ", ".join(param_parts))
161
+
162
+ return "\n".join(parts)
163
+
164
+
165
+ class SemanticToolSearch:
166
+ """
167
+ Manages semantic search over MCP tools using embeddings.
168
+
169
+ Provides:
170
+ - Embedding storage and retrieval (tool_embeddings table)
171
+ - Text hashing for change detection
172
+ - Cosine similarity search (to be implemented)
173
+ - Integration with embedding providers (to be implemented)
174
+ """
175
+
176
def __init__(
    self,
    db: DatabaseProtocol,
    embedding_model: str = DEFAULT_EMBEDDING_MODEL,
    embedding_dim: int = DEFAULT_EMBEDDING_DIM,
    openai_api_key: str | None = None,
):
    """
    Set up the semantic search manager.

    Only stores its arguments; no database or network work happens here.

    Args:
        db: Database handle used for all tool_embeddings queries
        embedding_model: Embedding model name (default: text-embedding-3-small)
        embedding_dim: Embedding vector dimension (default: 1536)
        openai_api_key: Optional OpenAI API key; when not given, embed_text
            falls back to the OPENAI_API_KEY environment variable
    """
    self._openai_api_key = openai_api_key
    self.embedding_dim = embedding_dim
    self.embedding_model = embedding_model
    self.db = db
196
+
197
def store_embedding(
    self,
    tool_id: str,
    server_name: str,
    project_id: str,
    embedding: list[float],
    text_hash: str,
) -> ToolEmbedding:
    """
    Insert a tool embedding, or overwrite the existing row for the same tool_id.

    On conflict every column except created_at is replaced. The stored
    embedding_dim is len(embedding), and the stored model name is
    self.embedding_model.

    Args:
        tool_id: ID of the tool in the tools table
        server_name: Name of the MCP server
        project_id: Project ID
        embedding: Embedding vector as list of floats
        text_hash: Hash of the text used to generate the embedding

    Returns:
        The row as read back from the database after the write

    Raises:
        RuntimeError: If the row cannot be read back after the write
    """
    timestamp = datetime.now(UTC).isoformat()
    blob = _embedding_to_blob(embedding)

    upsert_sql = """
            INSERT INTO tool_embeddings (
                tool_id, server_name, project_id, embedding,
                embedding_model, embedding_dim, text_hash, created_at, updated_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
            ON CONFLICT(tool_id) DO UPDATE SET
                server_name = excluded.server_name,
                project_id = excluded.project_id,
                embedding = excluded.embedding,
                embedding_model = excluded.embedding_model,
                embedding_dim = excluded.embedding_dim,
                text_hash = excluded.text_hash,
                updated_at = excluded.updated_at
            """
    # The same timestamp serves as created_at and updated_at for new rows.
    params = (
        tool_id,
        server_name,
        project_id,
        blob,
        self.embedding_model,
        len(embedding),
        text_hash,
        timestamp,
        timestamp,
    )
    self.db.execute(upsert_sql, params)

    stored = self.get_embedding(tool_id)
    if stored is not None:
        return stored
    raise RuntimeError(f"Failed to retrieve embedding for tool {tool_id} after store")
254
+
255
def get_embedding(self, tool_id: str) -> ToolEmbedding | None:
    """
    Look up the stored embedding for one tool.

    Args:
        tool_id: Tool ID

    Returns:
        The decoded ToolEmbedding, or None when no row matches
    """
    query = "SELECT * FROM tool_embeddings WHERE tool_id = ?"
    row = self.db.fetchone(query, (tool_id,))
    if not row:
        return None
    return ToolEmbedding.from_row(row)
270
+
271
def get_embeddings_for_project(self, project_id: str) -> list[ToolEmbedding]:
    """
    Fetch every stored embedding belonging to a project.

    Args:
        project_id: Project ID

    Returns:
        One ToolEmbedding per matching row (empty list when there are none)
    """
    query = "SELECT * FROM tool_embeddings WHERE project_id = ?"
    matching_rows = self.db.fetchall(query, (project_id,))
    return list(map(ToolEmbedding.from_row, matching_rows))
286
+
287
def get_embeddings_for_server(self, server_name: str, project_id: str) -> list[ToolEmbedding]:
    """
    Fetch every stored embedding for one server within a project.

    Args:
        server_name: Server name
        project_id: Project ID

    Returns:
        One ToolEmbedding per matching row (empty list when there are none)
    """
    query = "SELECT * FROM tool_embeddings WHERE server_name = ? AND project_id = ?"
    matching_rows = self.db.fetchall(query, (server_name, project_id))
    return list(map(ToolEmbedding.from_row, matching_rows))
303
+
304
def delete_embedding(self, tool_id: str) -> bool:
    """
    Remove the stored embedding for one tool.

    Args:
        tool_id: Tool ID

    Returns:
        True when the DELETE affected at least one row, False otherwise
    """
    statement = "DELETE FROM tool_embeddings WHERE tool_id = ?"
    outcome = self.db.execute(statement, (tool_id,))
    removed = outcome.rowcount
    return removed > 0
319
+
320
def delete_embeddings_for_server(self, server_name: str, project_id: str) -> int:
    """
    Remove every stored embedding for one server within a project.

    Args:
        server_name: Server name
        project_id: Project ID

    Returns:
        The rowcount reported by the DELETE statement
    """
    statement = "DELETE FROM tool_embeddings WHERE server_name = ? AND project_id = ?"
    outcome = self.db.execute(statement, (server_name, project_id))
    return outcome.rowcount
336
+
337
+ def needs_reembedding(
338
+ self,
339
+ tool_id: str,
340
+ name: str,
341
+ description: str | None,
342
+ input_schema: dict[str, Any] | None,
343
+ ) -> bool:
344
+ """
345
+ Check if a tool needs (re)embedding.
346
+
347
+ Computes hash of the tool's text representation and compares
348
+ to stored hash.
349
+
350
+ Args:
351
+ tool_id: Tool ID
352
+ name: Tool name
353
+ description: Tool description
354
+ input_schema: Tool input schema
355
+
356
+ Returns:
357
+ True if embedding is missing or outdated
358
+ """
359
+ existing = self.get_embedding(tool_id)
360
+ if not existing:
361
+ return True
362
+
363
+ current_hash = _compute_text_hash(_build_tool_text(name, description, input_schema))
364
+ return existing.text_hash != current_hash
365
+
366
+ def get_embedding_stats(self, project_id: str | None = None) -> dict[str, Any]:
367
+ """
368
+ Get statistics about stored embeddings.
369
+
370
+ Args:
371
+ project_id: Optional project filter
372
+
373
+ Returns:
374
+ Dict with count, servers, and model info
375
+ """
376
+ if project_id:
377
+ count_row = self.db.fetchone(
378
+ "SELECT COUNT(*) as count FROM tool_embeddings WHERE project_id = ?",
379
+ (project_id,),
380
+ )
381
+ servers_rows = self.db.fetchall(
382
+ """
383
+ SELECT server_name, COUNT(*) as count
384
+ FROM tool_embeddings
385
+ WHERE project_id = ?
386
+ GROUP BY server_name
387
+ """,
388
+ (project_id,),
389
+ )
390
+ else:
391
+ count_row = self.db.fetchone("SELECT COUNT(*) as count FROM tool_embeddings", ())
392
+ servers_rows = self.db.fetchall(
393
+ """
394
+ SELECT server_name, COUNT(*) as count
395
+ FROM tool_embeddings
396
+ GROUP BY server_name
397
+ """,
398
+ (),
399
+ )
400
+
401
+ return {
402
+ "total_embeddings": count_row["count"] if count_row else 0,
403
+ "by_server": {row["server_name"]: row["count"] for row in servers_rows},
404
+ "embedding_model": self.embedding_model,
405
+ "embedding_dim": self.embedding_dim,
406
+ }
407
+
408
@staticmethod
def build_tool_text(
    name: str, description: str | None, input_schema: dict[str, Any] | None
) -> str:
    """
    Produce the text that represents a tool for embedding purposes.

    Thin public entry point that delegates to the module-level
    _build_tool_text helper.

    Args:
        name: Tool name
        description: Tool description
        input_schema: Tool input schema

    Returns:
        Text suitable for embedding
    """
    tool_text = _build_tool_text(name, description, input_schema)
    return tool_text
426
+
427
@staticmethod
def compute_text_hash(text: str) -> str:
    """
    Hash a piece of text for change detection.

    Thin public entry point that delegates to the module-level
    _compute_text_hash helper.

    Args:
        text: Text to hash

    Returns:
        16-character hex hash
    """
    short_hash = _compute_text_hash(text)
    return short_hash
441
+
442
async def embed_text(self, text: str) -> list[float]:
    """
    Turn text into an embedding vector.

    The API key passed to the constructor is used when set; otherwise the
    OPENAI_API_KEY environment variable is consulted. The request itself is
    delegated to _embed_text_litellm.

    Args:
        text: Text to embed

    Returns:
        Embedding vector as list of floats

    Raises:
        RuntimeError: If no API key is available or embedding fails
    """
    import os

    key = self._openai_api_key
    if not key:
        key = os.environ.get("OPENAI_API_KEY")

    if key:
        return await self._embed_text_litellm(text, api_key=key)

    raise RuntimeError(
        "OPENAI_API_KEY not configured. Add it to llm_providers.api_keys in config.yaml"
    )
465
+
466
async def _embed_text_litellm(self, text: str, api_key: str) -> list[float]:
    """Request an embedding for the text through litellm.aembedding.

    Args:
        text: Text to embed (sent as a single-item input list)
        api_key: API key forwarded to the embedding call

    Returns:
        The "embedding" entry of the first item in the response data

    Raises:
        RuntimeError: If litellm is not importable, or if the request or
            response handling fails for any reason (original error chained)
    """
    try:
        import litellm
    except ImportError as import_err:
        raise RuntimeError(
            "litellm package not installed. Run: pip install litellm"
        ) from import_err

    request = {
        "model": self.embedding_model,
        "input": [text],
        "api_key": api_key,
    }
    try:
        reply = await litellm.aembedding(**request)
        vector: list[float] = reply.data[0]["embedding"]
        logger.debug(f"Generated embedding via LiteLLM with {len(vector)} dimensions")
        return vector
    except Exception as err:
        # Any failure is surfaced to callers as a RuntimeError.
        logger.error(f"Failed to generate embedding with LiteLLM: {err}")
        raise RuntimeError(f"Embedding generation failed: {err}") from err
493
+
494
async def embed_tool(
    self,
    tool_id: str,
    name: str,
    description: str | None,
    input_schema: dict[str, Any] | None,
    server_name: str,
    project_id: str,
    force: bool = False,
) -> ToolEmbedding | None:
    """
    Embed a single tool and persist the result.

    Unless ``force`` is set, ``needs_reembedding`` is consulted first and
    the tool is skipped when it reports that nothing needs to be done.

    Args:
        tool_id: Tool ID
        name: Tool name
        description: Tool description
        input_schema: Tool input schema
        server_name: MCP server name
        project_id: Project ID
        force: Force re-embedding even if content unchanged

    Returns:
        ToolEmbedding if generated, None if skipped (already up-to-date)
    """
    # With force=True the staleness check is not evaluated at all.
    must_embed = force or self.needs_reembedding(tool_id, name, description, input_schema)
    if not must_embed:
        logger.debug(f"Tool {name} embedding is up-to-date, skipping")
        return None

    # The stored hash is derived from exactly the text that gets embedded.
    tool_text = _build_tool_text(name, description, input_schema)
    content_hash = _compute_text_hash(tool_text)
    vector = await self.embed_text(tool_text)

    stored = self.store_embedding(
        tool_id=tool_id,
        server_name=server_name,
        project_id=project_id,
        embedding=vector,
        text_hash=content_hash,
    )
    return stored
540
+
541
async def embed_all_tools(
    self,
    project_id: str,
    mcp_manager: Any,
    force: bool = False,
) -> dict[str, Any]:
    """
    Embed every cached tool of every MCP server in a project.

    Servers are listed through ``mcp_manager`` (enabled or not), and each
    of their cached tools is passed to ``embed_tool``. A failure on one
    tool is recorded and does not stop the remaining tools.

    Args:
        project_id: Project ID
        mcp_manager: LocalMCPManager instance for accessing tools
        force: Force re-embedding all tools

    Returns:
        Dict with statistics: embedded, skipped, failed, errors, by_server

    Raises:
        TypeError: If mcp_manager is not a LocalMCPManager instance
    """
    from gobby.storage.mcp import LocalMCPManager

    if not isinstance(mcp_manager, LocalMCPManager):
        raise TypeError("mcp_manager must be a LocalMCPManager instance")

    totals: dict[str, Any] = {
        "embedded": 0,
        "skipped": 0,
        "failed": 0,
        "errors": [],
        "by_server": {},
    }

    for server in mcp_manager.list_servers(project_id=project_id, enabled_only=False):
        per_server = {"embedded": 0, "skipped": 0, "failed": 0}

        for tool in mcp_manager.get_cached_tools(server.name, project_id=project_id):
            try:
                outcome = await self.embed_tool(
                    tool_id=tool.id,
                    name=tool.name,
                    description=tool.description,
                    input_schema=tool.input_schema,
                    server_name=server.name,
                    project_id=project_id,
                    force=force,
                )

                # A falsy result means embed_tool skipped the tool.
                bucket = "embedded" if outcome else "skipped"
                per_server[bucket] += 1
                totals[bucket] += 1
                if outcome:
                    logger.info(f"Embedded tool: {server.name}/{tool.name}")

            except Exception as exc:
                # Record the failure and carry on with the next tool.
                per_server["failed"] += 1
                totals["failed"] += 1
                failure = f"{server.name}/{tool.name}: {exc}"
                totals["errors"].append(failure)
                logger.error(f"Failed to embed tool {failure}")

        totals["by_server"][server.name] = per_server

    return totals
613
+
614
async def search_tools(
    self,
    query: str,
    project_id: str,
    top_k: int = 10,
    min_similarity: float = 0.0,
    server_filter: str | None = None,
) -> list[SearchResult]:
    """
    Search for tools semantically similar to a query.

    Embeds the query and computes cosine similarity against all
    stored tool embeddings, returning ranked results.

    Args:
        query: Search query text
        project_id: Project ID to search within
        top_k: Maximum number of results to return; zero or a negative
            value yields an empty list
        min_similarity: Minimum similarity threshold (0.0 to 1.0)
        server_filter: Optional server name to filter results

    Returns:
        List of SearchResult sorted by similarity (descending)

    Raises:
        RuntimeError: Propagated from embed_text when the query cannot
            be embedded
    """
    # Embed the query
    query_embedding = await self.embed_text(query)

    # Get the candidate embeddings (one server, or the whole project)
    if server_filter:
        embeddings = self.get_embeddings_for_server(server_filter, project_id)
    else:
        embeddings = self.get_embeddings_for_project(project_id)

    if not embeddings:
        logger.debug(f"No embeddings found for project {project_id}")
        return []

    # Get tool metadata for results
    tool_info = self._get_tool_info_map(project_id, server_filter)

    # Compute similarities
    results: list[SearchResult] = []
    for emb in embeddings:
        similarity = _cosine_similarity(query_embedding, emb.embedding)

        if similarity >= min_similarity:
            # An embedding whose tool row is missing is still returned,
            # with a placeholder name and no description.
            tool_data = tool_info.get(emb.tool_id, {})
            results.append(
                SearchResult(
                    tool_id=emb.tool_id,
                    server_name=emb.server_name,
                    tool_name=tool_data.get("name", "unknown"),
                    description=tool_data.get("description"),
                    similarity=similarity,
                    embedding_id=emb.id,
                )
            )

    # Sort by similarity descending and limit. The limit is clamped to be
    # non-negative: a negative slice bound would drop items from the end
    # of the list instead of capping the result count.
    results.sort(key=lambda x: x.similarity, reverse=True)
    return results[: max(top_k, 0)]
675
+
676
+ def _get_tool_info_map(
677
+ self, project_id: str, server_filter: str | None = None
678
+ ) -> dict[str, dict[str, Any]]:
679
+ """
680
+ Get tool metadata map for search results.
681
+
682
+ Args:
683
+ project_id: Project ID
684
+ server_filter: Optional server name filter
685
+
686
+ Returns:
687
+ Dict mapping tool_id to {name, description}
688
+ """
689
+ if server_filter:
690
+ query = """
691
+ SELECT t.id, t.name, t.description
692
+ FROM tools t
693
+ JOIN mcp_servers s ON t.mcp_server_id = s.id
694
+ WHERE s.project_id = ? AND s.name = ?
695
+ """
696
+ rows = self.db.fetchall(query, (project_id, server_filter))
697
+ else:
698
+ query = """
699
+ SELECT t.id, t.name, t.description
700
+ FROM tools t
701
+ JOIN mcp_servers s ON t.mcp_server_id = s.id
702
+ WHERE s.project_id = ?
703
+ """
704
+ rows = self.db.fetchall(query, (project_id,))
705
+
706
+ return {row["id"]: {"name": row["name"], "description": row["description"]} for row in rows}