gobby 0.2.5__py3-none-any.whl → 0.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. gobby/__init__.py +1 -1
  2. gobby/adapters/__init__.py +2 -1
  3. gobby/adapters/claude_code.py +13 -4
  4. gobby/adapters/codex_impl/__init__.py +28 -0
  5. gobby/adapters/codex_impl/adapter.py +722 -0
  6. gobby/adapters/codex_impl/client.py +679 -0
  7. gobby/adapters/codex_impl/protocol.py +20 -0
  8. gobby/adapters/codex_impl/types.py +68 -0
  9. gobby/agents/definitions.py +11 -1
  10. gobby/agents/isolation.py +395 -0
  11. gobby/agents/runner.py +8 -0
  12. gobby/agents/sandbox.py +261 -0
  13. gobby/agents/spawn.py +42 -287
  14. gobby/agents/spawn_executor.py +385 -0
  15. gobby/agents/spawners/__init__.py +24 -0
  16. gobby/agents/spawners/command_builder.py +189 -0
  17. gobby/agents/spawners/embedded.py +21 -2
  18. gobby/agents/spawners/headless.py +21 -2
  19. gobby/agents/spawners/prompt_manager.py +125 -0
  20. gobby/cli/__init__.py +6 -0
  21. gobby/cli/clones.py +419 -0
  22. gobby/cli/conductor.py +266 -0
  23. gobby/cli/install.py +4 -4
  24. gobby/cli/installers/antigravity.py +3 -9
  25. gobby/cli/installers/claude.py +15 -9
  26. gobby/cli/installers/codex.py +2 -8
  27. gobby/cli/installers/gemini.py +8 -8
  28. gobby/cli/installers/shared.py +175 -13
  29. gobby/cli/sessions.py +1 -1
  30. gobby/cli/skills.py +858 -0
  31. gobby/cli/tasks/ai.py +0 -440
  32. gobby/cli/tasks/crud.py +44 -6
  33. gobby/cli/tasks/main.py +0 -4
  34. gobby/cli/tui.py +2 -2
  35. gobby/cli/utils.py +12 -5
  36. gobby/clones/__init__.py +13 -0
  37. gobby/clones/git.py +547 -0
  38. gobby/conductor/__init__.py +16 -0
  39. gobby/conductor/alerts.py +135 -0
  40. gobby/conductor/loop.py +164 -0
  41. gobby/conductor/monitors/__init__.py +11 -0
  42. gobby/conductor/monitors/agents.py +116 -0
  43. gobby/conductor/monitors/tasks.py +155 -0
  44. gobby/conductor/pricing.py +234 -0
  45. gobby/conductor/token_tracker.py +160 -0
  46. gobby/config/__init__.py +12 -97
  47. gobby/config/app.py +69 -91
  48. gobby/config/extensions.py +2 -2
  49. gobby/config/features.py +7 -130
  50. gobby/config/search.py +110 -0
  51. gobby/config/servers.py +1 -1
  52. gobby/config/skills.py +43 -0
  53. gobby/config/tasks.py +9 -41
  54. gobby/hooks/__init__.py +0 -13
  55. gobby/hooks/event_handlers.py +188 -2
  56. gobby/hooks/hook_manager.py +50 -4
  57. gobby/hooks/plugins.py +1 -1
  58. gobby/hooks/skill_manager.py +130 -0
  59. gobby/hooks/webhooks.py +1 -1
  60. gobby/install/claude/hooks/hook_dispatcher.py +4 -4
  61. gobby/install/codex/hooks/hook_dispatcher.py +1 -1
  62. gobby/install/gemini/hooks/hook_dispatcher.py +87 -12
  63. gobby/llm/claude.py +22 -34
  64. gobby/llm/claude_executor.py +46 -256
  65. gobby/llm/codex_executor.py +59 -291
  66. gobby/llm/executor.py +21 -0
  67. gobby/llm/gemini.py +134 -110
  68. gobby/llm/litellm_executor.py +143 -6
  69. gobby/llm/resolver.py +98 -35
  70. gobby/mcp_proxy/importer.py +62 -4
  71. gobby/mcp_proxy/instructions.py +56 -0
  72. gobby/mcp_proxy/models.py +15 -0
  73. gobby/mcp_proxy/registries.py +68 -8
  74. gobby/mcp_proxy/server.py +33 -3
  75. gobby/mcp_proxy/services/recommendation.py +43 -11
  76. gobby/mcp_proxy/services/tool_proxy.py +81 -1
  77. gobby/mcp_proxy/stdio.py +2 -1
  78. gobby/mcp_proxy/tools/__init__.py +0 -2
  79. gobby/mcp_proxy/tools/agent_messaging.py +317 -0
  80. gobby/mcp_proxy/tools/agents.py +31 -731
  81. gobby/mcp_proxy/tools/clones.py +518 -0
  82. gobby/mcp_proxy/tools/memory.py +3 -26
  83. gobby/mcp_proxy/tools/metrics.py +65 -1
  84. gobby/mcp_proxy/tools/orchestration/__init__.py +3 -0
  85. gobby/mcp_proxy/tools/orchestration/cleanup.py +151 -0
  86. gobby/mcp_proxy/tools/orchestration/wait.py +467 -0
  87. gobby/mcp_proxy/tools/sessions/__init__.py +14 -0
  88. gobby/mcp_proxy/tools/sessions/_commits.py +232 -0
  89. gobby/mcp_proxy/tools/sessions/_crud.py +253 -0
  90. gobby/mcp_proxy/tools/sessions/_factory.py +63 -0
  91. gobby/mcp_proxy/tools/sessions/_handoff.py +499 -0
  92. gobby/mcp_proxy/tools/sessions/_messages.py +138 -0
  93. gobby/mcp_proxy/tools/skills/__init__.py +616 -0
  94. gobby/mcp_proxy/tools/spawn_agent.py +417 -0
  95. gobby/mcp_proxy/tools/task_orchestration.py +7 -0
  96. gobby/mcp_proxy/tools/task_readiness.py +14 -0
  97. gobby/mcp_proxy/tools/task_sync.py +1 -1
  98. gobby/mcp_proxy/tools/tasks/_context.py +0 -20
  99. gobby/mcp_proxy/tools/tasks/_crud.py +91 -4
  100. gobby/mcp_proxy/tools/tasks/_expansion.py +348 -0
  101. gobby/mcp_proxy/tools/tasks/_factory.py +6 -16
  102. gobby/mcp_proxy/tools/tasks/_lifecycle.py +110 -45
  103. gobby/mcp_proxy/tools/tasks/_lifecycle_validation.py +18 -29
  104. gobby/mcp_proxy/tools/workflows.py +1 -1
  105. gobby/mcp_proxy/tools/worktrees.py +0 -338
  106. gobby/memory/backends/__init__.py +6 -1
  107. gobby/memory/backends/mem0.py +6 -1
  108. gobby/memory/extractor.py +477 -0
  109. gobby/memory/ingestion/__init__.py +5 -0
  110. gobby/memory/ingestion/multimodal.py +221 -0
  111. gobby/memory/manager.py +73 -285
  112. gobby/memory/search/__init__.py +10 -0
  113. gobby/memory/search/coordinator.py +248 -0
  114. gobby/memory/services/__init__.py +5 -0
  115. gobby/memory/services/crossref.py +142 -0
  116. gobby/prompts/loader.py +5 -2
  117. gobby/runner.py +37 -16
  118. gobby/search/__init__.py +48 -6
  119. gobby/search/backends/__init__.py +159 -0
  120. gobby/search/backends/embedding.py +225 -0
  121. gobby/search/embeddings.py +238 -0
  122. gobby/search/models.py +148 -0
  123. gobby/search/unified.py +496 -0
  124. gobby/servers/http.py +24 -12
  125. gobby/servers/routes/admin.py +294 -0
  126. gobby/servers/routes/mcp/endpoints/__init__.py +61 -0
  127. gobby/servers/routes/mcp/endpoints/discovery.py +405 -0
  128. gobby/servers/routes/mcp/endpoints/execution.py +568 -0
  129. gobby/servers/routes/mcp/endpoints/registry.py +378 -0
  130. gobby/servers/routes/mcp/endpoints/server.py +304 -0
  131. gobby/servers/routes/mcp/hooks.py +1 -1
  132. gobby/servers/routes/mcp/tools.py +48 -1317
  133. gobby/servers/websocket.py +2 -2
  134. gobby/sessions/analyzer.py +2 -0
  135. gobby/sessions/lifecycle.py +1 -1
  136. gobby/sessions/processor.py +10 -0
  137. gobby/sessions/transcripts/base.py +2 -0
  138. gobby/sessions/transcripts/claude.py +79 -10
  139. gobby/skills/__init__.py +91 -0
  140. gobby/skills/loader.py +685 -0
  141. gobby/skills/manager.py +384 -0
  142. gobby/skills/parser.py +286 -0
  143. gobby/skills/search.py +463 -0
  144. gobby/skills/sync.py +119 -0
  145. gobby/skills/updater.py +385 -0
  146. gobby/skills/validator.py +368 -0
  147. gobby/storage/clones.py +378 -0
  148. gobby/storage/database.py +1 -1
  149. gobby/storage/memories.py +43 -13
  150. gobby/storage/migrations.py +162 -201
  151. gobby/storage/sessions.py +116 -7
  152. gobby/storage/skills.py +782 -0
  153. gobby/storage/tasks/_crud.py +4 -4
  154. gobby/storage/tasks/_lifecycle.py +57 -7
  155. gobby/storage/tasks/_manager.py +14 -5
  156. gobby/storage/tasks/_models.py +8 -3
  157. gobby/sync/memories.py +40 -5
  158. gobby/sync/tasks.py +83 -6
  159. gobby/tasks/__init__.py +1 -2
  160. gobby/tasks/external_validator.py +1 -1
  161. gobby/tasks/validation.py +46 -35
  162. gobby/tools/summarizer.py +91 -10
  163. gobby/tui/api_client.py +4 -7
  164. gobby/tui/app.py +5 -3
  165. gobby/tui/screens/orchestrator.py +1 -2
  166. gobby/tui/screens/tasks.py +2 -4
  167. gobby/tui/ws_client.py +1 -1
  168. gobby/utils/daemon_client.py +2 -2
  169. gobby/utils/project_context.py +2 -3
  170. gobby/utils/status.py +13 -0
  171. gobby/workflows/actions.py +221 -1135
  172. gobby/workflows/artifact_actions.py +31 -0
  173. gobby/workflows/autonomous_actions.py +11 -0
  174. gobby/workflows/context_actions.py +93 -1
  175. gobby/workflows/detection_helpers.py +115 -31
  176. gobby/workflows/enforcement/__init__.py +47 -0
  177. gobby/workflows/enforcement/blocking.py +269 -0
  178. gobby/workflows/enforcement/commit_policy.py +283 -0
  179. gobby/workflows/enforcement/handlers.py +269 -0
  180. gobby/workflows/{task_enforcement_actions.py → enforcement/task_policy.py} +29 -388
  181. gobby/workflows/engine.py +13 -2
  182. gobby/workflows/git_utils.py +106 -0
  183. gobby/workflows/lifecycle_evaluator.py +29 -1
  184. gobby/workflows/llm_actions.py +30 -0
  185. gobby/workflows/loader.py +19 -6
  186. gobby/workflows/mcp_actions.py +20 -1
  187. gobby/workflows/memory_actions.py +154 -0
  188. gobby/workflows/safe_evaluator.py +183 -0
  189. gobby/workflows/session_actions.py +44 -0
  190. gobby/workflows/state_actions.py +60 -1
  191. gobby/workflows/stop_signal_actions.py +55 -0
  192. gobby/workflows/summary_actions.py +111 -1
  193. gobby/workflows/task_sync_actions.py +347 -0
  194. gobby/workflows/todo_actions.py +34 -1
  195. gobby/workflows/webhook_actions.py +185 -0
  196. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/METADATA +87 -21
  197. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/RECORD +201 -172
  198. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/WHEEL +1 -1
  199. gobby/adapters/codex.py +0 -1292
  200. gobby/install/claude/commands/gobby/bug.md +0 -51
  201. gobby/install/claude/commands/gobby/chore.md +0 -51
  202. gobby/install/claude/commands/gobby/epic.md +0 -52
  203. gobby/install/claude/commands/gobby/eval.md +0 -235
  204. gobby/install/claude/commands/gobby/feat.md +0 -49
  205. gobby/install/claude/commands/gobby/nit.md +0 -52
  206. gobby/install/claude/commands/gobby/ref.md +0 -52
  207. gobby/install/codex/prompts/forget.md +0 -7
  208. gobby/install/codex/prompts/memories.md +0 -7
  209. gobby/install/codex/prompts/recall.md +0 -7
  210. gobby/install/codex/prompts/remember.md +0 -13
  211. gobby/llm/gemini_executor.py +0 -339
  212. gobby/mcp_proxy/tools/session_messages.py +0 -1056
  213. gobby/mcp_proxy/tools/task_expansion.py +0 -591
  214. gobby/prompts/defaults/expansion/system.md +0 -119
  215. gobby/prompts/defaults/expansion/user.md +0 -48
  216. gobby/prompts/defaults/external_validation/agent.md +0 -72
  217. gobby/prompts/defaults/external_validation/external.md +0 -63
  218. gobby/prompts/defaults/external_validation/spawn.md +0 -83
  219. gobby/prompts/defaults/external_validation/system.md +0 -6
  220. gobby/prompts/defaults/features/import_mcp.md +0 -22
  221. gobby/prompts/defaults/features/import_mcp_github.md +0 -17
  222. gobby/prompts/defaults/features/import_mcp_search.md +0 -16
  223. gobby/prompts/defaults/features/recommend_tools.md +0 -32
  224. gobby/prompts/defaults/features/recommend_tools_hybrid.md +0 -35
  225. gobby/prompts/defaults/features/recommend_tools_llm.md +0 -30
  226. gobby/prompts/defaults/features/server_description.md +0 -20
  227. gobby/prompts/defaults/features/server_description_system.md +0 -6
  228. gobby/prompts/defaults/features/task_description.md +0 -31
  229. gobby/prompts/defaults/features/task_description_system.md +0 -6
  230. gobby/prompts/defaults/features/tool_summary.md +0 -17
  231. gobby/prompts/defaults/features/tool_summary_system.md +0 -6
  232. gobby/prompts/defaults/research/step.md +0 -58
  233. gobby/prompts/defaults/validation/criteria.md +0 -47
  234. gobby/prompts/defaults/validation/validate.md +0 -38
  235. gobby/storage/migrations_legacy.py +0 -1359
  236. gobby/tasks/context.py +0 -747
  237. gobby/tasks/criteria.py +0 -342
  238. gobby/tasks/expansion.py +0 -626
  239. gobby/tasks/prompts/expand.py +0 -327
  240. gobby/tasks/research.py +0 -421
  241. gobby/tasks/tdd.py +0 -352
  242. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/entry_points.txt +0 -0
  243. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/licenses/LICENSE.md +0 -0
  244. {gobby-0.2.5.dist-info → gobby-0.2.7.dist-info}/top_level.txt +0 -0
gobby/search/models.py ADDED
@@ -0,0 +1,148 @@
1
+ """Search models and configuration.
2
+
3
+ This module defines the core data structures for the unified search layer:
4
+ - SearchMode: Enum for search modes (tfidf, embedding, auto, hybrid)
5
+ - SearchConfig: Configuration for search behavior
6
+ - FallbackEvent: Event emitted when falling back to TF-IDF
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from dataclasses import dataclass, field
12
+ from datetime import UTC, datetime
13
+ from enum import Enum
14
+ from typing import Any
15
+
16
+ from pydantic import BaseModel, Field
17
+
18
+
19
class SearchMode(str, Enum):
    """Strategies UnifiedSearcher can use to answer a query.

    Each member's value is the lowercase string accepted in configuration.

    Members:
        TFIDF: Lexical TF-IDF search only; always works and needs no API.
        EMBEDDING: Embedding-based search only; fails when embeddings are
            unavailable.
        AUTO: Prefer embeddings and fall back to TF-IDF when they are
            unavailable.
        HYBRID: Run both backends and combine their weighted scores.
    """

    TFIDF = "tfidf"
    EMBEDDING = "embedding"
    AUTO = "auto"
    HYBRID = "hybrid"
33
+
34
+
35
class SearchConfig(BaseModel):
    """Settings that drive UnifiedSearcher, including its fallback behavior.

    The model covers:
    - the search mode (tfidf, embedding, auto, hybrid)
    - the embedding model to call, as a LiteLLM model string
    - the per-backend weights used in hybrid mode
    - whether a fallback to TF-IDF is logged

    Example configs:
        # OpenAI (default - only the OPENAI_API_KEY env var is required)
        SearchConfig(mode="auto", embedding_model="text-embedding-3-small")

        # Ollama (local, no API key needed)
        SearchConfig(
            mode="auto",
            embedding_model="openai/nomic-embed-text",
            embedding_api_base="http://localhost:11434/v1"
        )

        # Gemini
        SearchConfig(mode="hybrid", embedding_model="gemini/text-embedding-004")
    """

    mode: str = Field(
        "auto",
        description="Search mode: tfidf, embedding, auto, hybrid",
    )
    embedding_model: str = Field(
        "text-embedding-3-small",
        description="LiteLLM model string (e.g., text-embedding-3-small, openai/nomic-embed-text)",
    )
    embedding_api_base: str | None = Field(
        None,
        description="API base URL for Ollama/custom endpoints (e.g., http://localhost:11434/v1)",
    )
    embedding_api_key: str | None = Field(
        None,
        description="API key for embedding provider (uses env var if not set)",
    )
    tfidf_weight: float = Field(
        0.4,
        ge=0.0,
        le=1.0,
        description="Weight for TF-IDF scores in hybrid mode",
    )
    embedding_weight: float = Field(
        0.6,
        ge=0.0,
        le=1.0,
        description="Weight for embedding scores in hybrid mode",
    )
    notify_on_fallback: bool = Field(
        True,
        description="Log warning when falling back to TF-IDF",
    )

    def get_mode_enum(self) -> SearchMode:
        """Return ``mode`` converted to the matching SearchMode member.

        Raises:
            ValueError: If ``mode`` is not one of the SearchMode values.
        """
        mode_enum = SearchMode(self.mode)
        return mode_enum

    def get_normalized_weights(self) -> tuple[float, float]:
        """Scale the two hybrid weights so that together they add up to 1.0.

        Returns:
            ``(tfidf_weight, embedding_weight)`` after normalization. When
            both configured weights are 0 the split is an even (0.5, 0.5).
        """
        tfidf, embedding = self.tfidf_weight, self.embedding_weight
        weight_sum = tfidf + embedding
        if weight_sum != 0:
            return (tfidf / weight_sum, embedding / weight_sum)
        # Both weights are zero: dividing is impossible, so share equally.
        return (0.5, 0.5)
107
+
108
+
109
+ @dataclass
110
+ class FallbackEvent:
111
+ """Event emitted when UnifiedSearcher falls back to TF-IDF.
112
+
113
+ This event is emitted via the event_callback when:
114
+ - Embedding provider is unavailable (no API key, no connection)
115
+ - Embedding API call fails (rate limit, timeout, error)
116
+ - Any other embedding-related error occurs
117
+
118
+ Attributes:
119
+ reason: Human-readable explanation of why fallback occurred
120
+ original_error: The underlying exception, if any
121
+ timestamp: When the fallback occurred
122
+ mode: The original search mode that was attempted
123
+ items_reindexed: Number of items reindexed into TF-IDF (if applicable)
124
+ metadata: Additional context about the fallback
125
+ """
126
+
127
+ reason: str
128
+ original_error: Exception | None = None
129
+ timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
130
+ mode: str = "auto"
131
+ items_reindexed: int = 0
132
+ metadata: dict[str, Any] = field(default_factory=dict)
133
+
134
+ def to_dict(self) -> dict[str, Any]:
135
+ """Convert to dictionary for logging/serialization."""
136
+ return {
137
+ "reason": self.reason,
138
+ "original_error": str(self.original_error) if self.original_error else None,
139
+ "timestamp": self.timestamp.isoformat(),
140
+ "mode": self.mode,
141
+ "items_reindexed": self.items_reindexed,
142
+ "metadata": self.metadata,
143
+ }
144
+
145
+ def __str__(self) -> str:
146
+ """Human-readable string representation."""
147
+ error_info = f" ({self.original_error})" if self.original_error else ""
148
+ return f"FallbackEvent: {self.reason}{error_info}"
@@ -0,0 +1,496 @@
1
+ """Unified search orchestration with fallback.
2
+
3
+ This module provides UnifiedSearcher, the main entry point for the unified
4
+ search layer. It orchestrates between embedding-based and TF-IDF backends
5
+ with automatic fallback and configurable search modes.
6
+
7
+ Example usage:
8
+ from gobby.search.unified import UnifiedSearcher
9
+ from gobby.search.models import SearchConfig
10
+
11
+ config = SearchConfig(mode="auto")
12
+ searcher = UnifiedSearcher(config)
13
+
14
+ await searcher.fit_async([
15
+ ("id1", "hello world"),
16
+ ("id2", "foo bar"),
17
+ ])
18
+
19
+ results = await searcher.search_async("greeting", top_k=5)
20
+ # Returns: [("id1", 0.85), ...]
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import logging
26
+ from collections.abc import Callable
27
+ from typing import TYPE_CHECKING, Any
28
+
29
+ from gobby.search.backends import EmbeddingBackend, TFIDFBackend
30
+ from gobby.search.embeddings import is_embedding_available
31
+ from gobby.search.models import FallbackEvent, SearchConfig, SearchMode
32
+
33
+ if TYPE_CHECKING:
34
+ pass
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
+ # Type alias for fallback event callback
39
+ FallbackCallback = Callable[[FallbackEvent], None]
40
+
41
+
42
class UnifiedSearcher:
    """Unified search with automatic fallback.

    This class orchestrates between embedding-based and TF-IDF search
    backends based on the configured mode and availability of embedding
    providers.

    Search Modes:
        - tfidf: TF-IDF only (always works, no API needed)
        - embedding: Embedding-based only (fails if unavailable)
        - auto: Try embedding, fallback to TF-IDF if unavailable
        - hybrid: Combine both with weighted scores

    Fallback Behavior:
        When in "auto" mode and embedding fails (no API key, connection error,
        rate limit), the searcher will:
        1. Reindex the items into TF-IDF
        2. Log a warning (if notify_on_fallback is True)
        3. Emit a FallbackEvent via the event_callback
        4. Return TF-IDF results for this and later searches, until the
           next fit_async() call re-evaluates which backend to use

    Example:
        config = SearchConfig(mode="auto")
        searcher = UnifiedSearcher(
            config,
            event_callback=lambda e: print(f"Fallback: {e}")
        )

        await searcher.fit_async([("id1", "content1")])
        results = await searcher.search_async("query")

        if searcher.is_using_fallback():
            print("Using TF-IDF fallback")
    """

    def __init__(
        self,
        config: SearchConfig | None = None,
        event_callback: FallbackCallback | None = None,
    ) -> None:
        """Initialize UnifiedSearcher.

        Args:
            config: Search configuration (defaults to SearchConfig())
            event_callback: Optional callback for fallback events
        """
        self._config = config or SearchConfig()
        self._event_callback = event_callback

        # Backends are created lazily by the _get_*_backend helpers
        self._tfidf_backend: TFIDFBackend | None = None
        self._embedding_backend: EmbeddingBackend | None = None

        # State tracking
        self._items: list[tuple[str, str]] = []  # Cache for reindexing
        self._fitted = False
        self._fitted_mode: SearchMode | None = None  # Track mode used during fit
        self._using_fallback = False
        self._fallback_reason: str | None = None
        self._active_backend: str | None = None

    @property
    def config(self) -> SearchConfig:
        """Get the current configuration."""
        return self._config

    def _get_tfidf_backend(self) -> TFIDFBackend:
        """Get or create the TF-IDF backend."""
        if self._tfidf_backend is None:
            self._tfidf_backend = TFIDFBackend()
        return self._tfidf_backend

    def _get_embedding_backend(self) -> EmbeddingBackend:
        """Get or create the embedding backend from the configured model settings."""
        if self._embedding_backend is None:
            self._embedding_backend = EmbeddingBackend(
                model=self._config.embedding_model,
                api_base=self._config.embedding_api_base,
                api_key=self._config.embedding_api_key,
            )
        return self._embedding_backend

    def _emit_fallback_event(
        self,
        reason: str,
        error: Exception | None = None,
        items_reindexed: int = 0,
    ) -> None:
        """Emit a fallback event and log if configured.

        Args:
            reason: Human-readable reason for the fallback
            error: Optional exception that caused the fallback
            items_reindexed: Number of items that were reindexed into TF-IDF
        """
        event = FallbackEvent(
            reason=reason,
            original_error=error,
            mode=self._config.mode,
            items_reindexed=items_reindexed,
        )

        # Log warning if configured
        if self._config.notify_on_fallback:
            logger.warning(f"Search fallback: {reason}")

        # Call event callback if provided
        if self._event_callback:
            try:
                self._event_callback(event)
            except Exception as e:
                # A misbehaving observer must not break searching, so the
                # error is logged and otherwise ignored.
                logger.error(f"Fallback callback error: {e}")

    async def _fallback_to_tfidf(
        self,
        reason: str,
        error: Exception | None = None,
        items: list[tuple[str, str]] | None = None,
    ) -> None:
        """Switch to TF-IDF backend and reindex.

        Args:
            reason: Human-readable reason for fallback
            error: Optional exception that caused the fallback
            items: Items to index. If None, uses cached self._items
        """
        self._using_fallback = True
        self._fallback_reason = reason
        self._active_backend = "tfidf"

        # Fit TF-IDF with provided items or cached items
        fit_items = items if items is not None else self._items
        items_reindexed = 0
        if fit_items:
            tfidf = self._get_tfidf_backend()
            await tfidf.fit_async(fit_items)
            items_reindexed = len(fit_items)
            self._fitted = True
            self._fitted_mode = SearchMode.TFIDF  # Fallback always uses TF-IDF

        self._emit_fallback_event(reason, error, items_reindexed)

    async def fit_async(self, items: list[tuple[str, str]]) -> None:
        """Build or rebuild the search index.

        Indexes items into the appropriate backend(s) based on mode:
        - tfidf: TF-IDF only
        - embedding: Embedding only (raises if unavailable)
        - auto: Try embedding, fallback to TF-IDF if unavailable
        - hybrid: Both TF-IDF and embedding

        Any fallback state left over from an earlier fit or search is
        cleared first, so each call decides afresh which backend is used.

        Args:
            items: List of (item_id, content) tuples to index

        Raises:
            RuntimeError: If mode is "embedding" and embedding unavailable
        """
        self._items = items.copy()
        self._fitted = False
        self._fitted_mode = None
        # Start from a clean slate. If the fallback flag survived a refit,
        # a successful embedding index would be ignored and searches would
        # keep reading a TF-IDF index built from the previous items.
        self._using_fallback = False
        self._fallback_reason = None
        self._active_backend = None
        mode = self._config.get_mode_enum()

        if mode == SearchMode.TFIDF:
            # TF-IDF only
            tfidf = self._get_tfidf_backend()
            await tfidf.fit_async(items)
            self._active_backend = "tfidf"
            self._fitted = True
            self._fitted_mode = mode

        elif mode == SearchMode.EMBEDDING:
            # Embedding only - fail if unavailable
            if not is_embedding_available(
                model=self._config.embedding_model,
                api_key=self._config.embedding_api_key,
                api_base=self._config.embedding_api_base,
            ):
                raise RuntimeError(
                    f"Embedding unavailable for model {self._config.embedding_model}. "
                    "Set the appropriate API key or use mode='auto' for fallback."
                )

            embedding = self._get_embedding_backend()
            await embedding.fit_async(items)
            self._active_backend = "embedding"
            self._fitted = True
            self._fitted_mode = mode

        elif mode == SearchMode.AUTO:
            # Try embedding, fallback to TF-IDF
            if not is_embedding_available(
                model=self._config.embedding_model,
                api_key=self._config.embedding_api_key,
                api_base=self._config.embedding_api_base,
            ):
                # No embedding available - use TF-IDF
                await self._fallback_to_tfidf(
                    f"Embedding unavailable (no API key for {self._config.embedding_model})",
                    items=items,
                )
            else:
                try:
                    embedding = self._get_embedding_backend()
                    await embedding.fit_async(items)
                    self._active_backend = "embedding"
                    self._fitted = True
                    self._fitted_mode = mode
                except Exception as e:
                    # Embedding failed - fallback to TF-IDF
                    await self._fallback_to_tfidf(
                        f"Embedding indexing failed: {e}",
                        error=e,
                        items=items,
                    )

        elif mode == SearchMode.HYBRID:
            # Both TF-IDF and embedding
            tfidf = self._get_tfidf_backend()
            await tfidf.fit_async(items)

            if is_embedding_available(
                model=self._config.embedding_model,
                api_key=self._config.embedding_api_key,
                api_base=self._config.embedding_api_base,
            ):
                try:
                    embedding = self._get_embedding_backend()
                    await embedding.fit_async(items)
                    self._active_backend = "hybrid"
                except Exception as e:
                    logger.warning(f"Hybrid embedding indexing failed: {e}")
                    self._emit_fallback_event(
                        f"Hybrid mode embedding failed: {e}",
                        error=e,
                    )
                    self._active_backend = "tfidf"
            else:
                self._emit_fallback_event(
                    f"Hybrid mode: embedding unavailable for {self._config.embedding_model}"
                )
                self._active_backend = "tfidf"

            # TF-IDF was indexed above, so hybrid counts as fitted even
            # when the embedding half could not be built.
            self._fitted = True
            self._fitted_mode = mode

    async def search_async(
        self,
        query: str,
        top_k: int = 10,
    ) -> list[tuple[str, float]]:
        """Search for items matching the query.

        Uses the appropriate backend(s) based on mode and fallback state.

        Args:
            query: Search query text
            top_k: Maximum number of results to return

        Returns:
            List of (item_id, similarity_score) tuples, sorted by
            relevance (highest first). Returns an empty list if the
            searcher has not been fitted.

        Raises:
            RuntimeError: If mode is "embedding" and the embedding backend
                reports that it needs a refit.
        """
        if not self._fitted:
            return []

        # If we've already fallen back, use TF-IDF. This check must come
        # before the mode-mismatch check below: a fallback records TF-IDF as
        # the fitted mode, which would otherwise look like a configuration
        # change on every search and re-log / re-emit the fallback.
        if self._using_fallback:
            return await self._get_tfidf_backend().search_async(query, top_k)

        mode = self._config.get_mode_enum()

        # Check for mode mismatch between fit and search
        if self._fitted_mode is not None and self._fitted_mode != mode:
            logger.warning(
                f"Search mode changed from {self._fitted_mode.value} to {mode.value} "
                "since last fit. Falling back to TF-IDF. Call fit_async() to reindex."
            )
            # Safe fallback to TF-IDF
            if self._tfidf_backend is not None and not self._tfidf_backend.needs_refit():
                return await self._tfidf_backend.search_async(query, top_k)
            # TF-IDF not available, trigger fallback with reindexing
            await self._fallback_to_tfidf(
                f"Mode changed from {self._fitted_mode.value} to {mode.value}"
            )
            return await self._get_tfidf_backend().search_async(query, top_k)

        if mode == SearchMode.TFIDF:
            return await self._get_tfidf_backend().search_async(query, top_k)

        elif mode == SearchMode.EMBEDDING:
            # Verify embedding backend is actually fitted - strict mode, no fallback
            embedding_backend = self._get_embedding_backend()
            if embedding_backend.needs_refit():
                raise RuntimeError(
                    "Embedding backend unavailable or needs refit. "
                    "Call fit_async() first or use mode='auto' for fallback."
                )
            return await embedding_backend.search_async(query, top_k)

        elif mode == SearchMode.AUTO:
            # Try embedding, fallback to TF-IDF on error
            embedding_backend = self._get_embedding_backend()
            # Defensively check if embedding backend is fitted
            if embedding_backend.needs_refit():
                logger.warning(
                    "Embedding backend needs refit in AUTO mode. Falling back to TF-IDF."
                )
                await self._fallback_to_tfidf("Embedding backend not properly fitted")
                return await self._get_tfidf_backend().search_async(query, top_k)
            try:
                return await embedding_backend.search_async(query, top_k)
            except Exception as e:
                # Fallback to TF-IDF
                await self._fallback_to_tfidf(f"Embedding search failed: {e}", error=e)
                return await self._get_tfidf_backend().search_async(query, top_k)

        elif mode == SearchMode.HYBRID:
            return await self._search_hybrid(query, top_k)

        return []

    async def _search_hybrid(
        self,
        query: str,
        top_k: int,
    ) -> list[tuple[str, float]]:
        """Perform hybrid search combining TF-IDF and embedding scores.

        Each backend is asked for ``top_k * 2`` candidates. An item missing
        from one backend's results contributes a score of 0.0 for that
        backend. The combined score is the weighted sum using the normalized
        weights from the configuration.

        Args:
            query: Search query text
            top_k: Maximum number of results to return

        Returns:
            Up to ``top_k`` (item_id, combined_score) tuples, highest first.
        """
        tfidf_weight, embedding_weight = self._config.get_normalized_weights()

        # Get TF-IDF results
        tfidf_results = await self._get_tfidf_backend().search_async(query, top_k * 2)
        tfidf_scores = dict(tfidf_results)

        # Try to get embedding results. fit_async() marks the active backend
        # as "hybrid" only when embedding indexing succeeded; a backend
        # object whose indexing failed still exists but must not be queried.
        embedding_scores: dict[str, float] = {}
        embedding_ready = (
            self._embedding_backend is not None
            and self._active_backend == "hybrid"
            and not self._using_fallback
        )
        if embedding_ready:
            try:
                embedding_results = await self._embedding_backend.search_async(query, top_k * 2)
                embedding_scores = dict(embedding_results)
            except Exception as e:
                logger.warning(f"Hybrid embedding search failed: {e}")
                self._emit_fallback_event(f"Hybrid search embedding failed: {e}", error=e)
                # Continue with TF-IDF only for this search

        # Combine scores
        all_ids = set(tfidf_scores.keys()) | set(embedding_scores.keys())
        combined: list[tuple[str, float]] = []

        for item_id in all_ids:
            tfidf_score = tfidf_scores.get(item_id, 0.0)
            emb_score = embedding_scores.get(item_id, 0.0)
            combined_score = (tfidf_weight * tfidf_score) + (embedding_weight * emb_score)
            combined.append((item_id, combined_score))

        # Sort by combined score descending
        combined.sort(key=lambda x: x[1], reverse=True)

        return combined[:top_k]

    def get_active_backend(self) -> str:
        """Get the name of the currently active backend.

        Returns:
            One of "tfidf", "embedding", "hybrid", or "none" if no backend
            has been selected yet.
        """
        return self._active_backend or "none"

    def is_using_fallback(self) -> bool:
        """Check if search is currently using TF-IDF fallback.

        Returns:
            True if a fallback to TF-IDF is currently in effect.
        """
        return self._using_fallback

    def get_fallback_reason(self) -> str | None:
        """Get the reason for fallback, if any.

        Returns:
            Human-readable fallback reason, or None if not using fallback.
        """
        return self._fallback_reason

    def needs_refit(self) -> bool:
        """Check if the search index needs rebuilding.

        Returns:
            True if fit_async() should be called before search_async().
        """
        if not self._fitted:
            return True

        mode = self._config.get_mode_enum()

        if mode == SearchMode.TFIDF or self._using_fallback:
            return self._get_tfidf_backend().needs_refit()

        if mode == SearchMode.EMBEDDING:
            return self._get_embedding_backend().needs_refit()

        if mode == SearchMode.HYBRID:
            tfidf_needs = self._get_tfidf_backend().needs_refit()
            embedding_needs = (
                self._embedding_backend.needs_refit() if self._embedding_backend else False
            )
            return tfidf_needs or embedding_needs

        if mode == SearchMode.AUTO:
            # _using_fallback case already handled above in the TFIDF branch
            return self._get_embedding_backend().needs_refit()

        return True

    def get_stats(self) -> dict[str, Any]:
        """Get statistics about the search backends.

        Returns:
            Dict with unified statistics including active backend info.
            Per-backend statistics are added under "tfidf" and "embedding"
            for each backend that has been created.
        """
        stats: dict[str, Any] = {
            "mode": self._config.mode,
            "fitted": self._fitted,
            "fitted_mode": self._fitted_mode.value if self._fitted_mode else None,
            "active_backend": self._active_backend,
            "using_fallback": self._using_fallback,
            "fallback_reason": self._fallback_reason,
            "item_count": len(self._items),
        }

        if self._tfidf_backend:
            stats["tfidf"] = self._tfidf_backend.get_stats()

        if self._embedding_backend:
            stats["embedding"] = self._embedding_backend.get_stats()

        return stats

    def clear(self) -> None:
        """Clear all search indexes and reset state."""
        if self._tfidf_backend:
            self._tfidf_backend.clear()
        if self._embedding_backend:
            self._embedding_backend.clear()

        self._items = []
        self._fitted = False
        self._fitted_mode = None
        self._using_fallback = False
        self._fallback_reason = None
        self._active_backend = None

    def mark_update(self) -> None:
        """Mark that an item update occurred.

        Call this after adding/updating/removing items to track
        when a refit is needed.
        """
        if self._tfidf_backend:
            self._tfidf_backend.mark_update()
        # Embedding backend tracks updates through fitted state