gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,738 @@
1
+ """OpenSearch agentic searcher implementation.
2
+
3
+ Uses OpenSearch ML agents for AI-powered search with reasoning capabilities.
4
+ Supports flow agents (fast RAG) and conversational agents (multi-turn with memory).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import json
11
+ import logging
12
+ import uuid
13
+ from datetime import UTC, datetime
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ from opensearchpy import AsyncOpenSearch
17
+
18
+ from gnosisllm_knowledge.backends.opensearch.config import OpenSearchConfig
19
+ from gnosisllm_knowledge.core.domain.search import (
20
+ AgentType,
21
+ AgenticSearchQuery,
22
+ AgenticSearchResult,
23
+ ReasoningStep,
24
+ SearchMode,
25
+ SearchResultItem,
26
+ )
27
+ from gnosisllm_knowledge.core.exceptions import AgenticSearchError
28
+
29
+ if TYPE_CHECKING:
30
+ from gnosisllm_knowledge.core.interfaces.agentic import IAgenticSearcher
31
+
32
+
33
class OpenSearchAgenticSearcher:
    """Executes agentic search using OpenSearch ML agents.

    Supports two agent types:
    - Flow Agent: Fast, sequential RAG with minimal reasoning
    - Conversational Agent: Multi-turn with memory and detailed reasoning

    The searcher integrates with OpenSearch's ML Commons plugin to:
    1. Execute agents via /_plugins/_ml/agents/{agent_id}/_execute
    2. Parse agent responses including reasoning traces
    3. Extract search results from VectorDBTool outputs
    4. Manage conversation memory for multi-turn interactions

    Example:
        ```python
        config = OpenSearchConfig.from_env()
        client = AsyncOpenSearch(hosts=[config.url])
        searcher = OpenSearchAgenticSearcher(client, config)

        query = AgenticSearchQuery(
            text="How do I configure OAuth2?",
            agent_type=AgentType.FLOW,
        )
        result = await searcher.agentic_search(query, "knowledge")
        print(result.answer)
        ```
    """

    def __init__(
        self,
        client: AsyncOpenSearch,
        config: OpenSearchConfig,
    ) -> None:
        """Initialize the agentic searcher.

        Args:
            client: Async OpenSearch client.
            config: OpenSearch configuration with agent IDs.
        """
        self._client = client
        self._config = config
        self._logger = logging.getLogger(__name__)

    @property
    def is_configured(self) -> bool:
        """Check if at least one agent is configured."""
        return self.flow_agent_available or self.conversational_agent_available

    @property
    def flow_agent_available(self) -> bool:
        """Check if flow agent is configured."""
        return bool(self._config.flow_agent_id)

    @property
    def conversational_agent_available(self) -> bool:
        """Check if conversational agent is configured."""
        return bool(self._config.conversational_agent_id)

    async def agentic_search(
        self,
        query: AgenticSearchQuery,
        index_name: str,
        **options: Any,
    ) -> AgenticSearchResult:
        """Execute agentic search with agent orchestration.

        The flow:
        1. Select agent based on query.agent_type
        2. Build execution request with query and filters
        3. Execute agent via OpenSearch ML API
        4. Parse response for answer, reasoning, and results

        Args:
            query: Agentic search query with agent type and context.
            index_name: Target index name.
            **options: Additional agent options.

        Returns:
            AgenticSearchResult with answer, reasoning, and sources.

        Raises:
            AgenticSearchError: If agent execution fails or no agent is
                configured for the requested type.
        """
        start = datetime.now(UTC)

        # Select agent based on type
        agent_id = self._get_agent_id(query.agent_type)
        if not agent_id:
            raise AgenticSearchError(
                message=f"Agent not configured for type: {query.agent_type.value}",
                agent_type=query.agent_type.value,
                details={"hint": "Run 'gnosisllm-knowledge agentic setup' to configure agents."},
            )

        # Build execution request
        execute_body = self._build_execute_request(query, index_name)

        self._logger.debug(
            "Executing agentic search",
            extra={
                "agent_id": agent_id,
                "agent_type": query.agent_type.value,
                "query": query.text[:100],
            },
        )

        # Execute agent
        response = await self._execute_agent(agent_id, execute_body)

        duration_ms = (datetime.now(UTC) - start).total_seconds() * 1000

        return self._parse_agentic_response(query, response, duration_ms)

    async def get_conversation(
        self,
        conversation_id: str,
    ) -> list[dict[str, Any]]:
        """Get conversation history for multi-turn searches.

        Best-effort: returns an empty list (and logs a warning) on any
        transport or API error rather than raising.

        Args:
            conversation_id: Conversation identifier (memory_id).

        Returns:
            List of conversation messages with role and content.
        """
        try:
            response = await self._client.transport.perform_request(
                "GET",
                f"/_plugins/_ml/memory/{conversation_id}/_messages",
            )
            messages = response.get("messages", [])
            return [
                {
                    "role": msg.get("role", "unknown"),
                    "content": msg.get("content", ""),
                    "timestamp": msg.get("create_time"),
                }
                for msg in messages
            ]
        except Exception as e:
            self._logger.warning(f"Failed to get conversation: {e}")
            return []

    async def clear_conversation(
        self,
        conversation_id: str,
    ) -> bool:
        """Clear conversation history.

        Args:
            conversation_id: Conversation to clear.

        Returns:
            True if cleared successfully, False if not found.
        """
        try:
            await self._client.transport.perform_request(
                "DELETE",
                f"/_plugins/_ml/memory/{conversation_id}",
            )
            return True
        except Exception as e:
            self._logger.warning(f"Failed to clear conversation: {e}")
            return False

    async def list_conversations(
        self,
        account_id: str | None = None,
        limit: int = 100,
    ) -> list[dict[str, Any]]:
        """List active conversations.

        Args:
            account_id: Filter by account (multi-tenant).
            limit: Maximum number of conversations.

        Returns:
            List of conversation metadata dicts; empty on any API error.
        """
        try:
            body: dict[str, Any] = {"size": limit}
            if account_id:
                body["query"] = {"term": {"account_id": account_id}}

            response = await self._client.transport.perform_request(
                "POST",
                "/_plugins/_ml/memory/_search",
                body=body,
            )
            hits = response.get("hits", {}).get("hits", [])
            return [
                {
                    "conversation_id": hit.get("_id"),
                    "name": hit.get("_source", {}).get("name"),
                    "created_at": hit.get("_source", {}).get("create_time"),
                    "updated_at": hit.get("_source", {}).get("update_time"),
                }
                for hit in hits
            ]
        except Exception as e:
            self._logger.warning(f"Failed to list conversations: {e}")
            return []

    async def get_agent_status(
        self,
        agent_id: str,
    ) -> dict[str, Any] | None:
        """Get status of an agent.

        Args:
            agent_id: Agent identifier.

        Returns:
            Agent status info or None if not found / on API error.
        """
        try:
            response = await self._client.transport.perform_request(
                "GET",
                f"/_plugins/_ml/agents/{agent_id}",
            )
            return {
                "agent_id": agent_id,
                "name": response.get("name"),
                "type": response.get("type"),
                "description": response.get("description"),
                "tools": [t.get("name") for t in response.get("tools", [])],
                "created_at": response.get("created_time"),
            }
        except Exception as e:
            self._logger.warning(f"Failed to get agent status: {e}")
            return None

    async def create_conversation(
        self,
        name: str | None = None,
        account_id: str | None = None,
    ) -> str | None:
        """Create a new conversation memory.

        Uses the OpenSearch Memory API to create a conversation memory.
        The endpoint is POST /_plugins/_ml/memory (introduced in 2.12).

        Args:
            name: Optional name for the conversation.
            account_id: Optional account ID for multi-tenancy.

        Returns:
            The new conversation/memory ID, or None if creation fails.
        """
        body: dict[str, Any] = {}
        if name:
            body["name"] = name
        if account_id:
            body["account_id"] = account_id

        try:
            # POST /_plugins/_ml/memory creates a new memory (OpenSearch 2.12+)
            response = await self._client.transport.perform_request(
                "POST",
                "/_plugins/_ml/memory",
                body=body if body else None,
            )
            memory_id = response.get("memory_id")
            if memory_id:
                self._logger.debug(f"Created conversation memory: {memory_id}")
            return memory_id
        except Exception as e:
            self._logger.warning(f"Failed to create conversation: {e}")
            # Return None - agent will work without pre-created memory
            # (agent may create its own memory on first use)
            return None

    def _get_agent_id(self, agent_type: AgentType) -> str | None:
        """Get agent ID for the specified type, or None if unconfigured."""
        if agent_type == AgentType.FLOW:
            return self._config.flow_agent_id
        elif agent_type == AgentType.CONVERSATIONAL:
            return self._config.conversational_agent_id
        return None

    def _build_execute_request(
        self,
        query: AgenticSearchQuery,
        index_name: str,
    ) -> dict[str, Any]:
        """Build agent execution request.

        Only includes parameters that the agent actually uses:
        - question: The user's query (required)
        - memory_id: For conversation continuity (conversational agents)

        Note: VectorDBTool's index and model_id are configured in the agent,
        not passed at runtime. Extra parameters cause IllegalArgumentException.

        Args:
            query: The agentic search query.
            index_name: Target index name (not used - agent has hardcoded index).

        Returns:
            Request body for agent execution.
        """
        request: dict[str, Any] = {
            "parameters": {
                "question": query.text,
            }
        }

        # Add conversation context for conversational agents
        # OpenSearch handles memory injection automatically with app_type=rag
        if query.agent_type == AgentType.CONVERSATIONAL and query.conversation_id:
            request["parameters"]["memory_id"] = query.conversation_id

        return request

    async def _execute_agent(
        self,
        agent_id: str,
        body: dict[str, Any],
    ) -> dict[str, Any]:
        """Execute agent and return response.

        Args:
            agent_id: The agent ID to execute.
            body: Request body with parameters.

        Returns:
            Agent execution response.

        Raises:
            AgenticSearchError: If execution fails or times out.
        """
        try:
            response = await asyncio.wait_for(
                self._client.transport.perform_request(
                    "POST",
                    f"/_plugins/_ml/agents/{agent_id}/_execute",
                    body=body,
                ),
                timeout=self._config.agentic_timeout_seconds,
            )
            return response
        except asyncio.TimeoutError as exc:
            # Chain the cause so the timeout origin is preserved in tracebacks,
            # matching the generic failure branch below.
            raise AgenticSearchError(
                message="Agent execution timed out",
                agent_id=agent_id,
                details={"timeout_seconds": self._config.agentic_timeout_seconds},
            ) from exc
        except Exception as e:
            self._logger.error(f"Agent execution failed: {e}")
            raise AgenticSearchError(
                message=f"Agent execution failed: {e}",
                agent_id=agent_id,
                cause=e,
            )

    def _parse_agentic_response(
        self,
        query: AgenticSearchQuery,
        response: dict[str, Any],
        duration_ms: float,
    ) -> AgenticSearchResult:
        """Parse agent response into AgenticSearchResult.

        The response structure from OpenSearch ML agents:
        {
            "inference_results": [
                {
                    "output": [
                        {"name": "response", "result": "The answer..."},
                        {"name": "knowledge_search", "result": {...}}
                    ]
                }
            ],
            "memory_id": "...",
            ...
        }

        Args:
            query: The original query.
            response: Agent execution response.
            duration_ms: Total execution duration.

        Returns:
            Parsed AgenticSearchResult.
        """
        answer: str | None = None
        reasoning_steps: list[ReasoningStep] = []
        items: list[SearchResultItem] = []
        conversation_id = response.get("memory_id")
        total_tokens = 0
        prompt_tokens = 0
        completion_tokens = 0

        # Parse inference results
        inference_results = response.get("inference_results", [])
        if inference_results:
            outputs = inference_results[0].get("output", [])

            for output in outputs:
                name = output.get("name", "")
                # Handle both direct result and dataAsMap structure (conversational agents)
                result = output.get("result", "")
                data_as_map = output.get("dataAsMap", {})
                if data_as_map and "response" in data_as_map:
                    result = data_as_map.get("response", result)

                if name == "memory_id":
                    # Extract conversation ID from conversational agent
                    conversation_id = str(result) if result else None
                elif name == "parent_message_id":
                    # Track parent message ID for conversation threading
                    pass  # Could store for future use
                elif name in ("response", "answer_generator", "MLModelTool"):
                    # Parse answer from output
                    answer = self._extract_answer_from_result(result)

                    # Add reasoning step for answer generation
                    if query.include_reasoning:
                        reasoning_steps.append(
                            ReasoningStep(
                                tool="MLModelTool",
                                action="answer_generation",
                                input=query.text,
                                output=answer[:100] if answer else None,
                                duration_ms=0,
                            )
                        )
                elif name in ("knowledge_search", "VectorDBTool"):
                    # Parse search results from tool output
                    items.extend(self._parse_tool_search_results(result))

                    # Add reasoning step
                    if query.include_reasoning:
                        reasoning_steps.append(
                            ReasoningStep(
                                tool="VectorDBTool",
                                action="search",
                                input=query.text,
                                output=f"Found {len(items)} documents",
                                duration_ms=0,  # Not tracked per-step
                            )
                        )

            # Parse token usage if available
            usage = inference_results[0].get("usage", {})
            total_tokens = usage.get("total_tokens", 0)
            prompt_tokens = usage.get("prompt_tokens", 0)
            completion_tokens = usage.get("completion_tokens", 0)

        # Parse agentic context for reasoning traces
        agentic_context = response.get("agentic_context", {})
        traces = agentic_context.get("traces", [])
        for trace in traces:
            if query.include_reasoning:
                reasoning_steps.append(
                    ReasoningStep(
                        tool=trace.get("tool", "unknown"),
                        action=trace.get("action", ""),
                        input=trace.get("input"),
                        output=trace.get("output"),
                        duration_ms=trace.get("duration_ms", 0),
                        tokens_used=trace.get("tokens", 0),
                    )
                )

        # If no answer from structured output, try to get from raw response
        if not answer and "response" in response:
            answer = response.get("response")

        # Preserve the query's conversation_id if agent didn't return one
        # This allows multi-turn conversations when memory was created beforehand
        final_conversation_id = conversation_id or query.conversation_id

        return AgenticSearchResult(
            query=query.text,
            mode=SearchMode.AGENTIC,
            items=items,
            total_hits=len(items),
            duration_ms=duration_ms,
            # assumes tool hits arrive score-sorted, first item scores highest
            # -- TODO confirm against VectorDBTool output ordering
            max_score=items[0].score if items else None,
            answer=answer,
            reasoning_steps=reasoning_steps,
            conversation_id=final_conversation_id,
            agent_type=query.agent_type,
            citations=[item.doc_id for item in items[:5]],  # Top 5 as citations
            total_tokens=total_tokens,
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
        )

    def _extract_answer_from_result(
        self,
        result: str | dict[str, Any],
    ) -> str | None:
        """Extract answer text from LLM tool result.

        Handles raw OpenAI API response format:
        {
            "choices": [
                {"message": {"content": "The answer..."}}
            ]
        }

        Args:
            result: Tool output (may be string JSON or dict).

        Returns:
            Extracted answer text or None.
        """
        # If it's already plain text, return it
        if isinstance(result, str):
            if not result.strip().startswith("{"):
                return result

            # Try to parse as JSON (OpenAI response format)
            try:
                result = json.loads(result)
            except json.JSONDecodeError:
                return result  # Return as-is if not valid JSON

        if not isinstance(result, dict):
            return str(result) if result else None

        # OpenAI response format
        choices = result.get("choices", [])
        if choices:
            message = choices[0].get("message", {})
            content = message.get("content")
            if content:
                return content.strip()

        # Fallback: look for common answer keys
        for key in ("answer", "text", "content", "output", "result"):
            if key in result:
                return str(result[key]).strip()

        return None

    def _parse_tool_search_results(
        self,
        result: str | dict[str, Any] | list[Any],
    ) -> list[SearchResultItem]:
        """Parse search results from VectorDBTool output.

        Accepts three shapes: a bare list of hits, a full OpenSearch body
        ({"hits": {"hits": [...]}}), or a flattened body ({"hits": [...]}).

        Args:
            result: Tool output (may be string JSON, dict, or list).

        Returns:
            List of SearchResultItem (empty when the output is unparseable).
        """
        items: list[SearchResultItem] = []

        # Parse if string
        if isinstance(result, str):
            try:
                result = json.loads(result)
            except json.JSONDecodeError:
                return items

        # Handle list of hits
        if isinstance(result, list):
            hits = result
        elif isinstance(result, dict):
            # BUGFIX: the previous chained .get("hits", {}).get("hits", [])
            # raised AttributeError when result["hits"] was already a list
            # (the flattened {"hits": [...]} shape). Check the type first.
            raw_hits = result.get("hits", [])
            if isinstance(raw_hits, dict):
                hits = raw_hits.get("hits", [])
            else:
                hits = raw_hits
        else:
            return items

        for hit in hits:
            source = hit.get("_source", hit) if isinstance(hit, dict) else {}
            if not source:
                continue

            items.append(
                SearchResultItem(
                    doc_id=hit.get("_id", source.get("id", "")),
                    content=source.get("content", ""),
                    score=hit.get("_score", source.get("score", 0.0)),
                    title=source.get("title"),
                    url=source.get("url"),
                    source=source.get("source"),
                    collection_id=source.get("collection_id"),
                    source_id=source.get("source_id"),
                    chunk_index=source.get("chunk_index"),
                    metadata=source.get("metadata"),
                )
            )

        return items
624
+
625
+
626
class AgenticSearchFallback:
    """Graceful-degradation wrapper around agentic search.

    When agents are unconfigured, fail, or time out, this handler runs the
    standard hybrid searcher instead and repackages its output as an
    AgenticSearchResult (without an AI-generated answer), so callers always
    receive results.

    Example:
        ```python
        agentic_searcher = OpenSearchAgenticSearcher(client, config)
        standard_searcher = OpenSearchKnowledgeSearcher(client, config)
        fallback = AgenticSearchFallback(agentic_searcher, standard_searcher)

        # Always returns results, with or without AI answer
        result = await fallback.search_with_fallback(query, "knowledge")
        ```
    """

    def __init__(
        self,
        agentic_searcher: OpenSearchAgenticSearcher,
        standard_searcher: Any,  # OpenSearchKnowledgeSearcher
    ) -> None:
        """Store the primary (agentic) and fallback (standard) searchers.

        Args:
            agentic_searcher: Agentic search implementation.
            standard_searcher: Standard knowledge searcher for fallback.
        """
        self._agentic = agentic_searcher
        self._standard = standard_searcher
        self._logger = logging.getLogger(__name__)

    @property
    def is_agentic_available(self) -> bool:
        """True when the agentic searcher has at least one agent configured."""
        return self._agentic.is_configured

    async def search_with_fallback(
        self,
        query: AgenticSearchQuery,
        index_name: str,
        **options: Any,
    ) -> AgenticSearchResult:
        """Run agentic search, degrading to hybrid search on any failure.

        Args:
            query: Agentic search query.
            index_name: Target index name.
            **options: Additional options forwarded to the agentic searcher.

        Returns:
            AgenticSearchResult (may not have answer if in fallback mode).
        """
        # Guard: skip the agent path entirely when nothing is configured.
        if not self._agentic.is_configured:
            self._logger.warning("Agentic search not configured, using fallback")
            return await self._execute_fallback(query, index_name, "Agents not configured")

        try:
            return await self._agentic.agentic_search(query, index_name, **options)
        except Exception as exc:
            self._logger.warning(f"Agentic search failed, falling back: {exc}")
            return await self._execute_fallback(query, index_name, str(exc))

    async def _execute_fallback(
        self,
        query: AgenticSearchQuery,
        index_name: str,
        reason: str,
    ) -> AgenticSearchResult:
        """Run the standard hybrid search and wrap it as an agentic result.

        Args:
            query: Original agentic query.
            index_name: Target index.
            reason: Human-readable reason the fallback was taken.

        Returns:
            AgenticSearchResult without AI answer.
        """
        # Downgrade the agentic query to a plain search query and run it.
        hybrid_result = await self._standard.search(query.to_search_query(), index_name)

        # Record a single reasoning step documenting the degradation.
        fallback_trace = ReasoningStep(
            tool="FallbackSearch",
            action="hybrid_search",
            input=query.text,
            output=f"Fallback mode: {reason}. Found {hybrid_result.total_hits} results.",
            duration_ms=hybrid_result.duration_ms,
        )

        return AgenticSearchResult.from_search_result(
            hybrid_result,
            answer=None,  # No AI answer in fallback
            reasoning_steps=[fallback_trace],
            agent_type=query.agent_type,
        )
735
+
736
+
737
# Type alias for protocol compliance: expose the concrete OpenSearch
# implementation under the IAgenticSearcher interface name. The ignore
# comment presumably silences the checker because IAgenticSearcher is
# imported only under TYPE_CHECKING -- confirm against mypy output.
AgenticSearcherImpl: type[IAgenticSearcher] = OpenSearchAgenticSearcher  # type: ignore[assignment]