gnosisllm-knowledge 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/__init__.py +152 -0
- gnosisllm_knowledge/api/__init__.py +5 -0
- gnosisllm_knowledge/api/knowledge.py +548 -0
- gnosisllm_knowledge/backends/__init__.py +26 -0
- gnosisllm_knowledge/backends/memory/__init__.py +9 -0
- gnosisllm_knowledge/backends/memory/indexer.py +384 -0
- gnosisllm_knowledge/backends/memory/searcher.py +516 -0
- gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
- gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
- gnosisllm_knowledge/backends/opensearch/config.py +195 -0
- gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
- gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
- gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
- gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
- gnosisllm_knowledge/chunking/__init__.py +9 -0
- gnosisllm_knowledge/chunking/fixed.py +138 -0
- gnosisllm_knowledge/chunking/sentence.py +239 -0
- gnosisllm_knowledge/cli/__init__.py +18 -0
- gnosisllm_knowledge/cli/app.py +509 -0
- gnosisllm_knowledge/cli/commands/__init__.py +7 -0
- gnosisllm_knowledge/cli/commands/agentic.py +529 -0
- gnosisllm_knowledge/cli/commands/load.py +369 -0
- gnosisllm_knowledge/cli/commands/search.py +440 -0
- gnosisllm_knowledge/cli/commands/setup.py +228 -0
- gnosisllm_knowledge/cli/display/__init__.py +5 -0
- gnosisllm_knowledge/cli/display/service.py +555 -0
- gnosisllm_knowledge/cli/utils/__init__.py +5 -0
- gnosisllm_knowledge/cli/utils/config.py +207 -0
- gnosisllm_knowledge/core/__init__.py +87 -0
- gnosisllm_knowledge/core/domain/__init__.py +43 -0
- gnosisllm_knowledge/core/domain/document.py +240 -0
- gnosisllm_knowledge/core/domain/result.py +176 -0
- gnosisllm_knowledge/core/domain/search.py +327 -0
- gnosisllm_knowledge/core/domain/source.py +139 -0
- gnosisllm_knowledge/core/events/__init__.py +23 -0
- gnosisllm_knowledge/core/events/emitter.py +216 -0
- gnosisllm_knowledge/core/events/types.py +226 -0
- gnosisllm_knowledge/core/exceptions.py +407 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
- gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
- gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
- gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
- gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
- gnosisllm_knowledge/core/interfaces/loader.py +102 -0
- gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
- gnosisllm_knowledge/core/interfaces/setup.py +164 -0
- gnosisllm_knowledge/fetchers/__init__.py +12 -0
- gnosisllm_knowledge/fetchers/config.py +77 -0
- gnosisllm_knowledge/fetchers/http.py +167 -0
- gnosisllm_knowledge/fetchers/neoreader.py +204 -0
- gnosisllm_knowledge/loaders/__init__.py +13 -0
- gnosisllm_knowledge/loaders/base.py +399 -0
- gnosisllm_knowledge/loaders/factory.py +202 -0
- gnosisllm_knowledge/loaders/sitemap.py +285 -0
- gnosisllm_knowledge/loaders/website.py +57 -0
- gnosisllm_knowledge/py.typed +0 -0
- gnosisllm_knowledge/services/__init__.py +9 -0
- gnosisllm_knowledge/services/indexing.py +387 -0
- gnosisllm_knowledge/services/search.py +349 -0
- gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
- gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
- gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,440 @@
|
|
|
1
|
+
"""Search command for querying indexed knowledge.
|
|
2
|
+
|
|
3
|
+
Supports multiple search modes:
|
|
4
|
+
- semantic: Meaning-based vector search using embeddings
|
|
5
|
+
- keyword: Traditional BM25 text matching
|
|
6
|
+
- hybrid: Combined semantic + keyword (default, best results)
|
|
7
|
+
- agentic: AI-powered search with reasoning and answer generation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import sys
|
|
14
|
+
from typing import TYPE_CHECKING, Any
|
|
15
|
+
|
|
16
|
+
from opensearchpy import AsyncOpenSearch
|
|
17
|
+
from rich.prompt import Prompt
|
|
18
|
+
|
|
19
|
+
from gnosisllm_knowledge.backends.opensearch.config import OpenSearchConfig
|
|
20
|
+
from gnosisllm_knowledge.backends.opensearch.searcher import OpenSearchKnowledgeSearcher
|
|
21
|
+
from gnosisllm_knowledge.cli.display.service import RichDisplayService, SearchResultDisplay
|
|
22
|
+
from gnosisllm_knowledge.cli.utils.config import CliConfig
|
|
23
|
+
from gnosisllm_knowledge.core.domain.search import SearchMode, SearchQuery
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
pass
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _get_search_mode(mode: str) -> SearchMode:
    """Map a user-supplied mode string onto a ``SearchMode`` member.

    Matching is case-insensitive; any unrecognised value falls back to
    hybrid search, which is the CLI default.
    """
    normalized = mode.lower()
    if normalized == "semantic":
        return SearchMode.SEMANTIC
    if normalized == "keyword":
        return SearchMode.KEYWORD
    if normalized == "agentic":
        return SearchMode.AGENTIC
    # "hybrid" and anything unknown both resolve to hybrid.
    return SearchMode.HYBRID
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
async def search_command(
    display: RichDisplayService,
    query: str | None = None,
    mode: str = "hybrid",
    index_name: str = "knowledge",
    limit: int = 5,
    offset: int = 0,
    account_id: str | None = None,
    collection_ids: str | None = None,
    source_ids: str | None = None,
    min_score: float = 0.0,
    explain: bool = False,
    json_output: bool = False,
    interactive: bool = False,
    verbose: bool = False,
) -> None:
    """Execute the search command.

    Top-level CLI entry point: validates configuration, then dispatches to
    agentic search, the interactive REPL, or a single one-shot search.
    Exits the process with status 1 on configuration errors, a missing
    query, or a failed search.

    Args:
        display: Display service for output.
        query: Search query text.
        mode: Search mode (semantic, keyword, hybrid, agentic).
        index_name: Index to search.
        limit: Maximum results to return.
        offset: Pagination offset.
        account_id: Filter by account ID.
        collection_ids: Filter by collection IDs (comma-separated).
        source_ids: Filter by source IDs (comma-separated).
        min_score: Minimum score threshold.
        explain: Show score explanation.
        json_output: Output as JSON for scripting.
        interactive: Interactive search session.
        verbose: Show full content (not truncated).
    """
    # Load configuration from environment variables.
    cli_config = CliConfig.from_env()

    # Resolve the mode string up front so dispatch below can branch on it.
    search_mode = _get_search_mode(mode)

    # Agentic mode is handled by a separate command; delegate and return.
    if search_mode == SearchMode.AGENTIC:
        # Imported lazily to avoid paying the agentic module's import cost
        # (and any of its dependencies) on non-agentic invocations.
        from gnosisllm_knowledge.cli.commands.agentic import agentic_search_command

        result = await agentic_search_command(
            display=display,
            query=query or "",
            index_name=index_name,
            agent_type="flow",  # Default to flow for single queries
            account_id=account_id,
            collection_ids=collection_ids,
            source_ids=source_ids,
            limit=limit,
            json_output=json_output,
            verbose=verbose,
        )
        # A falsy result signals failure; mirror it in the exit code
        # (JSON mode reports errors in-band instead).
        if not result and not json_output:
            sys.exit(1)
        return

    # Semantic/hybrid search needs a deployed embedding model; fail fast
    # with actionable guidance if it has not been configured.
    if search_mode in (SearchMode.SEMANTIC, SearchMode.HYBRID):
        if not cli_config.opensearch_model_id:
            if not json_output:
                display.format_error_with_suggestion(
                    error="OPENSEARCH_MODEL_ID is required for semantic/hybrid search.",
                    suggestion="Run setup first or use --mode keyword for basic search.",
                    command="gnosisllm-knowledge setup",
                )
            else:
                print(json.dumps({"error": "OPENSEARCH_MODEL_ID required"}))
            sys.exit(1)

    # Interactive mode: hand off to the REPL loop; a query argument is
    # not required there.
    if interactive:
        await _interactive_search(
            display=display,
            cli_config=cli_config,
            index_name=index_name,
            mode=mode,
            limit=limit,
            account_id=account_id,
            collection_ids=collection_ids,
            source_ids=source_ids,
            min_score=min_score,
            verbose=verbose,
        )
        return

    # One-shot search requires a query on the command line.
    if not query:
        if not json_output:
            display.format_error_with_suggestion(
                error="Search query is required.",
                suggestion="Provide a query or use --interactive mode.",
                command='gnosisllm-knowledge search "your query here"',
            )
        else:
            print(json.dumps({"error": "Query required"}))
        sys.exit(1)

    # Execute a single search; _execute_search returns None on failure.
    result = await _execute_search(
        display=display,
        cli_config=cli_config,
        query=query,
        mode=mode,
        index_name=index_name,
        limit=limit,
        offset=offset,
        account_id=account_id,
        collection_ids=collection_ids,
        source_ids=source_ids,
        min_score=min_score,
        explain=explain,
        json_output=json_output,
        verbose=verbose,
    )

    if not result and not json_output:
        sys.exit(1)
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
async def _execute_search(
    display: RichDisplayService,
    cli_config: CliConfig,
    query: str,
    mode: str,
    index_name: str,
    limit: int,
    offset: int,
    account_id: str | None,
    collection_ids: str | None,
    source_ids: str | None,
    min_score: float,
    explain: bool,
    json_output: bool,
    verbose: bool,
) -> dict[str, Any] | None:
    """Execute a single search and display results.

    Opens a short-lived ``AsyncOpenSearch`` client, runs one query through
    ``OpenSearchKnowledgeSearcher``, and renders the results either as
    machine-readable JSON (``json_output=True``) or as rich terminal output.

    Returns:
        The full JSON payload dict in JSON mode, a small summary dict
        (``total_hits``/``duration_ms``) in human-readable mode, or
        ``None`` when the search itself failed.
    """
    # Comma-separated CLI filter strings -> lists (None means "no filter").
    collection_list = collection_ids.split(",") if collection_ids else None
    source_list = source_ids.split(",") if source_ids else None

    # Create OpenSearch client; auth only when both credentials are set.
    http_auth = None
    if cli_config.opensearch_username and cli_config.opensearch_password:
        http_auth = (cli_config.opensearch_username, cli_config.opensearch_password)

    client = AsyncOpenSearch(
        hosts=[{"host": cli_config.opensearch_host, "port": cli_config.opensearch_port}],
        http_auth=http_auth,
        use_ssl=cli_config.opensearch_use_ssl,
        verify_certs=cli_config.opensearch_verify_certs,
        ssl_show_warn=False,
    )

    try:
        # Create searcher config mirroring the CLI/env configuration.
        opensearch_config = OpenSearchConfig(
            host=cli_config.opensearch_host,
            port=cli_config.opensearch_port,
            username=cli_config.opensearch_username,
            password=cli_config.opensearch_password,
            use_ssl=cli_config.opensearch_use_ssl,
            verify_certs=cli_config.opensearch_verify_certs,
            model_id=cli_config.opensearch_model_id,
            search_pipeline_name=cli_config.opensearch_search_pipeline_name,
        )

        searcher = OpenSearchKnowledgeSearcher(client, opensearch_config)

        # Build the domain-level search query from CLI arguments.
        search_query = SearchQuery(
            text=query,
            mode=_get_search_mode(mode),
            limit=limit,
            offset=offset,
            account_id=account_id,
            collection_ids=collection_list,
            source_ids=source_list,
            min_score=min_score,
            explain=explain,
            include_highlights=True,
        )

        # Header is suppressed in JSON mode to keep stdout parseable.
        if not json_output:
            display.header(
                "GnosisLLM Knowledge Search",
                f"Query: {query[:50]}{'...' if len(query) > 50 else ''}",
            )

        try:
            result = await searcher.search(search_query, index_name)
        except Exception as e:
            # Search failures are reported (human or JSON form) and
            # signalled to the caller via None rather than re-raised.
            if not json_output:
                display.format_error_with_suggestion(
                    error=f"Search failed: {e}",
                    suggestion="Check that OpenSearch is running and the index exists.",
                    command=f"gnosisllm-knowledge load <url> --index {index_name}",
                )
            else:
                print(json.dumps({"error": str(e)}))
            return None

        # JSON output: dump the full result payload and return it.
        if json_output:
            output = {
                "query": result.query,
                "mode": result.mode.value,
                "total_hits": result.total_hits,
                "duration_ms": result.duration_ms,
                "max_score": result.max_score,
                "results": [
                    {
                        "id": item.doc_id,
                        "title": item.title,
                        # Content is truncated to 300 chars unless --verbose.
                        "content": item.content if verbose else item.content[:300],
                        "score": item.score,
                        "url": item.url,
                        "source": item.source,
                        "collection_id": item.collection_id,
                        "chunk_index": item.chunk_index,
                        "total_chunks": item.total_chunks,
                        "highlights": item.highlights,
                    }
                    for item in result.items
                ],
            }
            # default=str keeps non-JSON-native values (e.g. enums,
            # datetimes) serializable by stringifying them.
            print(json.dumps(output, indent=2, default=str))
            return output

        # Human-readable output: adapt items to display-layer records.
        search_results = []
        for i, item in enumerate(result.items, 1):
            content_preview = item.content
            if not verbose and len(content_preview) > 200:
                content_preview = content_preview[:200] + "..."

            search_results.append(
                SearchResultDisplay(
                    rank=i,
                    title=item.title or "Untitled",
                    content_preview=content_preview,
                    score=item.score or 0.0,
                    url=item.url,
                    collection_id=item.collection_id,
                    highlights=item.highlights or [],
                )
            )

        display.search_results(
            results=search_results,
            query=result.query,
            total_hits=result.total_hits,
            duration_ms=result.duration_ms,
            mode=result.mode.value,
        )

        # Tip shown only for the default mode, nudging users toward the
        # more specialised modes.
        if mode == "hybrid":
            display.newline()
            display.info(
                "[dim]Tip: Use --mode semantic for meaning-based, --mode keyword for exact match[/dim]"
            )

        return {"total_hits": result.total_hits, "duration_ms": result.duration_ms}

    finally:
        # Always release the client's connections, even on error paths.
        await client.close()
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
async def _interactive_search(
    display: RichDisplayService,
    cli_config: CliConfig,
    index_name: str,
    mode: str,
    limit: int,
    account_id: str | None,
    collection_ids: str | None,
    source_ids: str | None,
    min_score: float,
    verbose: bool,
) -> None:
    """Run interactive search session.

    REPL loop: each prompt entry is either a new query, or a bare number
    selecting one of the previous results to show in full. Ctrl+C exits.

    Fixes over the previous revision:
    - ``SearchQuery`` was built with ``highlight=True`` here while
      ``_execute_search`` passes ``include_highlights=True`` for the same
      model; the inconsistent keyword would raise ``TypeError`` at query
      time. Now uses ``include_highlights=True``.
    - The detail panel formatted ``item.score`` with ``:.2%`` directly,
      which raises ``TypeError`` when the score is ``None``; it now falls
      back to ``0.0`` like the result-list rendering already does.

    Args:
        display: Display service for output.
        cli_config: Resolved CLI/environment configuration.
        index_name: Index to search.
        mode: Search mode name (semantic, keyword, hybrid).
        limit: Maximum results per query.
        account_id: Filter by account ID.
        collection_ids: Filter by collection IDs (comma-separated).
        source_ids: Filter by source IDs (comma-separated).
        min_score: Minimum score threshold.
        verbose: Accepted for interface parity with search_command;
            currently unused in the interactive view.
    """
    display.header(
        "GnosisLLM Knowledge Search (Interactive)",
        f"Index: {index_name} | Mode: {mode} | Press Ctrl+C to exit",
    )

    # Comma-separated CLI filter strings -> lists (None means "no filter").
    collection_list = collection_ids.split(",") if collection_ids else None
    source_list = source_ids.split(",") if source_ids else None

    # Create OpenSearch client; auth only when both credentials are set.
    http_auth = None
    if cli_config.opensearch_username and cli_config.opensearch_password:
        http_auth = (cli_config.opensearch_username, cli_config.opensearch_password)

    client = AsyncOpenSearch(
        hosts=[{"host": cli_config.opensearch_host, "port": cli_config.opensearch_port}],
        http_auth=http_auth,
        use_ssl=cli_config.opensearch_use_ssl,
        verify_certs=cli_config.opensearch_verify_certs,
        ssl_show_warn=False,
    )

    try:
        # Create searcher config mirroring the CLI/env configuration.
        opensearch_config = OpenSearchConfig(
            host=cli_config.opensearch_host,
            port=cli_config.opensearch_port,
            username=cli_config.opensearch_username,
            password=cli_config.opensearch_password,
            use_ssl=cli_config.opensearch_use_ssl,
            verify_certs=cli_config.opensearch_verify_certs,
            model_id=cli_config.opensearch_model_id,
            search_pipeline_name=cli_config.opensearch_search_pipeline_name,
        )

        searcher = OpenSearchKnowledgeSearcher(client, opensearch_config)

        # Results from the most recent query, so a numeric entry can
        # select one of them for full display.
        last_results: list[Any] = []

        while True:
            try:
                display.newline()
                user_input = Prompt.ask("[bold cyan]Search[/bold cyan]")

                if not user_input:
                    continue

                # A bare number selects a previous result to view in full.
                if user_input.isdigit():
                    idx = int(user_input) - 1
                    if 0 <= idx < len(last_results):
                        item = last_results[idx]
                        display.panel(
                            f"{item.content}\n\n"
                            f"[dim]URL: {item.url or 'N/A'}[/dim]\n"
                            # Guard against a None score (matches the
                            # `item.score or 0` handling in the list view).
                            f"[dim]Score: {(item.score or 0.0):.2%}[/dim]"
                            + (
                                f" | Chunk: {item.chunk_index + 1}/{item.total_chunks}"
                                if item.total_chunks
                                else ""
                            ),
                            title=item.title or "Document",
                            style="info",
                        )
                    else:
                        display.warning(f"Invalid selection. Enter 1-{len(last_results)}.")
                    continue

                # Otherwise treat the input as a new search query.
                search_query = SearchQuery(
                    text=user_input,
                    mode=_get_search_mode(mode),
                    limit=limit,
                    offset=0,
                    account_id=account_id,
                    collection_ids=collection_list,
                    source_ids=source_list,
                    min_score=min_score,
                    # Keyword fixed: was `highlight=True`, which does not
                    # match the SearchQuery field used in _execute_search.
                    include_highlights=True,
                )

                with display.loading_spinner("Searching..."):
                    result = await searcher.search(search_query, index_name)

                last_results = result.items

                if not result.items:
                    display.warning("No results found.")
                    continue

                # Display compact results.
                display.info(f"Found {result.total_hits} results in {result.duration_ms:.1f}ms")
                display.newline()

                for i, item in enumerate(result.items, 1):
                    # Scores <= 1 are treated as fractions and scaled to a
                    # percentage; larger values are shown as-is.
                    score_pct = (item.score or 0) * 100 if (item.score or 0) <= 1 else item.score
                    title = item.title or "Untitled"
                    display.console.print(
                        f" [cyan]{i}.[/cyan] [bold]{title[:50]}[/bold] "
                        f"[dim]({score_pct:.1f}%)[/dim]"
                    )
                    if item.url:
                        display.console.print(f" [blue]{item.url[:60]}[/blue]")

                display.newline()
                display.info("[dim]Enter number to view full content, or new query[/dim]")

            except KeyboardInterrupt:
                # Ctrl+C cleanly ends the session instead of crashing.
                display.newline()
                display.info("Exiting interactive mode.")
                break
            except Exception as e:
                # Keep the REPL alive on per-query failures.
                display.error(f"Search error: {e}")

    finally:
        # Always release the client's connections, even on error paths.
        await client.close()
|
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
"""Setup command for configuring OpenSearch with neural search.
|
|
2
|
+
|
|
3
|
+
Creates:
|
|
4
|
+
- OpenAI embedding connector
|
|
5
|
+
- Model group and deployed ML model
|
|
6
|
+
- Ingest pipeline for automatic embedding generation
|
|
7
|
+
- Search pipeline for hybrid scoring
|
|
8
|
+
- Knowledge index with k-NN vector mapping
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import sys
|
|
14
|
+
from typing import TYPE_CHECKING
|
|
15
|
+
|
|
16
|
+
from opensearchpy import AsyncOpenSearch
|
|
17
|
+
|
|
18
|
+
from gnosisllm_knowledge.backends.opensearch.config import OpenSearchConfig
|
|
19
|
+
from gnosisllm_knowledge.backends.opensearch.setup import OpenSearchSetupAdapter
|
|
20
|
+
from gnosisllm_knowledge.cli.display.service import RichDisplayService, StepProgress
|
|
21
|
+
from gnosisllm_knowledge.cli.utils.config import CliConfig
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
pass
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
async def setup_command(
    display: RichDisplayService,
    host: str = "localhost",
    port: int = 9200,
    username: str | None = None,
    password: str | None = None,
    use_ssl: bool = False,
    verify_certs: bool = False,
    force: bool = False,
    no_sample_data: bool = False,
    no_hybrid: bool = False,
) -> None:
    """Execute the setup command.

    Provisions OpenSearch for neural search via ``OpenSearchSetupAdapter``:
    connects, optionally cleans up existing resources, runs the adapter's
    setup steps while reflecting their outcome in a progress display, and
    prints the resulting model ID. Exits with status 1 on missing API key,
    unreachable cluster, or setup failure.

    Args:
        display: Display service for output.
        host: OpenSearch host.
        port: OpenSearch port.
        username: OpenSearch username.
        password: OpenSearch password.
        use_ssl: Enable SSL.
        verify_certs: Verify SSL certificates.
        force: Clean up existing resources first.
        no_sample_data: Skip sample data ingestion.
        no_hybrid: Skip hybrid search pipeline.
    """
    # Load configuration from environment variables.
    cli_config = CliConfig.from_env()

    # CLI arguments take precedence; env config is the fallback.
    # NOTE(review): because the CLI defaults are truthy ("localhost", 9200),
    # the env values are only used when the caller passes falsy overrides.
    final_host = host or cli_config.opensearch_host
    final_port = port or cli_config.opensearch_port
    final_username = username or cli_config.opensearch_username
    final_password = password or cli_config.opensearch_password

    # The OpenAI key is needed to create the embedding connector.
    if not cli_config.openai_api_key:
        display.format_error_with_suggestion(
            error="OPENAI_API_KEY is required for setup.",
            suggestion="Set the OPENAI_API_KEY environment variable.",
            command="export OPENAI_API_KEY=sk-...",
        )
        sys.exit(1)

    # Display header
    display.header(
        "GnosisLLM Knowledge Setup",
        f"Configuring OpenSearch at {final_host}:{final_port}",
    )

    # Show the effective configuration before doing anything.
    display.table(
        "Configuration",
        [
            ("Host", f"{final_host}:{final_port}"),
            ("SSL", "Enabled" if use_ssl else "Disabled"),
            ("Auth", "Configured" if final_username else "None"),
            ("Hybrid Search", "Disabled" if no_hybrid else "Enabled"),
            ("Force Recreate", "Yes" if force else "No"),
        ],
    )

    display.newline()

    # Backend configuration for the setup adapter.
    opensearch_config = OpenSearchConfig(
        host=final_host,
        port=final_port,
        username=final_username,
        password=final_password,
        use_ssl=use_ssl,
        verify_certs=verify_certs,
        openai_api_key=cli_config.openai_api_key,
        embedding_model=cli_config.openai_embedding_model,
        embedding_dimension=cli_config.openai_embedding_dimension,
    )

    # Create OpenSearch client; auth only when both credentials are set.
    http_auth = None
    if final_username and final_password:
        http_auth = (final_username, final_password)

    client = AsyncOpenSearch(
        hosts=[{"host": final_host, "port": final_port}],
        http_auth=http_auth,
        use_ssl=use_ssl,
        verify_certs=verify_certs,
        ssl_show_warn=False,
    )

    try:
        # Health check: fail fast with guidance if the cluster is down.
        display.info("Checking OpenSearch connection...")

        adapter = OpenSearchSetupAdapter(client, opensearch_config)

        if not await adapter.health_check():
            display.format_error_with_suggestion(
                error=f"Cannot connect to OpenSearch at {final_host}:{final_port}",
                suggestion="Ensure OpenSearch is running and accessible.",
                command=f"curl http{'s' if use_ssl else ''}://{final_host}:{final_port}",
            )
            sys.exit(1)

        # Cluster info is cosmetic; any failure degrades to a plain message.
        try:
            cluster_stats = await adapter.get_cluster_stats()
            display.success(
                f"Connected to OpenSearch {cluster_stats.get('cluster_name', 'cluster')} "
                f"({cluster_stats.get('node_count', 0)} nodes)"
            )
        except Exception:
            display.success("Connected to OpenSearch")

        display.newline()

        # --force: tear down existing resources before re-provisioning.
        if force:
            display.warning("Force mode: cleaning up existing resources...")
            cleanup_result = await adapter.cleanup()
            for step in cleanup_result.steps_completed or []:
                display.info(f" {step}")
            display.newline()

        # Step definitions are (name, description) pairs from the adapter.
        step_defs = adapter.get_setup_steps()

        # --no-hybrid drops the search-pipeline step from the plan.
        if no_hybrid:
            step_defs = [s for s in step_defs if s[0] != "search_pipeline"]

        # Create the progress display mirroring the planned steps.
        steps = [StepProgress(name=name, description=desc) for name, desc in step_defs]
        progress = display.progress(steps)

        # force_recreate stays False here: cleanup already ran above.
        setup_options = {
            "force_recreate": False,  # Already handled above
        }

        try:
            result = await adapter.setup(**setup_options)

            # Map the adapter's completed/error step strings back onto the
            # progress rows by substring matching.
            for i, (step_name, _) in enumerate(step_defs):
                # Normalize step name for matching (replace underscores with spaces)
                normalized_name = step_name.replace("_", " ")

                # Check if step was completed
                step_completed = any(
                    normalized_name in completed.lower()
                    for completed in (result.steps_completed or [])
                )
                step_error = None
                for error in result.errors or []:
                    if normalized_name in error.lower() or step_name in error.lower():
                        step_error = error
                        break

                if step_error:
                    # Show only the tail of the error message, truncated.
                    progress.fail(i, step_error.split(": ")[-1][:40])
                elif step_completed:
                    progress.complete(i)
                else:
                    progress.skip(i, "Skipped")

            progress.stop()

        except Exception as e:
            # Stop the live progress display before printing the error.
            progress.stop()
            display.format_error_with_suggestion(
                error=f"Setup failed: {e}",
                suggestion="Check OpenSearch logs for more details.",
            )
            sys.exit(1)

        display.newline()

        # Display final outcome: success panel with the model ID, or a
        # warning panel listing errors (and a non-zero exit).
        if result.success:
            model_id = adapter.model_id or (result.data or {}).get("model_id")

            content = f"Model ID: [cyan]{model_id}[/cyan]\n\n"
            content += "Add to your .env file:\n"
            content += f" [green]OPENSEARCH_MODEL_ID={model_id}[/green]"

            if not no_sample_data:
                content += "\n\nTest your setup:\n"
                content += ' [dim]gnosisllm-knowledge search "test query"[/dim]'

            display.panel(content, title="Setup Complete", style="success")

        else:
            error_content = "Setup completed with errors:\n\n"
            for error in result.errors or []:
                error_content += f"[red]• {error}[/red]\n"

            display.panel(error_content, title="Setup Incomplete", style="warning")
            sys.exit(1)

    finally:
        # Always release the client's connections, even on error paths.
        await client.close()
|