gnosisllm-knowledge 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. gnosisllm_knowledge/__init__.py +152 -0
  2. gnosisllm_knowledge/api/__init__.py +5 -0
  3. gnosisllm_knowledge/api/knowledge.py +548 -0
  4. gnosisllm_knowledge/backends/__init__.py +26 -0
  5. gnosisllm_knowledge/backends/memory/__init__.py +9 -0
  6. gnosisllm_knowledge/backends/memory/indexer.py +384 -0
  7. gnosisllm_knowledge/backends/memory/searcher.py +516 -0
  8. gnosisllm_knowledge/backends/opensearch/__init__.py +19 -0
  9. gnosisllm_knowledge/backends/opensearch/agentic.py +738 -0
  10. gnosisllm_knowledge/backends/opensearch/config.py +195 -0
  11. gnosisllm_knowledge/backends/opensearch/indexer.py +499 -0
  12. gnosisllm_knowledge/backends/opensearch/mappings.py +255 -0
  13. gnosisllm_knowledge/backends/opensearch/queries.py +445 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +383 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +1390 -0
  16. gnosisllm_knowledge/chunking/__init__.py +9 -0
  17. gnosisllm_knowledge/chunking/fixed.py +138 -0
  18. gnosisllm_knowledge/chunking/sentence.py +239 -0
  19. gnosisllm_knowledge/cli/__init__.py +18 -0
  20. gnosisllm_knowledge/cli/app.py +509 -0
  21. gnosisllm_knowledge/cli/commands/__init__.py +7 -0
  22. gnosisllm_knowledge/cli/commands/agentic.py +529 -0
  23. gnosisllm_knowledge/cli/commands/load.py +369 -0
  24. gnosisllm_knowledge/cli/commands/search.py +440 -0
  25. gnosisllm_knowledge/cli/commands/setup.py +228 -0
  26. gnosisllm_knowledge/cli/display/__init__.py +5 -0
  27. gnosisllm_knowledge/cli/display/service.py +555 -0
  28. gnosisllm_knowledge/cli/utils/__init__.py +5 -0
  29. gnosisllm_knowledge/cli/utils/config.py +207 -0
  30. gnosisllm_knowledge/core/__init__.py +87 -0
  31. gnosisllm_knowledge/core/domain/__init__.py +43 -0
  32. gnosisllm_knowledge/core/domain/document.py +240 -0
  33. gnosisllm_knowledge/core/domain/result.py +176 -0
  34. gnosisllm_knowledge/core/domain/search.py +327 -0
  35. gnosisllm_knowledge/core/domain/source.py +139 -0
  36. gnosisllm_knowledge/core/events/__init__.py +23 -0
  37. gnosisllm_knowledge/core/events/emitter.py +216 -0
  38. gnosisllm_knowledge/core/events/types.py +226 -0
  39. gnosisllm_knowledge/core/exceptions.py +407 -0
  40. gnosisllm_knowledge/core/interfaces/__init__.py +20 -0
  41. gnosisllm_knowledge/core/interfaces/agentic.py +136 -0
  42. gnosisllm_knowledge/core/interfaces/chunker.py +64 -0
  43. gnosisllm_knowledge/core/interfaces/fetcher.py +112 -0
  44. gnosisllm_knowledge/core/interfaces/indexer.py +244 -0
  45. gnosisllm_knowledge/core/interfaces/loader.py +102 -0
  46. gnosisllm_knowledge/core/interfaces/searcher.py +178 -0
  47. gnosisllm_knowledge/core/interfaces/setup.py +164 -0
  48. gnosisllm_knowledge/fetchers/__init__.py +12 -0
  49. gnosisllm_knowledge/fetchers/config.py +77 -0
  50. gnosisllm_knowledge/fetchers/http.py +167 -0
  51. gnosisllm_knowledge/fetchers/neoreader.py +204 -0
  52. gnosisllm_knowledge/loaders/__init__.py +13 -0
  53. gnosisllm_knowledge/loaders/base.py +399 -0
  54. gnosisllm_knowledge/loaders/factory.py +202 -0
  55. gnosisllm_knowledge/loaders/sitemap.py +285 -0
  56. gnosisllm_knowledge/loaders/website.py +57 -0
  57. gnosisllm_knowledge/py.typed +0 -0
  58. gnosisllm_knowledge/services/__init__.py +9 -0
  59. gnosisllm_knowledge/services/indexing.py +387 -0
  60. gnosisllm_knowledge/services/search.py +349 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/METADATA +382 -0
  62. gnosisllm_knowledge-0.2.0.dist-info/RECORD +64 -0
  63. gnosisllm_knowledge-0.2.0.dist-info/WHEEL +4 -0
  64. gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,509 @@
1
+ """GnosisLLM Knowledge CLI Application.
2
+
3
+ Main entry point assembling all CLI commands with enterprise-grade UX.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import asyncio
9
+ from typing import Annotated, Optional
10
+
11
+ import typer
12
+ from rich.console import Console
13
+
14
+ from gnosisllm_knowledge.cli.display import RichDisplayService
15
+ from gnosisllm_knowledge.cli.utils import CliConfig
16
+
17
# Main application
# Single Typer app owning every top-level command. no_args_is_help prints
# usage when invoked bare; pretty_exceptions_show_locals=False keeps local
# variables (which may include --password values) out of crash tracebacks.
app = typer.Typer(
    name="gnosisllm-knowledge",
    help="Enterprise-grade knowledge loading, indexing, and semantic search.",
    no_args_is_help=True,
    rich_markup_mode="rich",
    pretty_exceptions_enable=True,
    pretty_exceptions_show_locals=False,
)

# Shared console and display service
# One Console/RichDisplayService pair is created at import time and reused
# by every command below, so all output shares a single Rich console.
console = Console()
display = RichDisplayService(console)
30
+
31
+
32
def version_callback(value: bool) -> None:
    """Display version and exit."""
    # Typer invokes eager option callbacks even when the flag is absent
    # (value is False); bail out early in that case.
    if not value:
        return
    # Imported lazily so `--help` and friends never pay the package
    # import cost just to read __version__.
    from gnosisllm_knowledge import __version__

    console.print(f"gnosisllm-knowledge [cyan]{__version__}[/cyan]")
    raise typer.Exit()
39
+
40
+
41
@app.callback()
def main_callback(
    version: Annotated[
        Optional[bool],
        typer.Option(
            "--version",
            "-v",
            help="Show version and exit.",
            callback=version_callback,
            is_eager=True,
        ),
    ] = None,
) -> None:
    """GnosisLLM Knowledge - Enterprise knowledge management CLI."""
    # The --version flag is handled entirely by its eager callback; the
    # callback body itself has nothing to do.
56
+
57
+
58
+ # ============================================================================
59
+ # SETUP COMMAND
60
+ # ============================================================================
61
+
62
+
63
@app.command()
def setup(
    host: Annotated[
        str,
        typer.Option("--host", "-h", help="OpenSearch host."),
    ] = "localhost",
    port: Annotated[
        int,
        typer.Option("--port", "-p", help="OpenSearch port."),
    ] = 9200,
    username: Annotated[
        Optional[str],
        typer.Option("--username", "-u", help="OpenSearch username."),
    ] = None,
    password: Annotated[
        Optional[str],
        typer.Option("--password", help="OpenSearch password."),
    ] = None,
    use_ssl: Annotated[
        bool,
        typer.Option("--use-ssl", help="Enable SSL."),
    ] = False,
    verify_certs: Annotated[
        bool,
        typer.Option("--verify-certs", help="Verify SSL certificates."),
    ] = False,
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Clean up existing resources first."),
    ] = False,
    no_sample_data: Annotated[
        bool,
        typer.Option("--no-sample-data", help="Skip sample data ingestion."),
    ] = False,
    no_hybrid: Annotated[
        bool,
        typer.Option("--no-hybrid", help="Skip hybrid search pipeline."),
    ] = False,
) -> None:
    """Configure OpenSearch with ML model for neural search.

    Sets up the complete neural search infrastructure:
    - OpenAI connector for embeddings
    - Model group and deployed ML model
    - Ingest pipeline for automatic embedding generation
    - Search pipeline for hybrid scoring
    - Knowledge index with k-NN vector mapping

    [bold]Example:[/bold]
        $ gnosisllm-knowledge setup
        $ gnosisllm-knowledge setup --host opensearch.example.com --port 443 --use-ssl
        $ gnosisllm-knowledge setup --force # Clean and recreate
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.setup import setup_command

    # Gather the parsed CLI options once, then hand them to the async
    # implementation on a fresh event loop.
    options = {
        "display": display,
        "host": host,
        "port": port,
        "username": username,
        "password": password,
        "use_ssl": use_ssl,
        "verify_certs": verify_certs,
        "force": force,
        "no_sample_data": no_sample_data,
        "no_hybrid": no_hybrid,
    }
    asyncio.run(setup_command(**options))
132
+
133
+
134
+ # ============================================================================
135
+ # LOAD COMMAND
136
+ # ============================================================================
137
+
138
+
139
@app.command()
def load(
    source: Annotated[
        str,
        typer.Argument(help="URL or sitemap to load content from."),
    ],
    source_type: Annotated[
        Optional[str],
        typer.Option(
            "--type",
            "-t",
            help="Source type: website, sitemap (auto-detects if not specified).",
        ),
    ] = None,
    index: Annotated[
        str,
        typer.Option("--index", "-i", help="Target index name."),
    ] = "knowledge",
    account_id: Annotated[
        Optional[str],
        typer.Option("--account-id", "-a", help="Multi-tenant account ID."),
    ] = None,
    collection_id: Annotated[
        Optional[str],
        typer.Option("--collection-id", "-c", help="Collection grouping ID."),
    ] = None,
    source_id: Annotated[
        Optional[str],
        typer.Option("--source-id", "-s", help="Source identifier (defaults to URL)."),
    ] = None,
    batch_size: Annotated[
        int,
        typer.Option("--batch-size", "-b", help="Documents per indexing batch."),
    ] = 100,
    max_urls: Annotated[
        int,
        typer.Option("--max-urls", "-m", help="Maximum URLs to process from sitemap."),
    ] = 1000,
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Delete existing source documents first."),
    ] = False,
    dry_run: Annotated[
        bool,
        typer.Option("--dry-run", help="Preview without indexing."),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-V", help="Show per-document progress."),
    ] = False,
) -> None:
    """Load and index content from URLs or sitemaps.

    Fetches content, chunks it for optimal embedding, and indexes
    into OpenSearch with automatic embedding generation.

    [bold]Example:[/bold]
        $ gnosisllm-knowledge load https://docs.example.com/intro
        $ gnosisllm-knowledge load https://example.com/sitemap.xml --type sitemap
        $ gnosisllm-knowledge load https://docs.example.com/sitemap.xml --max-urls 500
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.load import load_command

    # Note: the CLI option is --index but the implementation expects
    # the keyword "index_name".
    options = {
        "display": display,
        "source": source,
        "source_type": source_type,
        "index_name": index,
        "account_id": account_id,
        "collection_id": collection_id,
        "source_id": source_id,
        "batch_size": batch_size,
        "max_urls": max_urls,
        "force": force,
        "dry_run": dry_run,
        "verbose": verbose,
    }
    asyncio.run(load_command(**options))
218
+
219
+
220
+ # ============================================================================
221
+ # SEARCH COMMAND
222
+ # ============================================================================
223
+
224
+
225
@app.command()
def search(
    query: Annotated[
        Optional[str],
        typer.Argument(help="Search query text."),
    ] = None,
    mode: Annotated[
        str,
        typer.Option(
            "--mode",
            "-m",
            help="Search mode: semantic, keyword, hybrid (default), agentic.",
        ),
    ] = "hybrid",
    index: Annotated[
        str,
        typer.Option("--index", "-i", help="Index to search."),
    ] = "knowledge",
    limit: Annotated[
        int,
        typer.Option("--limit", "-l", help="Maximum results to return."),
    ] = 5,
    offset: Annotated[
        int,
        typer.Option("--offset", "-o", help="Pagination offset."),
    ] = 0,
    account_id: Annotated[
        Optional[str],
        typer.Option("--account-id", "-a", help="Filter by account ID."),
    ] = None,
    collection_ids: Annotated[
        Optional[str],
        typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
    ] = None,
    source_ids: Annotated[
        Optional[str],
        typer.Option("--source-ids", "-s", help="Filter by source IDs (comma-separated)."),
    ] = None,
    min_score: Annotated[
        float,
        typer.Option("--min-score", help="Minimum score threshold (0.0-1.0)."),
    ] = 0.0,
    explain: Annotated[
        bool,
        typer.Option("--explain", "-e", help="Show score explanation."),
    ] = False,
    json_output: Annotated[
        bool,
        typer.Option("--json", "-j", help="Output as JSON for scripting."),
    ] = False,
    interactive: Annotated[
        bool,
        typer.Option("--interactive", "-I", help="Interactive search session."),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-V", help="Show full content (not truncated)."),
    ] = False,
) -> None:
    """Search indexed content with semantic, keyword, or hybrid modes.

    Supports multiple search strategies:
    - [cyan]semantic[/cyan]: Meaning-based vector search using embeddings
    - [cyan]keyword[/cyan]: Traditional BM25 text matching
    - [cyan]hybrid[/cyan]: Combined semantic + keyword (default, best results)
    - [cyan]agentic[/cyan]: AI-powered search with reasoning

    [bold]Example:[/bold]
        $ gnosisllm-knowledge search "how to configure auth"
        $ gnosisllm-knowledge search "API reference" --mode semantic --limit 10
        $ gnosisllm-knowledge search --interactive
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.search import search_command

    # The --index option maps onto the implementation's "index_name" kwarg;
    # everything else forwards under its own name.
    options = {
        "display": display,
        "query": query,
        "mode": mode,
        "index_name": index,
        "limit": limit,
        "offset": offset,
        "account_id": account_id,
        "collection_ids": collection_ids,
        "source_ids": source_ids,
        "min_score": min_score,
        "explain": explain,
        "json_output": json_output,
        "interactive": interactive,
        "verbose": verbose,
    }
    asyncio.run(search_command(**options))
317
+
318
+
319
+ # ============================================================================
320
+ # INFO COMMAND
321
+ # ============================================================================
322
+
323
+
324
@app.command()
def info() -> None:
    """Display configuration and environment information.

    Shows current settings from environment variables and
    validates connectivity to required services.
    """
    config = CliConfig.from_env()

    display.header("GnosisLLM Knowledge", "Configuration and Environment Info")

    unset = "[dim]Not set[/dim]"
    # Each section renders as one table; order here is the display order.
    sections = [
        (
            "OpenSearch Configuration",
            [
                ("Host", f"{config.opensearch_host}:{config.opensearch_port}"),
                ("SSL", "Enabled" if config.opensearch_use_ssl else "Disabled"),
                ("Auth", "Configured" if config.opensearch_username else "None"),
                ("Model ID", config.opensearch_model_id or unset),
                ("Index", config.opensearch_index_name),
            ],
        ),
        (
            "Embedding Configuration",
            [
                ("OpenAI Key", "✓ Set" if config.openai_api_key else "✗ Not set"),
                ("Model", config.openai_embedding_model),
                ("Dimension", str(config.openai_embedding_dimension)),
            ],
        ),
        (
            "Agentic Search Configuration",
            [
                ("Flow Agent", config.opensearch_flow_agent_id or unset),
                ("Conversational Agent", config.opensearch_conversational_agent_id or unset),
                ("LLM Model", config.agentic_llm_model),
            ],
        ),
        (
            "Content Fetching",
            [
                ("Neoreader", config.neoreader_host),
            ],
        ),
    ]
    # Blank line between tables (not before the first one).
    for position, (title, rows) in enumerate(sections):
        if position:
            display.newline()
        display.table(title, rows)

    # Validation
    setup_errors = config.validate_for_setup()
    search_errors = config.validate_for_search()

    if setup_errors or search_errors:
        display.newline()
        display.warning("Configuration Issues:")
        for error in setup_errors + search_errors:
            display.error(f" {error}")
386
+
387
+
388
+ # ============================================================================
389
+ # AGENTIC SUBCOMMAND GROUP
390
+ # ============================================================================
391
+
392
# Sub-application grouping the AI-powered search commands; mounted on the
# main app under the "agentic" command name so invocations look like
# `gnosisllm-knowledge agentic <setup|chat|status>`.
agentic_app = typer.Typer(
    name="agentic",
    help="AI-powered agentic search commands.",
    no_args_is_help=True,
    rich_markup_mode="rich",
)
app.add_typer(agentic_app, name="agentic")
399
+
400
+
401
@agentic_app.command("setup")
def agentic_setup(
    agent_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="Agent type to setup: flow, conversational, or all (default).",
        ),
    ] = "all",
    force: Annotated[
        bool,
        typer.Option("--force", "-f", help="Force recreate existing agents."),
    ] = False,
) -> None:
    """Setup agentic search agents in OpenSearch.

    Creates and deploys AI agents for intelligent search:
    - [cyan]flow[/cyan]: Fast RAG for single-turn queries
    - [cyan]conversational[/cyan]: Multi-turn with memory support

    [bold]Example:[/bold]
        $ gnosisllm-knowledge agentic setup
        $ gnosisllm-knowledge agentic setup --type flow
        $ gnosisllm-knowledge agentic setup --force
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.agentic import agentic_setup_command

    # Forward the parsed options to the async implementation.
    options = {"display": display, "agent_type": agent_type, "force": force}
    asyncio.run(agentic_setup_command(**options))
436
+
437
+
438
@agentic_app.command("chat")
def agentic_chat(
    index: Annotated[
        str,
        typer.Option("--index", "-i", help="Index to search."),
    ] = "knowledge",
    agent_type: Annotated[
        str,
        typer.Option(
            "--type",
            "-t",
            help="Agent type: flow or conversational (default).",
        ),
    ] = "conversational",
    account_id: Annotated[
        Optional[str],
        typer.Option("--account-id", "-a", help="Filter by account ID."),
    ] = None,
    collection_ids: Annotated[
        Optional[str],
        typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
    ] = None,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-V", help="Show reasoning steps."),
    ] = False,
) -> None:
    """Interactive agentic chat session.

    Start a conversation with the AI-powered knowledge assistant.
    The agent remembers context for multi-turn dialogue.

    [bold]Example:[/bold]
        $ gnosisllm-knowledge agentic chat
        $ gnosisllm-knowledge agentic chat --type flow
        $ gnosisllm-knowledge agentic chat --verbose
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.agentic import agentic_chat_command

    # The --index option maps onto the implementation's "index_name" kwarg.
    options = {
        "display": display,
        "index_name": index,
        "agent_type": agent_type,
        "account_id": account_id,
        "collection_ids": collection_ids,
        "verbose": verbose,
    }
    asyncio.run(agentic_chat_command(**options))
487
+
488
+
489
@agentic_app.command("status")
def agentic_status() -> None:
    """Show agentic search configuration status.

    Displays configured agents and their health status.

    [bold]Example:[/bold]
        $ gnosisllm-knowledge agentic status
    """
    # Imported lazily so CLI startup stays fast for unrelated commands.
    from gnosisllm_knowledge.cli.commands.agentic import agentic_status_command

    # Build the coroutine, then drive it to completion on a fresh loop.
    status_coro = agentic_status_command(display=display)
    asyncio.run(status_coro)
501
+
502
+
503
def main() -> None:
    """CLI entry point."""
    # Invoking the Typer app parses sys.argv and dispatches to the
    # registered commands; referenced by the package's entry_points.txt.
    app()
506
+
507
+
508
# Allow running the module directly (python -m / script execution) in
# addition to the installed console-script entry point.
if __name__ == "__main__":
    main()
@@ -0,0 +1,7 @@
1
+ """CLI commands for gnosisllm-knowledge."""
2
+
3
+ from gnosisllm_knowledge.cli.commands.load import load_command
4
+ from gnosisllm_knowledge.cli.commands.search import search_command
5
+ from gnosisllm_knowledge.cli.commands.setup import setup_command
6
+
7
+ __all__ = ["setup_command", "load_command", "search_command"]