gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/api/knowledge.py +233 -35
- gnosisllm_knowledge/backends/memory/indexer.py +27 -2
- gnosisllm_knowledge/backends/memory/searcher.py +132 -10
- gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
- gnosisllm_knowledge/backends/opensearch/config.py +7 -0
- gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
- gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
- gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
- gnosisllm_knowledge/backends/opensearch/searcher.py +64 -6
- gnosisllm_knowledge/backends/opensearch/setup.py +29 -33
- gnosisllm_knowledge/cli/app.py +58 -19
- gnosisllm_knowledge/cli/commands/agentic.py +15 -9
- gnosisllm_knowledge/cli/commands/load.py +169 -19
- gnosisllm_knowledge/cli/commands/memory.py +10 -0
- gnosisllm_knowledge/cli/commands/search.py +9 -10
- gnosisllm_knowledge/cli/commands/setup.py +25 -1
- gnosisllm_knowledge/cli/utils/config.py +4 -4
- gnosisllm_knowledge/core/domain/__init__.py +13 -0
- gnosisllm_knowledge/core/domain/discovery.py +166 -0
- gnosisllm_knowledge/core/domain/document.py +14 -19
- gnosisllm_knowledge/core/domain/search.py +10 -25
- gnosisllm_knowledge/core/domain/source.py +11 -12
- gnosisllm_knowledge/core/events/__init__.py +8 -0
- gnosisllm_knowledge/core/events/types.py +122 -5
- gnosisllm_knowledge/core/exceptions.py +93 -0
- gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
- gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
- gnosisllm_knowledge/core/interfaces/searcher.py +30 -1
- gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
- gnosisllm_knowledge/fetchers/__init__.py +8 -0
- gnosisllm_knowledge/fetchers/config.py +27 -0
- gnosisllm_knowledge/fetchers/neoreader.py +31 -3
- gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
- gnosisllm_knowledge/loaders/__init__.py +5 -1
- gnosisllm_knowledge/loaders/discovery.py +338 -0
- gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
- gnosisllm_knowledge/loaders/factory.py +46 -0
- gnosisllm_knowledge/services/indexing.py +51 -21
- gnosisllm_knowledge/services/search.py +42 -28
- gnosisllm_knowledge/services/streaming_pipeline.py +45 -7
- {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.3.dist-info}/METADATA +30 -10
- gnosisllm_knowledge-0.4.3.dist-info/RECORD +81 -0
- gnosisllm_knowledge-0.3.0.dist-info/RECORD +0 -77
- {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.3.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.3.dist-info}/entry_points.txt +0 -0
|
@@ -249,10 +249,9 @@ class OpenSearchSetupAdapter:
|
|
|
249
249
|
self._model_id = self._config.model_id
|
|
250
250
|
|
|
251
251
|
# Step 4: Create ingest pipeline
|
|
252
|
-
#
|
|
253
|
-
#
|
|
254
|
-
|
|
255
|
-
if self._model_id and is_global_setup:
|
|
252
|
+
# Create pipeline for any setup that has a model deployed
|
|
253
|
+
# Each index_prefix namespace gets its own pipeline
|
|
254
|
+
if self._model_id:
|
|
256
255
|
try:
|
|
257
256
|
await self._create_ingest_pipeline()
|
|
258
257
|
pipeline_name = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
|
|
@@ -261,35 +260,33 @@ class OpenSearchSetupAdapter:
|
|
|
261
260
|
errors.append(f"Failed to create ingest pipeline: {e}")
|
|
262
261
|
logger.error(f"Failed to create ingest pipeline: {e}")
|
|
263
262
|
|
|
264
|
-
# Step 5: Create search pipeline
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
logger.error(f"Failed to create search pipeline: {e}")
|
|
263
|
+
# Step 5: Create search pipeline for hybrid search
|
|
264
|
+
try:
|
|
265
|
+
await self._create_search_pipeline()
|
|
266
|
+
pipeline_name = self._config.search_pipeline_name or f"{self._config.index_prefix}-search-pipeline"
|
|
267
|
+
steps_completed.append(f"Created search pipeline: {pipeline_name}")
|
|
268
|
+
except Exception as e:
|
|
269
|
+
errors.append(f"Failed to create search pipeline: {e}")
|
|
270
|
+
logger.error(f"Failed to create search pipeline: {e}")
|
|
273
271
|
|
|
274
|
-
# Step 6: Create index template
|
|
275
|
-
# Template covers all
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
template_body = get_index_template(self._config)
|
|
272
|
+
# Step 6: Create index template for this namespace
|
|
273
|
+
# Template covers all {index_prefix}-* indices
|
|
274
|
+
try:
|
|
275
|
+
template_name = f"{self._config.index_prefix}-template"
|
|
276
|
+
template_body = get_index_template(self._config)
|
|
280
277
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
278
|
+
# Set default pipeline for auto-index creation within this namespace
|
|
279
|
+
default_pipeline = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
|
|
280
|
+
template_body["template"]["settings"]["index"]["default_pipeline"] = default_pipeline
|
|
284
281
|
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
282
|
+
await self._client.indices.put_index_template(
|
|
283
|
+
name=template_name,
|
|
284
|
+
body=template_body,
|
|
285
|
+
)
|
|
286
|
+
steps_completed.append(f"Created index template: {template_name}")
|
|
287
|
+
except Exception as e:
|
|
288
|
+
errors.append(f"Failed to create index template: {e}")
|
|
289
|
+
logger.error(f"Failed to create index template: {e}")
|
|
293
290
|
|
|
294
291
|
# Step 7: Create knowledge index
|
|
295
292
|
try:
|
|
@@ -298,9 +295,8 @@ class OpenSearchSetupAdapter:
|
|
|
298
295
|
|
|
299
296
|
if not exists:
|
|
300
297
|
settings = get_knowledge_index_settings(self._config)
|
|
301
|
-
# Add default pipeline
|
|
302
|
-
|
|
303
|
-
pipeline_name = self._config.ingest_pipeline_name or "gnosisllm-ingest-pipeline"
|
|
298
|
+
# Add default pipeline for this namespace
|
|
299
|
+
pipeline_name = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
|
|
304
300
|
settings["index"]["default_pipeline"] = pipeline_name
|
|
305
301
|
|
|
306
302
|
await self._client.indices.create(
|
gnosisllm_knowledge/cli/app.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
"""GnosisLLM Knowledge CLI Application.
|
|
2
2
|
|
|
3
3
|
Main entry point assembling all CLI commands with enterprise-grade UX.
|
|
4
|
+
|
|
5
|
+
Note:
|
|
6
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
7
|
+
isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
|
|
8
|
+
Use --index to target tenant-specific indices.
|
|
4
9
|
"""
|
|
5
10
|
|
|
6
11
|
from __future__ import annotations
|
|
@@ -147,17 +152,13 @@ def load(
|
|
|
147
152
|
typer.Option(
|
|
148
153
|
"--type",
|
|
149
154
|
"-t",
|
|
150
|
-
help="Source type: website, sitemap (auto-detects if not specified).",
|
|
155
|
+
help="Source type: website, sitemap, discovery (auto-detects if not specified).",
|
|
151
156
|
),
|
|
152
157
|
] = None,
|
|
153
158
|
index: Annotated[
|
|
154
159
|
str,
|
|
155
|
-
typer.Option("--index", "-i", help="Target index name."),
|
|
160
|
+
typer.Option("--index", "-i", help="Target index name (use tenant-specific name for multi-tenancy)."),
|
|
156
161
|
] = "knowledge",
|
|
157
|
-
account_id: Annotated[
|
|
158
|
-
Optional[str],
|
|
159
|
-
typer.Option("--account-id", "-a", help="Multi-tenant account ID."),
|
|
160
|
-
] = None,
|
|
161
162
|
collection_id: Annotated[
|
|
162
163
|
Optional[str],
|
|
163
164
|
typer.Option("--collection-id", "-c", help="Collection grouping ID."),
|
|
@@ -186,16 +187,50 @@ def load(
|
|
|
186
187
|
bool,
|
|
187
188
|
typer.Option("--verbose", "-V", help="Show per-document progress."),
|
|
188
189
|
] = False,
|
|
190
|
+
discovery: Annotated[
|
|
191
|
+
bool,
|
|
192
|
+
typer.Option(
|
|
193
|
+
"--discovery",
|
|
194
|
+
"-D",
|
|
195
|
+
help="Use discovery loader to crawl and discover all URLs from the website.",
|
|
196
|
+
),
|
|
197
|
+
] = False,
|
|
198
|
+
max_depth: Annotated[
|
|
199
|
+
int,
|
|
200
|
+
typer.Option("--max-depth", help="Maximum crawl depth for discovery (default: 3)."),
|
|
201
|
+
] = 3,
|
|
202
|
+
max_pages: Annotated[
|
|
203
|
+
int,
|
|
204
|
+
typer.Option("--max-pages", help="Maximum pages to discover (default: 100)."),
|
|
205
|
+
] = 100,
|
|
206
|
+
same_domain: Annotated[
|
|
207
|
+
bool,
|
|
208
|
+
typer.Option(
|
|
209
|
+
"--same-domain/--any-domain",
|
|
210
|
+
help="Only crawl URLs on the same domain (default: same domain only).",
|
|
211
|
+
),
|
|
212
|
+
] = True,
|
|
189
213
|
) -> None:
|
|
190
214
|
"""Load and index content from URLs or sitemaps.
|
|
191
215
|
|
|
192
216
|
Fetches content, chunks it for optimal embedding, and indexes
|
|
193
217
|
into OpenSearch with automatic embedding generation.
|
|
194
218
|
|
|
219
|
+
[bold]Multi-tenancy:[/bold]
|
|
220
|
+
Use --index with tenant-specific index names for isolation
|
|
221
|
+
(e.g., --index knowledge-{account_id}). Each tenant's data
|
|
222
|
+
is stored in a separate index for complete isolation.
|
|
223
|
+
|
|
224
|
+
[bold]Discovery Mode:[/bold]
|
|
225
|
+
Use --discovery to crawl and discover all URLs from a website
|
|
226
|
+
before loading. This is useful for sites without a sitemap.
|
|
227
|
+
|
|
195
228
|
[bold]Example:[/bold]
|
|
196
229
|
$ gnosisllm-knowledge load https://docs.example.com/intro
|
|
197
230
|
$ gnosisllm-knowledge load https://example.com/sitemap.xml --type sitemap
|
|
198
231
|
$ gnosisllm-knowledge load https://docs.example.com/sitemap.xml --max-urls 500
|
|
232
|
+
$ gnosisllm-knowledge load https://docs.example.com --discovery --max-depth 5
|
|
233
|
+
$ gnosisllm-knowledge load https://docs.example.com --index knowledge-tenant-123
|
|
199
234
|
"""
|
|
200
235
|
from gnosisllm_knowledge.cli.commands.load import load_command
|
|
201
236
|
|
|
@@ -205,7 +240,6 @@ def load(
|
|
|
205
240
|
source=source,
|
|
206
241
|
source_type=source_type,
|
|
207
242
|
index_name=index,
|
|
208
|
-
account_id=account_id,
|
|
209
243
|
collection_id=collection_id,
|
|
210
244
|
source_id=source_id,
|
|
211
245
|
batch_size=batch_size,
|
|
@@ -213,6 +247,10 @@ def load(
|
|
|
213
247
|
force=force,
|
|
214
248
|
dry_run=dry_run,
|
|
215
249
|
verbose=verbose,
|
|
250
|
+
discovery=discovery,
|
|
251
|
+
max_depth=max_depth,
|
|
252
|
+
max_pages=max_pages,
|
|
253
|
+
same_domain=same_domain,
|
|
216
254
|
)
|
|
217
255
|
)
|
|
218
256
|
|
|
@@ -238,7 +276,7 @@ def search(
|
|
|
238
276
|
] = "hybrid",
|
|
239
277
|
index: Annotated[
|
|
240
278
|
str,
|
|
241
|
-
typer.Option("--index", "-i", help="Index to search."),
|
|
279
|
+
typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
|
|
242
280
|
] = "knowledge",
|
|
243
281
|
limit: Annotated[
|
|
244
282
|
int,
|
|
@@ -248,10 +286,6 @@ def search(
|
|
|
248
286
|
int,
|
|
249
287
|
typer.Option("--offset", "-o", help="Pagination offset."),
|
|
250
288
|
] = 0,
|
|
251
|
-
account_id: Annotated[
|
|
252
|
-
Optional[str],
|
|
253
|
-
typer.Option("--account-id", "-a", help="Filter by account ID."),
|
|
254
|
-
] = None,
|
|
255
289
|
collection_ids: Annotated[
|
|
256
290
|
Optional[str],
|
|
257
291
|
typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
|
|
@@ -289,10 +323,16 @@ def search(
|
|
|
289
323
|
- [cyan]hybrid[/cyan]: Combined semantic + keyword (default, best results)
|
|
290
324
|
- [cyan]agentic[/cyan]: AI-powered search with reasoning
|
|
291
325
|
|
|
326
|
+
[bold]Multi-tenancy:[/bold]
|
|
327
|
+
Use --index with tenant-specific index names for isolation
|
|
328
|
+
(e.g., --index knowledge-{account_id}). Each tenant's data
|
|
329
|
+
is stored in a separate index for complete isolation.
|
|
330
|
+
|
|
292
331
|
[bold]Example:[/bold]
|
|
293
332
|
$ gnosisllm-knowledge search "how to configure auth"
|
|
294
333
|
$ gnosisllm-knowledge search "API reference" --mode semantic --limit 10
|
|
295
334
|
$ gnosisllm-knowledge search --interactive
|
|
335
|
+
$ gnosisllm-knowledge search "query" --index knowledge-tenant-123
|
|
296
336
|
"""
|
|
297
337
|
from gnosisllm_knowledge.cli.commands.search import search_command
|
|
298
338
|
|
|
@@ -304,7 +344,6 @@ def search(
|
|
|
304
344
|
index_name=index,
|
|
305
345
|
limit=limit,
|
|
306
346
|
offset=offset,
|
|
307
|
-
account_id=account_id,
|
|
308
347
|
collection_ids=collection_ids,
|
|
309
348
|
source_ids=source_ids,
|
|
310
349
|
min_score=min_score,
|
|
@@ -451,7 +490,7 @@ def agentic_setup(
|
|
|
451
490
|
def agentic_chat(
|
|
452
491
|
index: Annotated[
|
|
453
492
|
str,
|
|
454
|
-
typer.Option("--index", "-i", help="Index to search."),
|
|
493
|
+
typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
|
|
455
494
|
] = "knowledge",
|
|
456
495
|
agent_type: Annotated[
|
|
457
496
|
str,
|
|
@@ -461,10 +500,6 @@ def agentic_chat(
|
|
|
461
500
|
help="Agent type: flow or conversational (default).",
|
|
462
501
|
),
|
|
463
502
|
] = "conversational",
|
|
464
|
-
account_id: Annotated[
|
|
465
|
-
Optional[str],
|
|
466
|
-
typer.Option("--account-id", "-a", help="Filter by account ID."),
|
|
467
|
-
] = None,
|
|
468
503
|
collection_ids: Annotated[
|
|
469
504
|
Optional[str],
|
|
470
505
|
typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
|
|
@@ -479,10 +514,15 @@ def agentic_chat(
|
|
|
479
514
|
Start a conversation with the AI-powered knowledge assistant.
|
|
480
515
|
The agent remembers context for multi-turn dialogue.
|
|
481
516
|
|
|
517
|
+
[bold]Multi-tenancy:[/bold]
|
|
518
|
+
Use --index with tenant-specific index names for isolation
|
|
519
|
+
(e.g., --index knowledge-{account_id}).
|
|
520
|
+
|
|
482
521
|
[bold]Example:[/bold]
|
|
483
522
|
$ gnosisllm-knowledge agentic chat
|
|
484
523
|
$ gnosisllm-knowledge agentic chat --type flow
|
|
485
524
|
$ gnosisllm-knowledge agentic chat --verbose
|
|
525
|
+
$ gnosisllm-knowledge agentic chat --index knowledge-tenant-123
|
|
486
526
|
"""
|
|
487
527
|
from gnosisllm_knowledge.cli.commands.agentic import agentic_chat_command
|
|
488
528
|
|
|
@@ -491,7 +531,6 @@ def agentic_chat(
|
|
|
491
531
|
display=display,
|
|
492
532
|
index_name=index,
|
|
493
533
|
agent_type=agent_type,
|
|
494
|
-
account_id=account_id,
|
|
495
534
|
collection_ids=collection_ids,
|
|
496
535
|
verbose=verbose,
|
|
497
536
|
)
|
|
@@ -4,6 +4,10 @@ Commands:
|
|
|
4
4
|
- setup: Configure agents in OpenSearch
|
|
5
5
|
- chat: Interactive agentic chat session
|
|
6
6
|
- status: Show agent configuration status
|
|
7
|
+
|
|
8
|
+
Note:
|
|
9
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
10
|
+
isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
|
|
7
11
|
"""
|
|
8
12
|
|
|
9
13
|
from __future__ import annotations
|
|
@@ -202,17 +206,19 @@ async def agentic_chat_command(
|
|
|
202
206
|
display: RichDisplayService,
|
|
203
207
|
index_name: str = "knowledge",
|
|
204
208
|
agent_type: str = "conversational",
|
|
205
|
-
account_id: str | None = None,
|
|
206
209
|
collection_ids: str | None = None,
|
|
207
210
|
verbose: bool = False,
|
|
208
211
|
) -> None:
|
|
209
212
|
"""Interactive agentic chat session.
|
|
210
213
|
|
|
214
|
+
Note:
|
|
215
|
+
Multi-tenancy is achieved through index isolation. Use tenant-specific
|
|
216
|
+
index names (e.g., --index knowledge-tenant-123) rather than an account ID filter.
|
|
217
|
+
|
|
211
218
|
Args:
|
|
212
219
|
display: Display service for output.
|
|
213
|
-
index_name: Index to search.
|
|
220
|
+
index_name: Index to search (use tenant-specific name for isolation).
|
|
214
221
|
agent_type: Agent type ('flow' or 'conversational').
|
|
215
|
-
account_id: Filter by account ID.
|
|
216
222
|
collection_ids: Filter by collection IDs (comma-separated).
|
|
217
223
|
verbose: Show reasoning steps.
|
|
218
224
|
"""
|
|
@@ -242,7 +248,6 @@ async def agentic_chat_command(
|
|
|
242
248
|
if agent_type == "conversational":
|
|
243
249
|
return await searcher.create_conversation(
|
|
244
250
|
name="CLI Chat Session",
|
|
245
|
-
account_id=account_id,
|
|
246
251
|
)
|
|
247
252
|
return None
|
|
248
253
|
|
|
@@ -291,7 +296,6 @@ async def agentic_chat_command(
|
|
|
291
296
|
agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
|
|
292
297
|
conversation_id=conversation_id,
|
|
293
298
|
collection_ids=collection_list,
|
|
294
|
-
account_id=account_id,
|
|
295
299
|
include_reasoning=verbose,
|
|
296
300
|
)
|
|
297
301
|
|
|
@@ -395,7 +399,6 @@ async def agentic_search_command(
|
|
|
395
399
|
query: str,
|
|
396
400
|
index_name: str = "knowledge",
|
|
397
401
|
agent_type: str = "flow",
|
|
398
|
-
account_id: str | None = None,
|
|
399
402
|
collection_ids: str | None = None,
|
|
400
403
|
source_ids: str | None = None,
|
|
401
404
|
limit: int = 5,
|
|
@@ -404,12 +407,15 @@ async def agentic_search_command(
|
|
|
404
407
|
) -> dict[str, Any] | None:
|
|
405
408
|
"""Execute agentic search.
|
|
406
409
|
|
|
410
|
+
Note:
|
|
411
|
+
Multi-tenancy is achieved through index isolation. Use tenant-specific
|
|
412
|
+
index names (e.g., --index knowledge-tenant-123) rather than an account ID filter.
|
|
413
|
+
|
|
407
414
|
Args:
|
|
408
415
|
display: Display service for output.
|
|
409
416
|
query: Search query text.
|
|
410
|
-
index_name: Index to search.
|
|
417
|
+
index_name: Index to search (use tenant-specific name for isolation).
|
|
411
418
|
agent_type: Agent type ('flow' or 'conversational').
|
|
412
|
-
account_id: Filter by account ID.
|
|
413
419
|
collection_ids: Filter by collection IDs (comma-separated).
|
|
414
420
|
source_ids: Filter by source IDs (comma-separated).
|
|
415
421
|
limit: Maximum source documents to retrieve.
|
|
@@ -447,12 +453,12 @@ async def agentic_search_command(
|
|
|
447
453
|
)
|
|
448
454
|
|
|
449
455
|
# Build query
|
|
456
|
+
# Note: account_id is deprecated and ignored - use index isolation instead
|
|
450
457
|
agentic_query = AgenticSearchQuery(
|
|
451
458
|
text=query,
|
|
452
459
|
agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
|
|
453
460
|
collection_ids=collection_list,
|
|
454
461
|
source_ids=source_list,
|
|
455
|
-
account_id=account_id,
|
|
456
462
|
limit=limit,
|
|
457
463
|
include_reasoning=verbose,
|
|
458
464
|
)
|