gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/api/knowledge.py +225 -35
- gnosisllm_knowledge/backends/memory/indexer.py +27 -2
- gnosisllm_knowledge/backends/memory/searcher.py +111 -10
- gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
- gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
- gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
- gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
- gnosisllm_knowledge/backends/opensearch/searcher.py +9 -6
- gnosisllm_knowledge/cli/app.py +58 -19
- gnosisllm_knowledge/cli/commands/agentic.py +15 -9
- gnosisllm_knowledge/cli/commands/load.py +169 -19
- gnosisllm_knowledge/cli/commands/memory.py +10 -0
- gnosisllm_knowledge/cli/commands/search.py +9 -10
- gnosisllm_knowledge/cli/commands/setup.py +25 -1
- gnosisllm_knowledge/cli/utils/config.py +4 -4
- gnosisllm_knowledge/core/domain/__init__.py +13 -0
- gnosisllm_knowledge/core/domain/discovery.py +166 -0
- gnosisllm_knowledge/core/domain/document.py +14 -19
- gnosisllm_knowledge/core/domain/search.py +10 -25
- gnosisllm_knowledge/core/domain/source.py +11 -12
- gnosisllm_knowledge/core/events/__init__.py +8 -0
- gnosisllm_knowledge/core/events/types.py +122 -5
- gnosisllm_knowledge/core/exceptions.py +93 -0
- gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
- gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
- gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
- gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
- gnosisllm_knowledge/fetchers/__init__.py +8 -0
- gnosisllm_knowledge/fetchers/config.py +27 -0
- gnosisllm_knowledge/fetchers/neoreader.py +31 -3
- gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
- gnosisllm_knowledge/loaders/__init__.py +5 -1
- gnosisllm_knowledge/loaders/discovery.py +338 -0
- gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
- gnosisllm_knowledge/loaders/factory.py +46 -0
- gnosisllm_knowledge/services/indexing.py +35 -20
- gnosisllm_knowledge/services/search.py +37 -20
- gnosisllm_knowledge/services/streaming_pipeline.py +39 -7
- {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +30 -10
- gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
- gnosisllm_knowledge-0.3.0.dist-info/RECORD +0 -77
- {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -2,6 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
Uses OpenSearch ML agents for AI-powered search with reasoning capabilities.
|
|
4
4
|
Supports flow agents (fast RAG) and conversational agents (multi-turn with memory).
|
|
5
|
+
|
|
6
|
+
Note:
|
|
7
|
+
This module is **tenant-agnostic**. Multi-tenancy is achieved through index isolation:
|
|
8
|
+
each tenant's data resides in a separate OpenSearch index. The caller (e.g., gnosisllm-api)
|
|
9
|
+
is responsible for constructing the appropriate index name (e.g., `knowledge-{account_id}`).
|
|
10
|
+
The library operates on the provided index without any tenant-specific filtering logic.
|
|
5
11
|
"""
|
|
6
12
|
|
|
7
13
|
from __future__ import annotations
|
|
@@ -9,7 +15,6 @@ from __future__ import annotations
|
|
|
9
15
|
import asyncio
|
|
10
16
|
import json
|
|
11
17
|
import logging
|
|
12
|
-
import uuid
|
|
13
18
|
from datetime import UTC, datetime
|
|
14
19
|
from typing import TYPE_CHECKING, Any
|
|
15
20
|
|
|
@@ -297,13 +302,15 @@ class OpenSearchAgenticSearcher:
|
|
|
297
302
|
|
|
298
303
|
async def list_conversations(
|
|
299
304
|
self,
|
|
300
|
-
account_id: str | None = None,
|
|
301
305
|
limit: int = 100,
|
|
302
306
|
) -> list[dict[str, Any]]:
|
|
303
307
|
"""List active conversations.
|
|
304
308
|
|
|
309
|
+
Note:
|
|
310
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through
|
|
311
|
+
index isolation (separate index per account).
|
|
312
|
+
|
|
305
313
|
Args:
|
|
306
|
-
account_id: Filter by account (multi-tenant).
|
|
307
314
|
limit: Maximum number of conversations.
|
|
308
315
|
|
|
309
316
|
Returns:
|
|
@@ -311,8 +318,6 @@ class OpenSearchAgenticSearcher:
|
|
|
311
318
|
"""
|
|
312
319
|
try:
|
|
313
320
|
body: dict[str, Any] = {"size": limit}
|
|
314
|
-
if account_id:
|
|
315
|
-
body["query"] = {"term": {"account_id": account_id}}
|
|
316
321
|
|
|
317
322
|
response = await self._client.transport.perform_request(
|
|
318
323
|
"POST",
|
|
@@ -365,16 +370,18 @@ class OpenSearchAgenticSearcher:
|
|
|
365
370
|
async def create_conversation(
|
|
366
371
|
self,
|
|
367
372
|
name: str | None = None,
|
|
368
|
-
account_id: str | None = None,
|
|
369
373
|
) -> str | None:
|
|
370
374
|
"""Create a new conversation memory.
|
|
371
375
|
|
|
372
376
|
Uses the OpenSearch Memory API to create a conversation memory.
|
|
373
377
|
The endpoint is POST /_plugins/_ml/memory (introduced in 2.12).
|
|
374
378
|
|
|
379
|
+
Note:
|
|
380
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through
|
|
381
|
+
index isolation (separate index per account).
|
|
382
|
+
|
|
375
383
|
Args:
|
|
376
384
|
name: Optional name for the conversation.
|
|
377
|
-
account_id: Optional account ID for multi-tenancy.
|
|
378
385
|
|
|
379
386
|
Returns:
|
|
380
387
|
The new conversation/memory ID, or None if creation fails.
|
|
@@ -382,8 +389,6 @@ class OpenSearchAgenticSearcher:
|
|
|
382
389
|
body: dict[str, Any] = {}
|
|
383
390
|
if name:
|
|
384
391
|
body["name"] = name
|
|
385
|
-
if account_id:
|
|
386
|
-
body["account_id"] = account_id
|
|
387
392
|
|
|
388
393
|
try:
|
|
389
394
|
# POST /_plugins/_ml/memory creates a new memory (OpenSearch 2.12+)
|
|
@@ -87,13 +87,15 @@ class OpenSearchIndexer:
|
|
|
87
87
|
# Embeddings are generated by OpenSearch ingest pipeline
|
|
88
88
|
doc_body = self._prepare_document(document)
|
|
89
89
|
|
|
90
|
-
# Index the document
|
|
90
|
+
# Index the document with ingest pipeline for embedding generation
|
|
91
91
|
refresh = options.get("refresh", False)
|
|
92
|
+
pipeline = self._config.ingest_pipeline_name
|
|
92
93
|
await self._client.index(
|
|
93
94
|
index=index_name,
|
|
94
95
|
id=document.doc_id,
|
|
95
96
|
body=doc_body,
|
|
96
97
|
refresh=refresh,
|
|
98
|
+
pipeline=pipeline,
|
|
97
99
|
)
|
|
98
100
|
|
|
99
101
|
return IndexResult(
|
|
@@ -272,6 +274,43 @@ class OpenSearchIndexer:
|
|
|
272
274
|
failed_count=0,
|
|
273
275
|
)
|
|
274
276
|
|
|
277
|
+
async def get(
|
|
278
|
+
self,
|
|
279
|
+
doc_id: str,
|
|
280
|
+
index_name: str,
|
|
281
|
+
) -> dict[str, Any] | None:
|
|
282
|
+
"""Get a document by ID.
|
|
283
|
+
|
|
284
|
+
Uses OpenSearch client's direct get() API (CRUD operation, not search).
|
|
285
|
+
|
|
286
|
+
Args:
|
|
287
|
+
doc_id: Document ID to retrieve.
|
|
288
|
+
index_name: Index name.
|
|
289
|
+
|
|
290
|
+
Returns:
|
|
291
|
+
Document dict (source fields) or None if not found.
|
|
292
|
+
Excludes embeddings from response for efficiency.
|
|
293
|
+
"""
|
|
294
|
+
try:
|
|
295
|
+
response = await self._client.get(
|
|
296
|
+
index=index_name,
|
|
297
|
+
id=doc_id,
|
|
298
|
+
_source_excludes=["content_embedding"],
|
|
299
|
+
)
|
|
300
|
+
source = response.get("_source", {})
|
|
301
|
+
# Include the document ID in the response
|
|
302
|
+
source["id"] = response.get("_id", doc_id)
|
|
303
|
+
return source
|
|
304
|
+
except Exception as e:
|
|
305
|
+
if "not_found" in str(e).lower():
|
|
306
|
+
return None
|
|
307
|
+
logger.error(f"Failed to get document {doc_id}: {e}")
|
|
308
|
+
raise IndexError(
|
|
309
|
+
message=f"Failed to get document: {e}",
|
|
310
|
+
details={"document_id": doc_id},
|
|
311
|
+
cause=e,
|
|
312
|
+
) from e
|
|
313
|
+
|
|
275
314
|
async def delete(
|
|
276
315
|
self,
|
|
277
316
|
doc_id: str,
|
|
@@ -434,7 +473,9 @@ class OpenSearchIndexer:
|
|
|
434
473
|
if not actions:
|
|
435
474
|
return IndexResult(success=True, index_name=index_name, indexed_count=0, failed_count=0)
|
|
436
475
|
|
|
437
|
-
|
|
476
|
+
# Use ingest pipeline for embedding generation
|
|
477
|
+
pipeline = self._config.ingest_pipeline_name
|
|
478
|
+
response = await self._client.bulk(body=actions, pipeline=pipeline)
|
|
438
479
|
|
|
439
480
|
indexed = 0
|
|
440
481
|
failed = 0
|
|
@@ -460,6 +501,11 @@ class OpenSearchIndexer:
|
|
|
460
501
|
def _prepare_document(self, document: Document) -> dict[str, Any]:
|
|
461
502
|
"""Prepare document for indexing.
|
|
462
503
|
|
|
504
|
+
Note:
|
|
505
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
506
|
+
isolation. Tenant information should be passed in document.metadata if
|
|
507
|
+
needed for audit purposes.
|
|
508
|
+
|
|
463
509
|
Args:
|
|
464
510
|
document: Document to prepare.
|
|
465
511
|
|
|
@@ -479,7 +525,6 @@ class OpenSearchIndexer:
|
|
|
479
525
|
"url": document.url,
|
|
480
526
|
"title": document.title,
|
|
481
527
|
"source": document.source,
|
|
482
|
-
"account_id": document.account_id,
|
|
483
528
|
"collection_id": document.collection_id,
|
|
484
529
|
"collection_name": document.collection_name,
|
|
485
530
|
"source_id": document.source_id,
|
|
@@ -1,4 +1,10 @@
|
|
|
1
|
-
"""OpenSearch index mappings for knowledge documents.
|
|
1
|
+
"""OpenSearch index mappings for knowledge documents.
|
|
2
|
+
|
|
3
|
+
Note:
|
|
4
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
5
|
+
isolation (e.g., `knowledge-{account_id}`). Index mappings do not include
|
|
6
|
+
tenant-specific fields like account_id.
|
|
7
|
+
"""
|
|
2
8
|
|
|
3
9
|
from __future__ import annotations
|
|
4
10
|
|
|
@@ -56,8 +62,7 @@ def get_knowledge_index_mappings(config: OpenSearchConfig) -> dict[str, Any]:
|
|
|
56
62
|
"fields": {"keyword": {"type": "keyword", "ignore_above": 512}},
|
|
57
63
|
},
|
|
58
64
|
"source": {"type": "keyword"},
|
|
59
|
-
# ===
|
|
60
|
-
"account_id": {"type": "keyword"},
|
|
65
|
+
# === Collection Fields ===
|
|
61
66
|
"collection_id": {"type": "keyword"},
|
|
62
67
|
"collection_name": {"type": "keyword"}, # For aggregation display
|
|
63
68
|
"source_id": {"type": "keyword"},
|
|
@@ -129,13 +134,16 @@ def get_memory_index_settings(config: OpenSearchConfig) -> dict[str, Any]:
|
|
|
129
134
|
def get_memory_index_mappings() -> dict[str, Any]:
|
|
130
135
|
"""Get index mappings for conversation memory.
|
|
131
136
|
|
|
137
|
+
Note:
|
|
138
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
139
|
+
isolation. Use tenant-specific index names for conversation memory.
|
|
140
|
+
|
|
132
141
|
Returns:
|
|
133
142
|
Index mappings dictionary.
|
|
134
143
|
"""
|
|
135
144
|
return {
|
|
136
145
|
"properties": {
|
|
137
146
|
"conversation_id": {"type": "keyword"},
|
|
138
|
-
"account_id": {"type": "keyword"},
|
|
139
147
|
"user_id": {"type": "keyword"},
|
|
140
148
|
"message_index": {"type": "integer"},
|
|
141
149
|
"role": {"type": "keyword"}, # user, assistant, system
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
Uses OpenSearch neural search - embeddings are generated automatically
|
|
4
4
|
via the deployed model. No Python-side embedding generation needed.
|
|
5
|
+
|
|
6
|
+
Note: This module is tenant-agnostic. Multi-tenancy should be handled
|
|
7
|
+
at the API layer by using separate indices per account (e.g.,
|
|
8
|
+
`knowledge-{account_id}`) rather than filtering by account_id.
|
|
5
9
|
"""
|
|
6
10
|
|
|
7
11
|
from __future__ import annotations
|
|
@@ -18,9 +22,13 @@ class QueryBuilder:
|
|
|
18
22
|
model handles embedding generation automatically via ingest and
|
|
19
23
|
search pipelines.
|
|
20
24
|
|
|
25
|
+
Note:
|
|
26
|
+
This builder is tenant-agnostic. Multi-tenancy should be handled
|
|
27
|
+
by using separate indices per account.
|
|
28
|
+
|
|
21
29
|
Example:
|
|
22
30
|
```python
|
|
23
|
-
query = SearchQuery(text="how to configure",
|
|
31
|
+
query = SearchQuery(text="how to configure", collection_ids=["col-1"])
|
|
24
32
|
builder = QueryBuilder(query, model_id="abc123")
|
|
25
33
|
os_query = builder.build_hybrid_query()
|
|
26
34
|
```
|
|
@@ -204,12 +212,12 @@ class QueryBuilder:
|
|
|
204
212
|
},
|
|
205
213
|
}
|
|
206
214
|
|
|
207
|
-
# Apply filters
|
|
215
|
+
# Apply filters using post_filter for hybrid queries
|
|
216
|
+
# Hybrid queries cannot be wrapped in bool - they must be top-level
|
|
208
217
|
filters = self._build_filters()
|
|
209
218
|
if filters:
|
|
210
|
-
query["query"] = {
|
|
219
|
+
query["post_filter"] = {
|
|
211
220
|
"bool": {
|
|
212
|
-
"must": [query["query"]],
|
|
213
221
|
"filter": filters,
|
|
214
222
|
}
|
|
215
223
|
}
|
|
@@ -270,15 +278,15 @@ class QueryBuilder:
|
|
|
270
278
|
def _build_filters(self) -> list[dict[str, Any]]:
|
|
271
279
|
"""Build filter clauses from query parameters.
|
|
272
280
|
|
|
281
|
+
Note:
|
|
282
|
+
This method is tenant-agnostic. Multi-tenancy should be handled
|
|
283
|
+
at the API layer by using separate indices per account.
|
|
284
|
+
|
|
273
285
|
Returns:
|
|
274
|
-
List of filter clauses.
|
|
286
|
+
List of filter clauses for collection, source, and metadata filters.
|
|
275
287
|
"""
|
|
276
288
|
filters: list[dict[str, Any]] = []
|
|
277
289
|
|
|
278
|
-
# Multi-tenant filter (required for security)
|
|
279
|
-
if self._query.account_id:
|
|
280
|
-
filters.append({"term": {"account_id": self._query.account_id}})
|
|
281
|
-
|
|
282
290
|
# Collection filter
|
|
283
291
|
if self._query.collection_ids:
|
|
284
292
|
filters.append({"terms": {"collection_id": self._query.collection_ids}})
|
|
@@ -357,67 +365,61 @@ class QueryBuilder:
|
|
|
357
365
|
]
|
|
358
366
|
|
|
359
367
|
|
|
360
|
-
def build_delete_by_source_query(
|
|
361
|
-
source_id: str,
|
|
362
|
-
account_id: str | None = None,
|
|
363
|
-
) -> dict[str, Any]:
|
|
368
|
+
def build_delete_by_source_query(source_id: str) -> dict[str, Any]:
|
|
364
369
|
"""Build query to delete documents by source.
|
|
365
370
|
|
|
371
|
+
Note:
|
|
372
|
+
This function is tenant-agnostic. Multi-tenancy should be handled
|
|
373
|
+
at the API layer by using separate indices per account.
|
|
374
|
+
|
|
366
375
|
Args:
|
|
367
376
|
source_id: Source ID to delete.
|
|
368
|
-
account_id: Optional account filter for multi-tenancy.
|
|
369
377
|
|
|
370
378
|
Returns:
|
|
371
379
|
Delete-by-query dictionary.
|
|
372
380
|
"""
|
|
373
|
-
filters = [{"term": {"source_id": source_id}}]
|
|
374
|
-
if account_id:
|
|
375
|
-
filters.append({"term": {"account_id": account_id}})
|
|
376
|
-
|
|
377
381
|
return {
|
|
378
382
|
"query": {
|
|
379
383
|
"bool": {
|
|
380
|
-
"filter": filters,
|
|
384
|
+
"filter": [{"term": {"source_id": source_id}}],
|
|
381
385
|
}
|
|
382
386
|
}
|
|
383
387
|
}
|
|
384
388
|
|
|
385
389
|
|
|
386
|
-
def build_delete_by_collection_query(
|
|
387
|
-
collection_id: str,
|
|
388
|
-
account_id: str | None = None,
|
|
389
|
-
) -> dict[str, Any]:
|
|
390
|
+
def build_delete_by_collection_query(collection_id: str) -> dict[str, Any]:
|
|
390
391
|
"""Build query to delete documents by collection.
|
|
391
392
|
|
|
393
|
+
Note:
|
|
394
|
+
This function is tenant-agnostic. Multi-tenancy should be handled
|
|
395
|
+
at the API layer by using separate indices per account.
|
|
396
|
+
|
|
392
397
|
Args:
|
|
393
398
|
collection_id: Collection ID to delete.
|
|
394
|
-
account_id: Optional account filter for multi-tenancy.
|
|
395
399
|
|
|
396
400
|
Returns:
|
|
397
401
|
Delete-by-query dictionary.
|
|
398
402
|
"""
|
|
399
|
-
filters = [{"term": {"collection_id": collection_id}}]
|
|
400
|
-
if account_id:
|
|
401
|
-
filters.append({"term": {"account_id": account_id}})
|
|
402
|
-
|
|
403
403
|
return {
|
|
404
404
|
"query": {
|
|
405
405
|
"bool": {
|
|
406
|
-
"filter": filters,
|
|
406
|
+
"filter": [{"term": {"collection_id": collection_id}}],
|
|
407
407
|
}
|
|
408
408
|
}
|
|
409
409
|
}
|
|
410
410
|
|
|
411
411
|
|
|
412
412
|
def build_count_query(
|
|
413
|
-
account_id: str | None = None,
|
|
414
413
|
collection_id: str | None = None,
|
|
415
414
|
source_id: str | None = None,
|
|
416
415
|
) -> dict[str, Any]:
|
|
417
416
|
"""Build query to count documents.
|
|
418
417
|
|
|
418
|
+
Note:
|
|
419
|
+
This function is tenant-agnostic. Multi-tenancy should be handled
|
|
420
|
+
at the API layer by using separate indices per account.
|
|
421
|
+
|
|
419
422
|
Args:
|
|
420
|
-
account_id: Optional account filter.
|
|
421
423
|
collection_id: Optional collection filter.
|
|
422
424
|
source_id: Optional source filter.
|
|
423
425
|
|
|
@@ -426,8 +428,6 @@ def build_count_query(
|
|
|
426
428
|
"""
|
|
427
429
|
filters: list[dict[str, Any]] = []
|
|
428
430
|
|
|
429
|
-
if account_id:
|
|
430
|
-
filters.append({"term": {"account_id": account_id}})
|
|
431
431
|
if collection_id:
|
|
432
432
|
filters.append({"term": {"collection_id": collection_id}})
|
|
433
433
|
if source_id:
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
Uses OpenSearch neural search - embeddings are generated automatically
|
|
4
4
|
by the deployed ML model. No Python-side embedding generation needed.
|
|
5
|
+
|
|
6
|
+
Note: This module is tenant-agnostic. Multi-tenancy should be handled
|
|
7
|
+
at the API layer by using separate indices per account (e.g.,
|
|
8
|
+
`knowledge-{account_id}`) rather than filtering by account_id.
|
|
5
9
|
"""
|
|
6
10
|
|
|
7
11
|
from __future__ import annotations
|
|
@@ -506,7 +510,6 @@ class OpenSearchKnowledgeSearcher:
|
|
|
506
510
|
self,
|
|
507
511
|
index_name: str,
|
|
508
512
|
*,
|
|
509
|
-
account_id: str | None = None,
|
|
510
513
|
source_id: str | None = None,
|
|
511
514
|
collection_id: str | None = None,
|
|
512
515
|
limit: int = 50,
|
|
@@ -514,9 +517,12 @@ class OpenSearchKnowledgeSearcher:
|
|
|
514
517
|
) -> dict[str, Any]:
|
|
515
518
|
"""List documents with optional filters.
|
|
516
519
|
|
|
520
|
+
Note:
|
|
521
|
+
This method is tenant-agnostic. Multi-tenancy should be handled
|
|
522
|
+
at the API layer by using separate indices per account.
|
|
523
|
+
|
|
517
524
|
Args:
|
|
518
|
-
index_name: Index to query.
|
|
519
|
-
account_id: Optional account ID filter.
|
|
525
|
+
index_name: Index to query (use tenant-specific name for isolation).
|
|
520
526
|
source_id: Optional source ID filter.
|
|
521
527
|
collection_id: Optional collection ID filter.
|
|
522
528
|
limit: Maximum documents to return.
|
|
@@ -540,9 +546,6 @@ class OpenSearchKnowledgeSearcher:
|
|
|
540
546
|
# Build filter clauses
|
|
541
547
|
filters: list[dict[str, Any]] = []
|
|
542
548
|
|
|
543
|
-
if account_id:
|
|
544
|
-
filters.append({"term": {"account_id": account_id}})
|
|
545
|
-
|
|
546
549
|
if source_id:
|
|
547
550
|
filters.append({"term": {"source_id": source_id}})
|
|
548
551
|
|
gnosisllm_knowledge/cli/app.py
CHANGED
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
"""GnosisLLM Knowledge CLI Application.
|
|
2
2
|
|
|
3
3
|
Main entry point assembling all CLI commands with enterprise-grade UX.
|
|
4
|
+
|
|
5
|
+
Note:
|
|
6
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
7
|
+
isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
|
|
8
|
+
Use --index to target tenant-specific indices.
|
|
4
9
|
"""
|
|
5
10
|
|
|
6
11
|
from __future__ import annotations
|
|
@@ -147,17 +152,13 @@ def load(
|
|
|
147
152
|
typer.Option(
|
|
148
153
|
"--type",
|
|
149
154
|
"-t",
|
|
150
|
-
help="Source type: website, sitemap (auto-detects if not specified).",
|
|
155
|
+
help="Source type: website, sitemap, discovery (auto-detects if not specified).",
|
|
151
156
|
),
|
|
152
157
|
] = None,
|
|
153
158
|
index: Annotated[
|
|
154
159
|
str,
|
|
155
|
-
typer.Option("--index", "-i", help="Target index name."),
|
|
160
|
+
typer.Option("--index", "-i", help="Target index name (use tenant-specific name for multi-tenancy)."),
|
|
156
161
|
] = "knowledge",
|
|
157
|
-
account_id: Annotated[
|
|
158
|
-
Optional[str],
|
|
159
|
-
typer.Option("--account-id", "-a", help="Multi-tenant account ID."),
|
|
160
|
-
] = None,
|
|
161
162
|
collection_id: Annotated[
|
|
162
163
|
Optional[str],
|
|
163
164
|
typer.Option("--collection-id", "-c", help="Collection grouping ID."),
|
|
@@ -186,16 +187,50 @@ def load(
|
|
|
186
187
|
bool,
|
|
187
188
|
typer.Option("--verbose", "-V", help="Show per-document progress."),
|
|
188
189
|
] = False,
|
|
190
|
+
discovery: Annotated[
|
|
191
|
+
bool,
|
|
192
|
+
typer.Option(
|
|
193
|
+
"--discovery",
|
|
194
|
+
"-D",
|
|
195
|
+
help="Use discovery loader to crawl and discover all URLs from the website.",
|
|
196
|
+
),
|
|
197
|
+
] = False,
|
|
198
|
+
max_depth: Annotated[
|
|
199
|
+
int,
|
|
200
|
+
typer.Option("--max-depth", help="Maximum crawl depth for discovery (default: 3)."),
|
|
201
|
+
] = 3,
|
|
202
|
+
max_pages: Annotated[
|
|
203
|
+
int,
|
|
204
|
+
typer.Option("--max-pages", help="Maximum pages to discover (default: 100)."),
|
|
205
|
+
] = 100,
|
|
206
|
+
same_domain: Annotated[
|
|
207
|
+
bool,
|
|
208
|
+
typer.Option(
|
|
209
|
+
"--same-domain/--any-domain",
|
|
210
|
+
help="Only crawl URLs on the same domain (default: same domain only).",
|
|
211
|
+
),
|
|
212
|
+
] = True,
|
|
189
213
|
) -> None:
|
|
190
214
|
"""Load and index content from URLs or sitemaps.
|
|
191
215
|
|
|
192
216
|
Fetches content, chunks it for optimal embedding, and indexes
|
|
193
217
|
into OpenSearch with automatic embedding generation.
|
|
194
218
|
|
|
219
|
+
[bold]Multi-tenancy:[/bold]
|
|
220
|
+
Use --index with tenant-specific index names for isolation
|
|
221
|
+
(e.g., --index knowledge-{account_id}). Each tenant's data
|
|
222
|
+
is stored in a separate index for complete isolation.
|
|
223
|
+
|
|
224
|
+
[bold]Discovery Mode:[/bold]
|
|
225
|
+
Use --discovery to crawl and discover all URLs from a website
|
|
226
|
+
before loading. This is useful for sites without a sitemap.
|
|
227
|
+
|
|
195
228
|
[bold]Example:[/bold]
|
|
196
229
|
$ gnosisllm-knowledge load https://docs.example.com/intro
|
|
197
230
|
$ gnosisllm-knowledge load https://example.com/sitemap.xml --type sitemap
|
|
198
231
|
$ gnosisllm-knowledge load https://docs.example.com/sitemap.xml --max-urls 500
|
|
232
|
+
$ gnosisllm-knowledge load https://docs.example.com --discovery --max-depth 5
|
|
233
|
+
$ gnosisllm-knowledge load https://docs.example.com --index knowledge-tenant-123
|
|
199
234
|
"""
|
|
200
235
|
from gnosisllm_knowledge.cli.commands.load import load_command
|
|
201
236
|
|
|
@@ -205,7 +240,6 @@ def load(
|
|
|
205
240
|
source=source,
|
|
206
241
|
source_type=source_type,
|
|
207
242
|
index_name=index,
|
|
208
|
-
account_id=account_id,
|
|
209
243
|
collection_id=collection_id,
|
|
210
244
|
source_id=source_id,
|
|
211
245
|
batch_size=batch_size,
|
|
@@ -213,6 +247,10 @@ def load(
|
|
|
213
247
|
force=force,
|
|
214
248
|
dry_run=dry_run,
|
|
215
249
|
verbose=verbose,
|
|
250
|
+
discovery=discovery,
|
|
251
|
+
max_depth=max_depth,
|
|
252
|
+
max_pages=max_pages,
|
|
253
|
+
same_domain=same_domain,
|
|
216
254
|
)
|
|
217
255
|
)
|
|
218
256
|
|
|
@@ -238,7 +276,7 @@ def search(
|
|
|
238
276
|
] = "hybrid",
|
|
239
277
|
index: Annotated[
|
|
240
278
|
str,
|
|
241
|
-
typer.Option("--index", "-i", help="Index to search."),
|
|
279
|
+
typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
|
|
242
280
|
] = "knowledge",
|
|
243
281
|
limit: Annotated[
|
|
244
282
|
int,
|
|
@@ -248,10 +286,6 @@ def search(
|
|
|
248
286
|
int,
|
|
249
287
|
typer.Option("--offset", "-o", help="Pagination offset."),
|
|
250
288
|
] = 0,
|
|
251
|
-
account_id: Annotated[
|
|
252
|
-
Optional[str],
|
|
253
|
-
typer.Option("--account-id", "-a", help="Filter by account ID."),
|
|
254
|
-
] = None,
|
|
255
289
|
collection_ids: Annotated[
|
|
256
290
|
Optional[str],
|
|
257
291
|
typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
|
|
@@ -289,10 +323,16 @@ def search(
|
|
|
289
323
|
- [cyan]hybrid[/cyan]: Combined semantic + keyword (default, best results)
|
|
290
324
|
- [cyan]agentic[/cyan]: AI-powered search with reasoning
|
|
291
325
|
|
|
326
|
+
[bold]Multi-tenancy:[/bold]
|
|
327
|
+
Use --index with tenant-specific index names for isolation
|
|
328
|
+
(e.g., --index knowledge-{account_id}). Each tenant's data
|
|
329
|
+
is stored in a separate index for complete isolation.
|
|
330
|
+
|
|
292
331
|
[bold]Example:[/bold]
|
|
293
332
|
$ gnosisllm-knowledge search "how to configure auth"
|
|
294
333
|
$ gnosisllm-knowledge search "API reference" --mode semantic --limit 10
|
|
295
334
|
$ gnosisllm-knowledge search --interactive
|
|
335
|
+
$ gnosisllm-knowledge search "query" --index knowledge-tenant-123
|
|
296
336
|
"""
|
|
297
337
|
from gnosisllm_knowledge.cli.commands.search import search_command
|
|
298
338
|
|
|
@@ -304,7 +344,6 @@ def search(
|
|
|
304
344
|
index_name=index,
|
|
305
345
|
limit=limit,
|
|
306
346
|
offset=offset,
|
|
307
|
-
account_id=account_id,
|
|
308
347
|
collection_ids=collection_ids,
|
|
309
348
|
source_ids=source_ids,
|
|
310
349
|
min_score=min_score,
|
|
@@ -451,7 +490,7 @@ def agentic_setup(
|
|
|
451
490
|
def agentic_chat(
|
|
452
491
|
index: Annotated[
|
|
453
492
|
str,
|
|
454
|
-
typer.Option("--index", "-i", help="Index to search."),
|
|
493
|
+
typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
|
|
455
494
|
] = "knowledge",
|
|
456
495
|
agent_type: Annotated[
|
|
457
496
|
str,
|
|
@@ -461,10 +500,6 @@ def agentic_chat(
|
|
|
461
500
|
help="Agent type: flow or conversational (default).",
|
|
462
501
|
),
|
|
463
502
|
] = "conversational",
|
|
464
|
-
account_id: Annotated[
|
|
465
|
-
Optional[str],
|
|
466
|
-
typer.Option("--account-id", "-a", help="Filter by account ID."),
|
|
467
|
-
] = None,
|
|
468
503
|
collection_ids: Annotated[
|
|
469
504
|
Optional[str],
|
|
470
505
|
typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
|
|
@@ -479,10 +514,15 @@ def agentic_chat(
|
|
|
479
514
|
Start a conversation with the AI-powered knowledge assistant.
|
|
480
515
|
The agent remembers context for multi-turn dialogue.
|
|
481
516
|
|
|
517
|
+
[bold]Multi-tenancy:[/bold]
|
|
518
|
+
Use --index with tenant-specific index names for isolation
|
|
519
|
+
(e.g., --index knowledge-{account_id}).
|
|
520
|
+
|
|
482
521
|
[bold]Example:[/bold]
|
|
483
522
|
$ gnosisllm-knowledge agentic chat
|
|
484
523
|
$ gnosisllm-knowledge agentic chat --type flow
|
|
485
524
|
$ gnosisllm-knowledge agentic chat --verbose
|
|
525
|
+
$ gnosisllm-knowledge agentic chat --index knowledge-tenant-123
|
|
486
526
|
"""
|
|
487
527
|
from gnosisllm_knowledge.cli.commands.agentic import agentic_chat_command
|
|
488
528
|
|
|
@@ -491,7 +531,6 @@ def agentic_chat(
|
|
|
491
531
|
display=display,
|
|
492
532
|
index_name=index,
|
|
493
533
|
agent_type=agent_type,
|
|
494
|
-
account_id=account_id,
|
|
495
534
|
collection_ids=collection_ids,
|
|
496
535
|
verbose=verbose,
|
|
497
536
|
)
|
|
@@ -4,6 +4,10 @@ Commands:
|
|
|
4
4
|
- setup: Configure agents in OpenSearch
|
|
5
5
|
- chat: Interactive agentic chat session
|
|
6
6
|
- status: Show agent configuration status
|
|
7
|
+
|
|
8
|
+
Note:
|
|
9
|
+
This library is tenant-agnostic. Multi-tenancy is achieved through index
|
|
10
|
+
isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
|
|
7
11
|
"""
|
|
8
12
|
|
|
9
13
|
from __future__ import annotations
|
|
@@ -202,17 +206,19 @@ async def agentic_chat_command(
|
|
|
202
206
|
display: RichDisplayService,
|
|
203
207
|
index_name: str = "knowledge",
|
|
204
208
|
agent_type: str = "conversational",
|
|
205
|
-
account_id: str | None = None,
|
|
206
209
|
collection_ids: str | None = None,
|
|
207
210
|
verbose: bool = False,
|
|
208
211
|
) -> None:
|
|
209
212
|
"""Interactive agentic chat session.
|
|
210
213
|
|
|
214
|
+
Note:
|
|
215
|
+
Multi-tenancy is achieved through index isolation. Use tenant-specific
|
|
216
|
+
index names instead (e.g., --index knowledge-tenant-123).
|
|
217
|
+
|
|
211
218
|
Args:
|
|
212
219
|
display: Display service for output.
|
|
213
|
-
index_name: Index to search.
|
|
220
|
+
index_name: Index to search (use tenant-specific name for isolation).
|
|
214
221
|
agent_type: Agent type ('flow' or 'conversational').
|
|
215
|
-
account_id: Filter by account ID.
|
|
216
222
|
collection_ids: Filter by collection IDs (comma-separated).
|
|
217
223
|
verbose: Show reasoning steps.
|
|
218
224
|
"""
|
|
@@ -242,7 +248,6 @@ async def agentic_chat_command(
|
|
|
242
248
|
if agent_type == "conversational":
|
|
243
249
|
return await searcher.create_conversation(
|
|
244
250
|
name="CLI Chat Session",
|
|
245
|
-
account_id=account_id,
|
|
246
251
|
)
|
|
247
252
|
return None
|
|
248
253
|
|
|
@@ -291,7 +296,6 @@ async def agentic_chat_command(
|
|
|
291
296
|
agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
|
|
292
297
|
conversation_id=conversation_id,
|
|
293
298
|
collection_ids=collection_list,
|
|
294
|
-
account_id=account_id,
|
|
295
299
|
include_reasoning=verbose,
|
|
296
300
|
)
|
|
297
301
|
|
|
@@ -395,7 +399,6 @@ async def agentic_search_command(
|
|
|
395
399
|
query: str,
|
|
396
400
|
index_name: str = "knowledge",
|
|
397
401
|
agent_type: str = "flow",
|
|
398
|
-
account_id: str | None = None,
|
|
399
402
|
collection_ids: str | None = None,
|
|
400
403
|
source_ids: str | None = None,
|
|
401
404
|
limit: int = 5,
|
|
@@ -404,12 +407,15 @@ async def agentic_search_command(
|
|
|
404
407
|
) -> dict[str, Any] | None:
|
|
405
408
|
"""Execute agentic search.
|
|
406
409
|
|
|
410
|
+
Note:
|
|
411
|
+
Multi-tenancy is achieved through index isolation. Use tenant-specific
|
|
412
|
+
index names instead (e.g., --index knowledge-tenant-123).
|
|
413
|
+
|
|
407
414
|
Args:
|
|
408
415
|
display: Display service for output.
|
|
409
416
|
query: Search query text.
|
|
410
|
-
index_name: Index to search.
|
|
417
|
+
index_name: Index to search (use tenant-specific name for isolation).
|
|
411
418
|
agent_type: Agent type ('flow' or 'conversational').
|
|
412
|
-
account_id: Filter by account ID.
|
|
413
419
|
collection_ids: Filter by collection IDs (comma-separated).
|
|
414
420
|
source_ids: Filter by source IDs (comma-separated).
|
|
415
421
|
limit: Maximum source documents to retrieve.
|
|
@@ -447,12 +453,12 @@ async def agentic_search_command(
|
|
|
447
453
|
)
|
|
448
454
|
|
|
449
455
|
# Build query
|
|
456
|
+
# Note: account_id is deprecated and ignored - use index isolation instead
|
|
450
457
|
agentic_query = AgenticSearchQuery(
|
|
451
458
|
text=query,
|
|
452
459
|
agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
|
|
453
460
|
collection_ids=collection_list,
|
|
454
461
|
source_ids=source_list,
|
|
455
|
-
account_id=account_id,
|
|
456
462
|
limit=limit,
|
|
457
463
|
include_reasoning=verbose,
|
|
458
464
|
)
|