gnosisllm-knowledge 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. gnosisllm_knowledge/api/knowledge.py +225 -35
  2. gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  3. gnosisllm_knowledge/backends/memory/searcher.py +111 -10
  4. gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
  5. gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
  6. gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
  7. gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  8. gnosisllm_knowledge/backends/opensearch/searcher.py +9 -6
  9. gnosisllm_knowledge/cli/app.py +58 -19
  10. gnosisllm_knowledge/cli/commands/agentic.py +15 -9
  11. gnosisllm_knowledge/cli/commands/load.py +169 -19
  12. gnosisllm_knowledge/cli/commands/memory.py +10 -0
  13. gnosisllm_knowledge/cli/commands/search.py +9 -10
  14. gnosisllm_knowledge/cli/commands/setup.py +25 -1
  15. gnosisllm_knowledge/cli/utils/config.py +4 -4
  16. gnosisllm_knowledge/core/domain/__init__.py +13 -0
  17. gnosisllm_knowledge/core/domain/discovery.py +166 -0
  18. gnosisllm_knowledge/core/domain/document.py +14 -19
  19. gnosisllm_knowledge/core/domain/search.py +10 -25
  20. gnosisllm_knowledge/core/domain/source.py +11 -12
  21. gnosisllm_knowledge/core/events/__init__.py +8 -0
  22. gnosisllm_knowledge/core/events/types.py +122 -5
  23. gnosisllm_knowledge/core/exceptions.py +93 -0
  24. gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  25. gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  26. gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
  27. gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
  28. gnosisllm_knowledge/fetchers/__init__.py +8 -0
  29. gnosisllm_knowledge/fetchers/config.py +27 -0
  30. gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  31. gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  32. gnosisllm_knowledge/loaders/__init__.py +5 -1
  33. gnosisllm_knowledge/loaders/discovery.py +338 -0
  34. gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  35. gnosisllm_knowledge/loaders/factory.py +46 -0
  36. gnosisllm_knowledge/services/indexing.py +35 -20
  37. gnosisllm_knowledge/services/search.py +37 -20
  38. gnosisllm_knowledge/services/streaming_pipeline.py +39 -7
  39. {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +30 -10
  40. gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
  41. gnosisllm_knowledge-0.3.0.dist-info/RECORD +0 -77
  42. {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
  43. {gnosisllm_knowledge-0.3.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
gnosisllm_knowledge/backends/opensearch/agentic.py (+14 -9)
@@ -2,6 +2,12 @@

 Uses OpenSearch ML agents for AI-powered search with reasoning capabilities.
 Supports flow agents (fast RAG) and conversational agents (multi-turn with memory).
+
+Note:
+    This module is **tenant-agnostic**. Multi-tenancy is achieved through index isolation:
+    each tenant's data resides in a separate OpenSearch index. The caller (e.g., gnosisllm-api)
+    is responsible for constructing the appropriate index name (e.g., `knowledge-{account_id}`).
+    The library operates on the provided index without any tenant-specific filtering logic.
 """

 from __future__ import annotations
@@ -9,7 +15,6 @@ from __future__ import annotations
 import asyncio
 import json
 import logging
-import uuid
 from datetime import UTC, datetime
 from typing import TYPE_CHECKING, Any

@@ -297,13 +302,15 @@ class OpenSearchAgenticSearcher:

     async def list_conversations(
         self,
-        account_id: str | None = None,
         limit: int = 100,
     ) -> list[dict[str, Any]]:
         """List active conversations.

+        Note:
+            This library is tenant-agnostic. Multi-tenancy is achieved through
+            index isolation (separate index per account).
+
         Args:
-            account_id: Filter by account (multi-tenant).
             limit: Maximum number of conversations.

         Returns:
@@ -311,8 +318,6 @@ class OpenSearchAgenticSearcher:
         """
         try:
             body: dict[str, Any] = {"size": limit}
-            if account_id:
-                body["query"] = {"term": {"account_id": account_id}}

             response = await self._client.transport.perform_request(
                 "POST",
@@ -365,16 +370,18 @@
     async def create_conversation(
         self,
         name: str | None = None,
-        account_id: str | None = None,
     ) -> str | None:
         """Create a new conversation memory.

         Uses the OpenSearch Memory API to create a conversation memory.
         The endpoint is POST /_plugins/_ml/memory (introduced in 2.12).

+        Note:
+            This library is tenant-agnostic. Multi-tenancy is achieved through
+            index isolation (separate index per account).
+
         Args:
             name: Optional name for the conversation.
-            account_id: Optional account ID for multi-tenancy.

         Returns:
             The new conversation/memory ID, or None if creation fails.
@@ -382,8 +389,6 @@
         body: dict[str, Any] = {}
         if name:
             body["name"] = name
-        if account_id:
-            body["account_id"] = account_id

         try:
             # POST /_plugins/_ml/memory creates a new memory (OpenSearch 2.12+)
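
With `account_id` removed from these signatures, tenant scoping becomes the caller's job: it picks the index name and passes it to every call. A minimal caller-side sketch; the helper and the `acct-42` value are illustrative, and only the `knowledge-{account_id}` naming scheme comes from the docstrings above.

```python
# Hypothetical caller-side helper: the library stays tenant-agnostic, so the
# API layer derives a per-tenant index name and targets it on every call.
def tenant_index(account_id: str, prefix: str = "knowledge") -> str:
    """Build the per-tenant index name, e.g. "knowledge-acct-42"."""
    return f"{prefix}-{account_id}"

index_name = tenant_index("acct-42")  # -> "knowledge-acct-42"
# Searcher/indexer calls then receive index_name instead of an account_id filter.
```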
gnosisllm_knowledge/backends/opensearch/indexer.py (+48 -3)
@@ -87,13 +87,15 @@ class OpenSearchIndexer:
         # Embeddings are generated by OpenSearch ingest pipeline
         doc_body = self._prepare_document(document)

-        # Index the document
+        # Index the document with ingest pipeline for embedding generation
         refresh = options.get("refresh", False)
+        pipeline = self._config.ingest_pipeline_name
         await self._client.index(
             index=index_name,
             id=document.doc_id,
             body=doc_body,
             refresh=refresh,
+            pipeline=pipeline,
         )

         return IndexResult(
@@ -272,6 +274,43 @@ class OpenSearchIndexer:
             failed_count=0,
         )

+    async def get(
+        self,
+        doc_id: str,
+        index_name: str,
+    ) -> dict[str, Any] | None:
+        """Get a document by ID.
+
+        Uses OpenSearch client's direct get() API (CRUD operation, not search).
+
+        Args:
+            doc_id: Document ID to retrieve.
+            index_name: Index name.
+
+        Returns:
+            Document dict (source fields) or None if not found.
+            Excludes embeddings from response for efficiency.
+        """
+        try:
+            response = await self._client.get(
+                index=index_name,
+                id=doc_id,
+                _source_excludes=["content_embedding"],
+            )
+            source = response.get("_source", {})
+            # Include the document ID in the response
+            source["id"] = response.get("_id", doc_id)
+            return source
+        except Exception as e:
+            if "not_found" in str(e).lower():
+                return None
+            logger.error(f"Failed to get document {doc_id}: {e}")
+            raise IndexError(
+                message=f"Failed to get document: {e}",
+                details={"document_id": doc_id},
+                cause=e,
+            ) from e
+
     async def delete(
         self,
         doc_id: str,
@@ -434,7 +473,9 @@ class OpenSearchIndexer:
         if not actions:
             return IndexResult(success=True, index_name=index_name, indexed_count=0, failed_count=0)

-        response = await self._client.bulk(body=actions)
+        # Use ingest pipeline for embedding generation
+        pipeline = self._config.ingest_pipeline_name
+        response = await self._client.bulk(body=actions, pipeline=pipeline)

         indexed = 0
         failed = 0
@@ -460,6 +501,11 @@
     def _prepare_document(self, document: Document) -> dict[str, Any]:
         """Prepare document for indexing.

+        Note:
+            This library is tenant-agnostic. Multi-tenancy is achieved through index
+            isolation. Tenant information should be passed in document.metadata if
+            needed for audit purposes.
+
         Args:
             document: Document to prepare.

@@ -479,7 +525,6 @@
             "url": document.url,
             "title": document.title,
             "source": document.source,
-            "account_id": document.account_id,
             "collection_id": document.collection_id,
             "collection_name": document.collection_name,
             "source_id": document.source_id,
gnosisllm_knowledge/backends/opensearch/mappings.py (+12 -4)
@@ -1,4 +1,10 @@
-"""OpenSearch index mappings for knowledge documents."""
+"""OpenSearch index mappings for knowledge documents.
+
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation (e.g., `knowledge-{account_id}`). Index mappings do not include
+    tenant-specific fields like account_id.
+"""

 from __future__ import annotations

@@ -56,8 +62,7 @@ def get_knowledge_index_mappings(config: OpenSearchConfig) -> dict[str, Any]:
                 "fields": {"keyword": {"type": "keyword", "ignore_above": 512}},
             },
             "source": {"type": "keyword"},
-            # === Multi-tenant Fields ===
-            "account_id": {"type": "keyword"},
+            # === Collection Fields ===
             "collection_id": {"type": "keyword"},
             "collection_name": {"type": "keyword"}, # For aggregation display
             "source_id": {"type": "keyword"},
@@ -129,13 +134,16 @@ def get_memory_index_settings(config: OpenSearchConfig) -> dict[str, Any]:
 def get_memory_index_mappings() -> dict[str, Any]:
     """Get index mappings for conversation memory.

+    Note:
+        This library is tenant-agnostic. Multi-tenancy is achieved through index
+        isolation. Use tenant-specific index names for conversation memory.
+
     Returns:
         Index mappings dictionary.
     """
     return {
         "properties": {
             "conversation_id": {"type": "keyword"},
-            "account_id": {"type": "keyword"},
             "user_id": {"type": "keyword"},
             "message_index": {"type": "integer"},
             "role": {"type": "keyword"}, # user, assistant, system
gnosisllm_knowledge/backends/opensearch/queries.py (+33 -33)
@@ -2,6 +2,10 @@

 Uses OpenSearch neural search - embeddings are generated automatically
 via the deployed model. No Python-side embedding generation needed.
+
+Note: This module is tenant-agnostic. Multi-tenancy should be handled
+at the API layer by using separate indices per account (e.g.,
+`knowledge-{account_id}`) rather than filtering by account_id.
 """

 from __future__ import annotations
@@ -18,9 +22,13 @@ class QueryBuilder:
     model handles embedding generation automatically via ingest and
     search pipelines.

+    Note:
+        This builder is tenant-agnostic. Multi-tenancy should be handled
+        by using separate indices per account.
+
     Example:
         ```python
-        query = SearchQuery(text="how to configure", account_id="acc123")
+        query = SearchQuery(text="how to configure", collection_ids=["col-1"])
         builder = QueryBuilder(query, model_id="abc123")
         os_query = builder.build_hybrid_query()
         ```
@@ -204,12 +212,12 @@ class QueryBuilder:
             },
         }

-        # Apply filters at top level for hybrid
+        # Apply filters using post_filter for hybrid queries
+        # Hybrid queries cannot be wrapped in bool - they must be top-level
         filters = self._build_filters()
         if filters:
-            query["query"] = {
+            query["post_filter"] = {
                 "bool": {
-                    "must": [query["query"]],
                     "filter": filters,
                 }
             }
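
For reference, the body the builder now produces keeps the hybrid clause at the top level and applies filters through `post_filter`. The sketch below is illustrative: the match/neural sub-queries, model ID, and `k` are assumptions; only the `post_filter` wrapper and the `content_embedding`/`collection_id` fields appear elsewhere in this diff. Since `post_filter` runs after scoring, it narrows the returned hits without affecting how the hybrid sub-queries are normalized.

```python
# Illustrative hybrid search body after this change: hybrid stays top-level,
# filters move into post_filter instead of a bool wrapper around the query.
hybrid_body = {
    "query": {
        "hybrid": {
            "queries": [
                {"match": {"content": "how to configure"}},  # lexical leg
                {
                    "neural": {
                        "content_embedding": {
                            "query_text": "how to configure",
                            "model_id": "abc123",  # assumed deployed model ID
                            "k": 50,
                        }
                    }
                },  # semantic leg
            ]
        }
    },
    "post_filter": {
        "bool": {"filter": [{"terms": {"collection_id": ["col-1"]}}]},
    },
}
```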
@@ -270,15 +278,15 @@
     def _build_filters(self) -> list[dict[str, Any]]:
         """Build filter clauses from query parameters.

+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
+
         Returns:
-            List of filter clauses.
+            List of filter clauses for collection, source, and metadata filters.
         """
         filters: list[dict[str, Any]] = []

-        # Multi-tenant filter (required for security)
-        if self._query.account_id:
-            filters.append({"term": {"account_id": self._query.account_id}})
-
         # Collection filter
         if self._query.collection_ids:
             filters.append({"terms": {"collection_id": self._query.collection_ids}})
@@ -357,67 +365,61 @@ class QueryBuilder:
         ]


-def build_delete_by_source_query(
-    source_id: str,
-    account_id: str | None = None,
-) -> dict[str, Any]:
+def build_delete_by_source_query(source_id: str) -> dict[str, Any]:
     """Build query to delete documents by source.

+    Note:
+        This function is tenant-agnostic. Multi-tenancy should be handled
+        at the API layer by using separate indices per account.
+
     Args:
         source_id: Source ID to delete.
-        account_id: Optional account filter for multi-tenancy.

     Returns:
         Delete-by-query dictionary.
     """
-    filters = [{"term": {"source_id": source_id}}]
-    if account_id:
-        filters.append({"term": {"account_id": account_id}})
-
     return {
         "query": {
             "bool": {
-                "filter": filters,
+                "filter": [{"term": {"source_id": source_id}}],
             }
         }
     }


-def build_delete_by_collection_query(
-    collection_id: str,
-    account_id: str | None = None,
-) -> dict[str, Any]:
+def build_delete_by_collection_query(collection_id: str) -> dict[str, Any]:
     """Build query to delete documents by collection.

+    Note:
+        This function is tenant-agnostic. Multi-tenancy should be handled
+        at the API layer by using separate indices per account.
+
     Args:
         collection_id: Collection ID to delete.
-        account_id: Optional account filter for multi-tenancy.

     Returns:
         Delete-by-query dictionary.
     """
-    filters = [{"term": {"collection_id": collection_id}}]
-    if account_id:
-        filters.append({"term": {"account_id": account_id}})
-
     return {
         "query": {
             "bool": {
-                "filter": filters,
+                "filter": [{"term": {"collection_id": collection_id}}],
             }
         }
     }


 def build_count_query(
-    account_id: str | None = None,
     collection_id: str | None = None,
     source_id: str | None = None,
 ) -> dict[str, Any]:
     """Build query to count documents.

+    Note:
+        This function is tenant-agnostic. Multi-tenancy should be handled
+        at the API layer by using separate indices per account.
+
     Args:
-        account_id: Optional account filter.
         collection_id: Optional collection filter.
         source_id: Optional source filter.

@@ -426,8 +428,6 @@ def build_count_query(
     """
     filters: list[dict[str, Any]] = []

-    if account_id:
-        filters.append({"term": {"account_id": account_id}})
     if collection_id:
         filters.append({"term": {"collection_id": collection_id}})
     if source_id:
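
A short usage sketch for the simplified helpers: the returned bodies are scoped only by source or collection, and tenancy is whichever index the caller targets. The index name is illustrative, and `delete_by_query` is the standard opensearch-py client call, not something defined in this package.

```python
# The helper now builds a body without any account_id clause.
from gnosisllm_knowledge.backends.opensearch.queries import build_delete_by_source_query

body = build_delete_by_source_query("src-1")
# body == {"query": {"bool": {"filter": [{"term": {"source_id": "src-1"}}]}}}

# Applied against the tenant's own index, e.g.:
# await client.delete_by_query(index="knowledge-acct-42", body=body)
```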
gnosisllm_knowledge/backends/opensearch/searcher.py (+9 -6)
@@ -2,6 +2,10 @@

 Uses OpenSearch neural search - embeddings are generated automatically
 by the deployed ML model. No Python-side embedding generation needed.
+
+Note: This module is tenant-agnostic. Multi-tenancy should be handled
+at the API layer by using separate indices per account (e.g.,
+`knowledge-{account_id}`) rather than filtering by account_id.
 """

 from __future__ import annotations
@@ -506,7 +510,6 @@ class OpenSearchKnowledgeSearcher:
         self,
         index_name: str,
         *,
-        account_id: str | None = None,
         source_id: str | None = None,
         collection_id: str | None = None,
         limit: int = 50,
@@ -514,9 +517,12 @@
     ) -> dict[str, Any]:
         """List documents with optional filters.

+        Note:
+            This method is tenant-agnostic. Multi-tenancy should be handled
+            at the API layer by using separate indices per account.
+
         Args:
-            index_name: Index to query.
-            account_id: Optional account ID filter.
+            index_name: Index to query (use tenant-specific name for isolation).
             source_id: Optional source ID filter.
             collection_id: Optional collection ID filter.
             limit: Maximum documents to return.
@@ -540,9 +546,6 @@
         # Build filter clauses
         filters: list[dict[str, Any]] = []

-        if account_id:
-            filters.append({"term": {"account_id": account_id}})
-
         if source_id:
             filters.append({"term": {"source_id": source_id}})

gnosisllm_knowledge/cli/app.py (+58 -19)
@@ -1,6 +1,11 @@
 """GnosisLLM Knowledge CLI Application.

 Main entry point assembling all CLI commands with enterprise-grade UX.
+
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
+    Use --index to target tenant-specific indices.
 """

 from __future__ import annotations
@@ -147,17 +152,13 @@ def load(
         typer.Option(
             "--type",
             "-t",
-            help="Source type: website, sitemap (auto-detects if not specified).",
+            help="Source type: website, sitemap, discovery (auto-detects if not specified).",
         ),
     ] = None,
     index: Annotated[
         str,
-        typer.Option("--index", "-i", help="Target index name."),
+        typer.Option("--index", "-i", help="Target index name (use tenant-specific name for multi-tenancy)."),
     ] = "knowledge",
-    account_id: Annotated[
-        Optional[str],
-        typer.Option("--account-id", "-a", help="Multi-tenant account ID."),
-    ] = None,
     collection_id: Annotated[
         Optional[str],
         typer.Option("--collection-id", "-c", help="Collection grouping ID."),
@@ -186,16 +187,50 @@ def load(
         bool,
         typer.Option("--verbose", "-V", help="Show per-document progress."),
     ] = False,
+    discovery: Annotated[
+        bool,
+        typer.Option(
+            "--discovery",
+            "-D",
+            help="Use discovery loader to crawl and discover all URLs from the website.",
+        ),
+    ] = False,
+    max_depth: Annotated[
+        int,
+        typer.Option("--max-depth", help="Maximum crawl depth for discovery (default: 3)."),
+    ] = 3,
+    max_pages: Annotated[
+        int,
+        typer.Option("--max-pages", help="Maximum pages to discover (default: 100)."),
+    ] = 100,
+    same_domain: Annotated[
+        bool,
+        typer.Option(
+            "--same-domain/--any-domain",
+            help="Only crawl URLs on the same domain (default: same domain only).",
+        ),
+    ] = True,
 ) -> None:
     """Load and index content from URLs or sitemaps.

     Fetches content, chunks it for optimal embedding, and indexes
     into OpenSearch with automatic embedding generation.

+    [bold]Multi-tenancy:[/bold]
+        Use --index with tenant-specific index names for isolation
+        (e.g., --index knowledge-{account_id}). Each tenant's data
+        is stored in a separate index for complete isolation.
+
+    [bold]Discovery Mode:[/bold]
+        Use --discovery to crawl and discover all URLs from a website
+        before loading. This is useful for sites without a sitemap.
+
     [bold]Example:[/bold]
         $ gnosisllm-knowledge load https://docs.example.com/intro
         $ gnosisllm-knowledge load https://example.com/sitemap.xml --type sitemap
         $ gnosisllm-knowledge load https://docs.example.com/sitemap.xml --max-urls 500
+        $ gnosisllm-knowledge load https://docs.example.com --discovery --max-depth 5
+        $ gnosisllm-knowledge load https://docs.example.com --index knowledge-tenant-123
     """
     from gnosisllm_knowledge.cli.commands.load import load_command

@@ -205,7 +240,6 @@ def load(
             source=source,
             source_type=source_type,
             index_name=index,
-            account_id=account_id,
             collection_id=collection_id,
             source_id=source_id,
             batch_size=batch_size,
@@ -213,6 +247,10 @@
             force=force,
             dry_run=dry_run,
             verbose=verbose,
+            discovery=discovery,
+            max_depth=max_depth,
+            max_pages=max_pages,
+            same_domain=same_domain,
         )
     )

@@ -238,7 +276,7 @@ def search(
     ] = "hybrid",
     index: Annotated[
         str,
-        typer.Option("--index", "-i", help="Index to search."),
+        typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
     ] = "knowledge",
     limit: Annotated[
         int,
@@ -248,10 +286,6 @@
         int,
         typer.Option("--offset", "-o", help="Pagination offset."),
     ] = 0,
-    account_id: Annotated[
-        Optional[str],
-        typer.Option("--account-id", "-a", help="Filter by account ID."),
-    ] = None,
     collection_ids: Annotated[
         Optional[str],
         typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
@@ -289,10 +323,16 @@ def search(
     - [cyan]hybrid[/cyan]: Combined semantic + keyword (default, best results)
     - [cyan]agentic[/cyan]: AI-powered search with reasoning

+    [bold]Multi-tenancy:[/bold]
+        Use --index with tenant-specific index names for isolation
+        (e.g., --index knowledge-{account_id}). Each tenant's data
+        is stored in a separate index for complete isolation.
+
     [bold]Example:[/bold]
         $ gnosisllm-knowledge search "how to configure auth"
         $ gnosisllm-knowledge search "API reference" --mode semantic --limit 10
        $ gnosisllm-knowledge search --interactive
+        $ gnosisllm-knowledge search "query" --index knowledge-tenant-123
     """
     from gnosisllm_knowledge.cli.commands.search import search_command

@@ -304,7 +344,6 @@
             index_name=index,
             limit=limit,
             offset=offset,
-            account_id=account_id,
             collection_ids=collection_ids,
             source_ids=source_ids,
             min_score=min_score,
@@ -451,7 +490,7 @@ def agentic_setup(
 def agentic_chat(
     index: Annotated[
         str,
-        typer.Option("--index", "-i", help="Index to search."),
+        typer.Option("--index", "-i", help="Index to search (use tenant-specific name for multi-tenancy)."),
     ] = "knowledge",
     agent_type: Annotated[
         str,
@@ -461,10 +500,6 @@
             help="Agent type: flow or conversational (default).",
         ),
     ] = "conversational",
-    account_id: Annotated[
-        Optional[str],
-        typer.Option("--account-id", "-a", help="Filter by account ID."),
-    ] = None,
     collection_ids: Annotated[
         Optional[str],
         typer.Option("--collection-ids", "-c", help="Filter by collection IDs (comma-separated)."),
@@ -479,10 +514,15 @@
     Start a conversation with the AI-powered knowledge assistant.
     The agent remembers context for multi-turn dialogue.

+    [bold]Multi-tenancy:[/bold]
+        Use --index with tenant-specific index names for isolation
+        (e.g., --index knowledge-{account_id}).
+
     [bold]Example:[/bold]
         $ gnosisllm-knowledge agentic chat
         $ gnosisllm-knowledge agentic chat --type flow
         $ gnosisllm-knowledge agentic chat --verbose
+        $ gnosisllm-knowledge agentic chat --index knowledge-tenant-123
     """
     from gnosisllm_knowledge.cli.commands.agentic import agentic_chat_command

@@ -491,7 +531,6 @@
         display=display,
         index_name=index,
         agent_type=agent_type,
-        account_id=account_id,
         collection_ids=collection_ids,
         verbose=verbose,
     )
gnosisllm_knowledge/cli/commands/agentic.py (+15 -9)
@@ -4,6 +4,10 @@ Commands:
 - setup: Configure agents in OpenSearch
 - chat: Interactive agentic chat session
 - status: Show agent configuration status
+
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation - each tenant should use a separate index (e.g., "knowledge-{account_id}").
 """

 from __future__ import annotations
@@ -202,17 +206,19 @@ async def agentic_chat_command(
     display: RichDisplayService,
     index_name: str = "knowledge",
     agent_type: str = "conversational",
-    account_id: str | None = None,
     collection_ids: str | None = None,
     verbose: bool = False,
 ) -> None:
     """Interactive agentic chat session.

+    Note:
+        Multi-tenancy is achieved through index isolation. Use tenant-specific
+        index names instead (e.g., --index knowledge-tenant-123).
+
     Args:
         display: Display service for output.
-        index_name: Index to search.
+        index_name: Index to search (use tenant-specific name for isolation).
         agent_type: Agent type ('flow' or 'conversational').
-        account_id: Filter by account ID.
         collection_ids: Filter by collection IDs (comma-separated).
         verbose: Show reasoning steps.
     """
@@ -242,7 +248,6 @@
         if agent_type == "conversational":
             return await searcher.create_conversation(
                 name="CLI Chat Session",
-                account_id=account_id,
             )
         return None

@@ -291,7 +296,6 @@ async def agentic_chat_command(
             agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
             conversation_id=conversation_id,
             collection_ids=collection_list,
-            account_id=account_id,
             include_reasoning=verbose,
         )

@@ -395,7 +399,6 @@ async def agentic_search_command(
     query: str,
     index_name: str = "knowledge",
     agent_type: str = "flow",
-    account_id: str | None = None,
     collection_ids: str | None = None,
     source_ids: str | None = None,
     limit: int = 5,
@@ -404,12 +407,15 @@
 ) -> dict[str, Any] | None:
     """Execute agentic search.

+    Note:
+        Multi-tenancy is achieved through index isolation. Use tenant-specific
+        index names instead (e.g., --index knowledge-tenant-123).
+
     Args:
         display: Display service for output.
         query: Search query text.
-        index_name: Index to search.
+        index_name: Index to search (use tenant-specific name for isolation).
         agent_type: Agent type ('flow' or 'conversational').
-        account_id: Filter by account ID.
         collection_ids: Filter by collection IDs (comma-separated).
         source_ids: Filter by source IDs (comma-separated).
         limit: Maximum source documents to retrieve.
@@ -447,12 +453,12 @@
     )

     # Build query
+    # Note: account_id is deprecated and ignored - use index isolation instead
     agentic_query = AgenticSearchQuery(
         text=query,
         agent_type=AgentType.CONVERSATIONAL if agent_type == "conversational" else AgentType.FLOW,
         collection_ids=collection_list,
         source_ids=source_list,
-        account_id=account_id,
         limit=limit,
         include_reasoning=verbose,
     )