gnosisllm-knowledge 0.3.0__tar.gz → 0.4.0__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/PKG-INFO +30 -10
  2. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/README.md +29 -9
  3. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/pyproject.toml +1 -1
  4. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/api/knowledge.py +225 -35
  5. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  6. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/memory/searcher.py +111 -10
  7. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
  8. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
  9. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
  10. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  11. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/searcher.py +9 -6
  12. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/app.py +58 -19
  13. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/commands/agentic.py +15 -9
  14. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/commands/load.py +169 -19
  15. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/commands/memory.py +10 -0
  16. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/commands/search.py +9 -10
  17. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/commands/setup.py +25 -1
  18. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/utils/config.py +4 -4
  19. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/domain/__init__.py +13 -0
  20. gnosisllm_knowledge-0.4.0/src/gnosisllm_knowledge/core/domain/discovery.py +166 -0
  21. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/domain/document.py +14 -19
  22. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/domain/search.py +10 -25
  23. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/domain/source.py +11 -12
  24. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/events/__init__.py +8 -0
  25. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/events/types.py +122 -5
  26. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/exceptions.py +93 -0
  27. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  28. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  29. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
  30. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
  31. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/fetchers/__init__.py +8 -0
  32. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/fetchers/config.py +27 -0
  33. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  34. gnosisllm_knowledge-0.4.0/src/gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  35. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/loaders/__init__.py +5 -1
  36. gnosisllm_knowledge-0.4.0/src/gnosisllm_knowledge/loaders/discovery.py +338 -0
  37. gnosisllm_knowledge-0.4.0/src/gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  38. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/loaders/factory.py +46 -0
  39. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/services/indexing.py +35 -20
  40. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/services/search.py +37 -20
  41. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/services/streaming_pipeline.py +39 -7
  42. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/__init__.py +0 -0
  43. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/api/__init__.py +0 -0
  44. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/api/memory.py +0 -0
  45. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/__init__.py +0 -0
  46. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/memory/__init__.py +0 -0
  47. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/__init__.py +0 -0
  48. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/config.py +0 -0
  49. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/memory/__init__.py +0 -0
  50. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/memory/client.py +0 -0
  51. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/memory/config.py +0 -0
  52. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/memory/setup.py +0 -0
  53. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/backends/opensearch/setup.py +0 -0
  54. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/chunking/__init__.py +0 -0
  55. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/chunking/fixed.py +0 -0
  56. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/chunking/sentence.py +0 -0
  57. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/__init__.py +0 -0
  58. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/commands/__init__.py +0 -0
  59. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/display/__init__.py +0 -0
  60. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/display/service.py +0 -0
  61. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/cli/utils/__init__.py +0 -0
  62. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/__init__.py +0 -0
  63. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/domain/memory.py +0 -0
  64. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/domain/result.py +0 -0
  65. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/events/emitter.py +0 -0
  66. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/__init__.py +0 -0
  67. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/chunker.py +0 -0
  68. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/fetcher.py +0 -0
  69. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/loader.py +0 -0
  70. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/memory.py +0 -0
  71. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/interfaces/setup.py +0 -0
  72. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/streaming/__init__.py +0 -0
  73. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/core/streaming/pipeline.py +0 -0
  74. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/fetchers/http.py +0 -0
  75. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/loaders/base.py +0 -0
  76. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/loaders/sitemap.py +0 -0
  77. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/loaders/sitemap_streaming.py +0 -0
  78. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/loaders/website.py +0 -0
  79. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/py.typed +0 -0
  80. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.0}/src/gnosisllm_knowledge/services/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gnosisllm-knowledge
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Enterprise-grade knowledge loading, indexing, and search for Python
5
5
  License: MIT
6
6
  Keywords: knowledge-base,rag,semantic-search,vector-search,opensearch,llm,embeddings,enterprise
@@ -46,7 +46,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
46
46
  - **Multiple Loaders**: Load content from websites, sitemaps, and files
47
47
  - **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
48
48
  - **OpenSearch Backend**: Production-ready with k-NN vector search
49
- - **Multi-Tenancy**: Built-in support for account and collection isolation
49
+ - **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
50
50
  - **Event-Driven**: Observer pattern for progress tracking and monitoring
51
51
  - **SOLID Architecture**: Clean, maintainable, and extensible codebase
52
52
 
@@ -144,14 +144,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
144
144
 
145
145
  Options:
146
146
  --type Source type: website, sitemap (auto-detects)
147
- --index Target index name (default: knowledge)
148
- --account-id Multi-tenant account ID
147
+ --index Target index name (e.g., knowledge-tenant-123)
149
148
  --collection-id Collection grouping ID
150
149
  --batch-size Documents per batch (default: 100)
151
150
  --max-urls Max URLs from sitemap (default: 1000)
152
151
  --dry-run Preview without indexing
153
152
  ```
154
153
 
154
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
155
+
155
156
  ### Search
156
157
 
157
158
  Search indexed content with multiple modes:
@@ -161,14 +162,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
161
162
 
162
163
  Options:
163
164
  --mode Search mode: semantic, keyword, hybrid, agentic
164
- --index Index to search (default: knowledge)
165
+ --index Index to search (e.g., knowledge-tenant-123)
165
166
  --limit Max results (default: 5)
166
- --account-id Filter by account
167
167
  --collection-ids Filter by collections (comma-separated)
168
168
  --json Output as JSON for scripting
169
169
  --interactive Interactive search session
170
170
  ```
171
171
 
172
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
173
+
172
174
  ## Architecture
173
175
 
174
176
  ```
@@ -319,22 +321,40 @@ agent_body = {
319
321
 
320
322
  ## Multi-Tenancy
321
323
 
324
+ This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
325
+
322
326
  ```python
323
- # Load with tenant isolation
327
+ # The calling application (e.g., API) constructs tenant-specific index names
328
+ index_name = f"knowledge-{account_id}"
329
+
330
+ # Create Knowledge instance for the tenant
331
+ knowledge = Knowledge.from_opensearch(
332
+ host="localhost",
333
+ port=9200,
334
+ index_prefix=index_name, # knowledge-tenant-123
335
+ )
336
+
337
+ # Load content to tenant's isolated index
324
338
  await knowledge.load(
325
339
  source="https://docs.example.com/sitemap.xml",
326
- account_id="tenant-123",
327
340
  collection_id="docs",
328
341
  )
329
342
 
330
- # Search within tenant
343
+ # Search within tenant's index (no account_id filter needed)
331
344
  results = await knowledge.search(
332
345
  "query",
333
- account_id="tenant-123",
334
346
  collection_ids=["docs"],
335
347
  )
336
348
  ```
337
349
 
350
+ **Note**: For audit purposes, you can store `account_id` in document metadata:
351
+ ```python
352
+ await knowledge.load(
353
+ source="https://docs.example.com/sitemap.xml",
354
+ document_defaults={"metadata": {"account_id": "tenant-123"}},
355
+ )
356
+ ```
357
+
338
358
  ## Agentic Memory
339
359
 
340
360
  Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
@@ -11,7 +11,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
11
11
  - **Multiple Loaders**: Load content from websites, sitemaps, and files
12
12
  - **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
13
13
  - **OpenSearch Backend**: Production-ready with k-NN vector search
14
- - **Multi-Tenancy**: Built-in support for account and collection isolation
14
+ - **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
15
15
  - **Event-Driven**: Observer pattern for progress tracking and monitoring
16
16
  - **SOLID Architecture**: Clean, maintainable, and extensible codebase
17
17
 
@@ -109,14 +109,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
109
109
 
110
110
  Options:
111
111
  --type Source type: website, sitemap (auto-detects)
112
- --index Target index name (default: knowledge)
113
- --account-id Multi-tenant account ID
112
+ --index Target index name (e.g., knowledge-tenant-123)
114
113
  --collection-id Collection grouping ID
115
114
  --batch-size Documents per batch (default: 100)
116
115
  --max-urls Max URLs from sitemap (default: 1000)
117
116
  --dry-run Preview without indexing
118
117
  ```
119
118
 
119
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
120
+
120
121
  ### Search
121
122
 
122
123
  Search indexed content with multiple modes:
@@ -126,14 +127,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
126
127
 
127
128
  Options:
128
129
  --mode Search mode: semantic, keyword, hybrid, agentic
129
- --index Index to search (default: knowledge)
130
+ --index Index to search (e.g., knowledge-tenant-123)
130
131
  --limit Max results (default: 5)
131
- --account-id Filter by account
132
132
  --collection-ids Filter by collections (comma-separated)
133
133
  --json Output as JSON for scripting
134
134
  --interactive Interactive search session
135
135
  ```
136
136
 
137
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
138
+
137
139
  ## Architecture
138
140
 
139
141
  ```
@@ -284,22 +286,40 @@ agent_body = {
284
286
 
285
287
  ## Multi-Tenancy
286
288
 
289
+ This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
290
+
287
291
  ```python
288
- # Load with tenant isolation
292
+ # The calling application (e.g., API) constructs tenant-specific index names
293
+ index_name = f"knowledge-{account_id}"
294
+
295
+ # Create Knowledge instance for the tenant
296
+ knowledge = Knowledge.from_opensearch(
297
+ host="localhost",
298
+ port=9200,
299
+ index_prefix=index_name, # knowledge-tenant-123
300
+ )
301
+
302
+ # Load content to tenant's isolated index
289
303
  await knowledge.load(
290
304
  source="https://docs.example.com/sitemap.xml",
291
- account_id="tenant-123",
292
305
  collection_id="docs",
293
306
  )
294
307
 
295
- # Search within tenant
308
+ # Search within tenant's index (no account_id filter needed)
296
309
  results = await knowledge.search(
297
310
  "query",
298
- account_id="tenant-123",
299
311
  collection_ids=["docs"],
300
312
  )
301
313
  ```
302
314
 
315
+ **Note**: For audit purposes, you can store `account_id` in document metadata:
316
+ ```python
317
+ await knowledge.load(
318
+ source="https://docs.example.com/sitemap.xml",
319
+ document_defaults={"metadata": {"account_id": "tenant-123"}},
320
+ )
321
+ ```
322
+
303
323
  ## Agentic Memory
304
324
 
305
325
  Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "gnosisllm-knowledge"
3
- version = "0.3.0"
3
+ version = "0.4.0"
4
4
  description = "Enterprise-grade knowledge loading, indexing, and search for Python"
5
5
  authors = [
6
6
  {name = "David Marsa", email = "david.marsa@neomanex.com"},
@@ -1,4 +1,39 @@
1
- """High-level Knowledge API facade."""
1
+ """High-level Knowledge API facade.
2
+
3
+ This module provides the main entry point for the gnosisllm-knowledge library.
4
+ The Knowledge class is a high-level facade that abstracts the complexity of
5
+ loading, indexing, and searching knowledge documents.
6
+
7
+ Note:
8
+ This library is tenant-agnostic. Multi-tenancy should be handled at the
9
+ API layer by using separate indices per account (e.g.,
10
+ `knowledge-{account_id}`) rather than filtering by account_id.
11
+
12
+ Example:
13
+ ```python
14
+ # Create Knowledge instance for a specific tenant
15
+ knowledge = Knowledge.from_opensearch(
16
+ host="localhost",
17
+ port=9200,
18
+ )
19
+
20
+ # Use a tenant-specific index
21
+ tenant_index = f"knowledge-{account_id}"
22
+
23
+ # Load content
24
+ await knowledge.load(
25
+ "https://docs.example.com/sitemap.xml",
26
+ index_name=tenant_index,
27
+ collection_id="docs",
28
+ )
29
+
30
+ # Search (tenant isolation via index name)
31
+ results = await knowledge.search(
32
+ "how to configure",
33
+ index_name=tenant_index,
34
+ )
35
+ ```
36
+ """
2
37
 
3
38
  from __future__ import annotations
4
39
 
@@ -130,6 +165,10 @@ class Knowledge:
130
165
  ) -> Knowledge:
131
166
  """Create Knowledge instance with OpenSearch backend.
132
167
 
168
+ This factory creates a Knowledge instance configured for OpenSearch.
169
+ The returned instance is tenant-agnostic - multi-tenancy should be
170
+ handled by using separate indices per account.
171
+
133
172
  Args:
134
173
  host: OpenSearch host.
135
174
  port: OpenSearch port.
@@ -147,6 +186,19 @@ class Knowledge:
147
186
  Note:
148
187
  Embeddings are generated automatically by OpenSearch ingest pipeline.
149
188
  Run 'gnosisllm-knowledge setup' to configure the ML model.
189
+
190
+ Example:
191
+ ```python
192
+ # Create a Knowledge instance
193
+ knowledge = Knowledge.from_opensearch(
194
+ host="localhost",
195
+ port=9200,
196
+ )
197
+
198
+ # Use tenant-specific index for isolation
199
+ tenant_index = f"gnosisllm-{account_id}-knowledge"
200
+ await knowledge.load(source, index_name=tenant_index)
201
+ ```
150
202
  """
151
203
  # Import OpenSearch client
152
204
  try:
@@ -216,15 +268,29 @@ class Knowledge:
216
268
  def from_env(cls) -> Knowledge:
217
269
  """Create Knowledge instance from environment variables.
218
270
 
271
+ This factory creates a Knowledge instance using configuration from
272
+ environment variables. The returned instance is tenant-agnostic -
273
+ multi-tenancy should be handled by using separate indices per account.
274
+
219
275
  Returns:
220
276
  Configured Knowledge instance.
277
+
278
+ Example:
279
+ ```python
280
+ # Create from environment
281
+ knowledge = Knowledge.from_env()
282
+
283
+ # Use tenant-specific index for isolation
284
+ tenant_index = f"gnosisllm-{account_id}-knowledge"
285
+ await knowledge.search("query", index_name=tenant_index)
286
+ ```
221
287
  """
222
288
  config = OpenSearchConfig.from_env()
223
289
  neoreader_config = NeoreaderConfig.from_env()
224
290
 
225
291
  return cls.from_opensearch(
226
292
  config=config,
227
- neoreader_url=neoreader_config.base_url if neoreader_config.base_url else None,
293
+ neoreader_url=neoreader_config.host if neoreader_config.host else None,
228
294
  )
229
295
 
230
296
  @property
@@ -318,7 +384,6 @@ class Knowledge:
318
384
  source: str,
319
385
  *,
320
386
  index_name: str | None = None,
321
- account_id: str | None = None,
322
387
  collection_id: str | None = None,
323
388
  source_id: str | None = None,
324
389
  source_type: str | None = None,
@@ -329,10 +394,13 @@ class Knowledge:
329
394
 
330
395
  Automatically detects source type (sitemap, website, etc.).
331
396
 
397
+ Note:
398
+ This method is tenant-agnostic. Multi-tenancy should be handled
399
+ by using separate indices per account.
400
+
332
401
  Args:
333
402
  source: Source URL or path.
334
- index_name: Target index (uses default if not provided).
335
- account_id: Account ID for multi-tenancy.
403
+ index_name: Target index (use tenant-specific name for isolation).
336
404
  collection_id: Collection ID.
337
405
  source_id: Source ID (auto-generated if not provided).
338
406
  source_type: Explicit source type (auto-detected if not provided).
@@ -366,7 +434,6 @@ class Knowledge:
366
434
  return await service.load_and_index(
367
435
  source=source,
368
436
  index_name=index,
369
- account_id=account_id,
370
437
  collection_id=collection_id,
371
438
  source_id=source_id,
372
439
  **options,
@@ -377,7 +444,6 @@ class Knowledge:
377
444
  source: str,
378
445
  *,
379
446
  index_name: str | None = None,
380
- account_id: str | None = None,
381
447
  collection_id: str | None = None,
382
448
  collection_name: str | None = None,
383
449
  source_id: str | None = None,
@@ -398,10 +464,13 @@ class Knowledge:
398
464
  - Document storage: O(index_batch_size)
399
465
  - In-flight fetches: O(fetch_concurrency * avg_page_size)
400
466
 
467
+ Note:
468
+ This method is tenant-agnostic. Multi-tenancy should be handled
469
+ by using separate indices per account.
470
+
401
471
  Args:
402
472
  source: Sitemap URL.
403
- index_name: Target index (uses default if not provided).
404
- account_id: Account ID for multi-tenancy.
473
+ index_name: Target index (use tenant-specific name for isolation).
405
474
  collection_id: Collection ID.
406
475
  collection_name: Collection name for display.
407
476
  source_id: Source ID (auto-generated if not provided).
@@ -419,6 +488,7 @@ class Knowledge:
419
488
  # Efficiently load 100k+ URL sitemap
420
489
  result = await knowledge.load_streaming(
421
490
  "https://large-site.com/sitemap.xml",
491
+ index_name="knowledge-account123", # Tenant-specific
422
492
  url_batch_size=100,
423
493
  fetch_concurrency=20,
424
494
  max_urls=50000,
@@ -454,7 +524,6 @@ class Knowledge:
454
524
  return await pipeline.execute(
455
525
  source=source,
456
526
  index_name=index,
457
- account_id=account_id,
458
527
  collection_id=collection_id,
459
528
  collection_name=collection_name,
460
529
  source_id=source_id,
@@ -471,7 +540,6 @@ class Knowledge:
471
540
  mode: SearchMode = SearchMode.HYBRID,
472
541
  limit: int = 10,
473
542
  offset: int = 0,
474
- account_id: str | None = None,
475
543
  collection_ids: list[str] | None = None,
476
544
  source_ids: list[str] | None = None,
477
545
  min_score: float | None = None,
@@ -479,13 +547,16 @@ class Knowledge:
479
547
  ) -> SearchResult:
480
548
  """Search for knowledge documents.
481
549
 
550
+ Note:
551
+ This method is tenant-agnostic. Multi-tenancy should be handled
552
+ by using separate indices per account.
553
+
482
554
  Args:
483
555
  query: Search query text.
484
- index_name: Index to search (uses default if not provided).
556
+ index_name: Index to search (use tenant-specific name for isolation).
485
557
  mode: Search mode (semantic, keyword, hybrid).
486
558
  limit: Maximum results.
487
559
  offset: Result offset for pagination.
488
- account_id: Account ID for multi-tenancy.
489
560
  collection_ids: Filter by collection IDs.
490
561
  source_ids: Filter by source IDs.
491
562
  min_score: Minimum score threshold.
@@ -500,7 +571,6 @@ class Knowledge:
500
571
  mode=mode,
501
572
  limit=limit,
502
573
  offset=offset,
503
- account_id=account_id,
504
574
  collection_ids=collection_ids,
505
575
  source_ids=source_ids,
506
576
  min_score=min_score,
@@ -578,19 +648,73 @@ class Knowledge:
578
648
 
579
649
  # === Management Methods ===
580
650
 
651
+ async def get_document(
652
+ self,
653
+ document_id: str,
654
+ *,
655
+ index_name: str | None = None,
656
+ ) -> dict[str, Any] | None:
657
+ """Get a single document by ID.
658
+
659
+ Note:
660
+ This method is tenant-agnostic. Multi-tenancy should be handled
661
+ by using separate indices per account.
662
+
663
+ Args:
664
+ document_id: Document ID to retrieve.
665
+ index_name: Index name (use tenant-specific name for isolation).
666
+ Uses default index if not provided.
667
+
668
+ Returns:
669
+ Document dict with all fields (excluding embeddings) or None if not found.
670
+ """
671
+ index = index_name or self._default_index
672
+ if not index:
673
+ raise ValueError("No index specified and no default index configured")
674
+
675
+ return await self._indexer.get(document_id, index)
676
+
677
+ async def delete_document(
678
+ self,
679
+ document_id: str,
680
+ *,
681
+ index_name: str | None = None,
682
+ ) -> bool:
683
+ """Delete a single document by ID.
684
+
685
+ Note:
686
+ This method is tenant-agnostic. Multi-tenancy should be handled
687
+ by using separate indices per account.
688
+
689
+ Args:
690
+ document_id: Document ID to delete.
691
+ index_name: Index name (use tenant-specific name for isolation).
692
+ Uses default index if not provided.
693
+
694
+ Returns:
695
+ True if deleted, False if not found.
696
+ """
697
+ index = index_name or self._default_index
698
+ if not index:
699
+ raise ValueError("No index specified and no default index configured")
700
+
701
+ return await self._indexer.delete(document_id, index)
702
+
581
703
  async def delete_source(
582
704
  self,
583
705
  source_id: str,
584
706
  *,
585
707
  index_name: str | None = None,
586
- account_id: str | None = None,
587
708
  ) -> int:
588
709
  """Delete all documents from a source.
589
710
 
711
+ Note:
712
+ This method is tenant-agnostic. Multi-tenancy should be handled
713
+ by using separate indices per account.
714
+
590
715
  Args:
591
716
  source_id: Source ID to delete.
592
- index_name: Index name.
593
- account_id: Account ID for multi-tenancy.
717
+ index_name: Index name (use tenant-specific name for isolation).
594
718
 
595
719
  Returns:
596
720
  Count of deleted documents.
@@ -599,21 +723,23 @@ class Knowledge:
599
723
  if not index:
600
724
  raise ValueError("No index specified")
601
725
 
602
- return await self.indexing.delete_source(source_id, index, account_id)
726
+ return await self.indexing.delete_source(source_id, index)
603
727
 
604
728
  async def delete_collection(
605
729
  self,
606
730
  collection_id: str,
607
731
  *,
608
732
  index_name: str | None = None,
609
- account_id: str | None = None,
610
733
  ) -> int:
611
734
  """Delete all documents from a collection.
612
735
 
736
+ Note:
737
+ This method is tenant-agnostic. Multi-tenancy should be handled
738
+ by using separate indices per account.
739
+
613
740
  Args:
614
741
  collection_id: Collection ID to delete.
615
- index_name: Index name.
616
- account_id: Account ID for multi-tenancy.
742
+ index_name: Index name (use tenant-specific name for isolation).
617
743
 
618
744
  Returns:
619
745
  Count of deleted documents.
@@ -622,54 +748,85 @@ class Knowledge:
622
748
  if not index:
623
749
  raise ValueError("No index specified")
624
750
 
625
- return await self.indexing.delete_collection(collection_id, index, account_id)
751
+ return await self.indexing.delete_collection(collection_id, index)
626
752
 
627
753
  async def count(
628
754
  self,
629
755
  *,
630
756
  index_name: str | None = None,
631
- account_id: str | None = None,
632
757
  collection_id: str | None = None,
758
+ source_id: str | None = None,
633
759
  ) -> int:
634
760
  """Count documents.
635
761
 
762
+ Note:
763
+ This method is tenant-agnostic. Multi-tenancy should be handled
764
+ by using separate indices per account.
765
+
636
766
  Args:
637
- index_name: Index to count.
638
- account_id: Filter by account.
767
+ index_name: Index to count (use tenant-specific name for isolation).
639
768
  collection_id: Filter by collection.
769
+ source_id: Filter by source (for source deletion confirmation).
640
770
 
641
771
  Returns:
642
772
  Document count.
643
773
  """
644
774
  return await self.search_service.count(
645
775
  index_name=index_name,
646
- account_id=account_id,
647
776
  collection_id=collection_id,
777
+ source_id=source_id,
648
778
  )
649
779
 
650
780
  # === Collection and Stats Methods ===
651
781
 
652
- async def get_collections(self) -> list[dict[str, Any]]:
782
+ async def get_collections(
783
+ self,
784
+ *,
785
+ index_name: str | None = None,
786
+ ) -> list[dict[str, Any]]:
653
787
  """Get all collections with document counts.
654
788
 
655
789
  Aggregates unique collection_ids from indexed documents.
656
790
 
791
+ Note:
792
+ This method is tenant-agnostic. Multi-tenancy should be handled
793
+ by using separate indices per account.
794
+
795
+ Args:
796
+ index_name: Index to query (use tenant-specific name for isolation).
797
+ Uses default index if not provided.
798
+
657
799
  Returns:
658
800
  List of collection dictionaries with id, name, and document_count.
659
801
  """
660
- return await self.search_service.get_collections()
802
+ index = index_name or self._default_index
803
+ return await self.search_service.get_collections(index_name=index)
661
804
 
662
- async def get_stats(self) -> dict[str, Any]:
805
+ async def get_stats(
806
+ self,
807
+ *,
808
+ index_name: str | None = None,
809
+ ) -> dict[str, Any]:
663
810
  """Get index statistics.
664
811
 
812
+ Note:
813
+ This method is tenant-agnostic. Multi-tenancy should be handled
814
+ by using separate indices per account.
815
+
816
+ Args:
817
+ index_name: Index to query (use tenant-specific name for isolation).
818
+ Uses default index if not provided.
819
+
665
820
  Returns:
666
821
  Dictionary with document_count, index_name, and other stats.
667
822
  """
668
- return await self.search_service.get_stats()
823
+ index = index_name or self._default_index
824
+ return await self.search_service.get_stats(index_name=index)
669
825
 
670
826
  async def list_documents(
671
827
  self,
672
828
  *,
829
+ index_name: str | None = None,
673
830
  source_id: str | None = None,
674
831
  collection_id: str | None = None,
675
832
  limit: int = 50,
@@ -677,7 +834,13 @@ class Knowledge:
677
834
  ) -> dict[str, Any]:
678
835
  """List documents with optional filters.
679
836
 
837
+ Note:
838
+ This method is tenant-agnostic. Multi-tenancy should be handled
839
+ by using separate indices per account.
840
+
680
841
  Args:
842
+ index_name: Index to query (use tenant-specific name for isolation).
843
+ Uses default index if not provided.
681
844
  source_id: Optional source ID filter.
682
845
  collection_id: Optional collection ID filter.
683
846
  limit: Maximum documents to return (max 100).
@@ -686,9 +849,9 @@ class Knowledge:
686
849
  Returns:
687
850
  Dictionary with documents, total, limit, offset.
688
851
  """
689
- index = self._default_index
852
+ index = index_name or self._default_index
690
853
  if not index:
691
- raise ValueError("No default index configured")
854
+ raise ValueError("No index specified and no default index configured")
692
855
 
693
856
  # Clamp limit to reasonable bounds
694
857
  limit = min(max(1, limit), 100)
@@ -823,6 +986,33 @@ class Knowledge:
823
986
  return await agentic_searcher.agentic_search(agentic_query, index, **options)
824
987
 
825
988
  async def close(self) -> None:
826
- """Close connections and clean up resources."""
827
- # Subclasses or future implementations can override this
828
- pass
989
+ """Close connections and clean up resources.
990
+
991
+ Closes the underlying AsyncOpenSearch client to prevent
992
+ unclosed aiohttp session warnings. Properly handles
993
+ CancelledError during event loop shutdown.
994
+ """
995
+ import asyncio
996
+
997
+ # Close the OpenSearch client via the searcher
998
+ # Note: indexer, searcher, and setup share the same client instance,
999
+ # so closing via searcher is sufficient
1000
+ if hasattr(self._searcher, '_client') and self._searcher._client is not None:
1001
+ client = self._searcher._client
1002
+ try:
1003
+ await client.close()
1004
+ logger.debug("Closed OpenSearch client connection")
1005
+ except asyncio.CancelledError:
1006
+ # Event loop is shutting down - this is expected during cleanup
1007
+ logger.debug("OpenSearch client close cancelled (event loop shutting down)")
1008
+ except Exception as e:
1009
+ logger.warning(f"Error closing OpenSearch client: {e}")
1010
+ finally:
1011
+ # Clear client reference on all components that share it
1012
+ # This prevents any accidental reuse after close
1013
+ if hasattr(self._searcher, '_client'):
1014
+ self._searcher._client = None
1015
+ if hasattr(self._indexer, '_client'):
1016
+ self._indexer._client = None
1017
+ if self._setup and hasattr(self._setup, '_client'):
1018
+ self._setup._client = None