gnosisllm-knowledge 0.3.0__tar.gz → 0.4.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/PKG-INFO +30 -10
  2. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/README.md +29 -9
  3. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/pyproject.toml +1 -1
  4. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/api/knowledge.py +233 -35
  5. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  6. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/memory/searcher.py +132 -10
  7. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
  8. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/config.py +7 -0
  9. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
  10. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
  11. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  12. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/searcher.py +64 -6
  13. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/setup.py +29 -33
  14. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/app.py +58 -19
  15. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/agentic.py +15 -9
  16. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/load.py +169 -19
  17. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/memory.py +10 -0
  18. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/search.py +9 -10
  19. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/setup.py +25 -1
  20. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/utils/config.py +4 -4
  21. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/__init__.py +13 -0
  22. gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/core/domain/discovery.py +166 -0
  23. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/document.py +14 -19
  24. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/search.py +10 -25
  25. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/source.py +11 -12
  26. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/events/__init__.py +8 -0
  27. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/events/types.py +122 -5
  28. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/exceptions.py +93 -0
  29. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  30. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  31. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/searcher.py +30 -1
  32. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
  33. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/__init__.py +8 -0
  34. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/config.py +27 -0
  35. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  36. gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  37. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/__init__.py +5 -1
  38. gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/loaders/discovery.py +338 -0
  39. gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  40. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/factory.py +46 -0
  41. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/indexing.py +51 -21
  42. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/search.py +42 -28
  43. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/streaming_pipeline.py +45 -7
  44. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/__init__.py +0 -0
  45. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/api/__init__.py +0 -0
  46. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/api/memory.py +0 -0
  47. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/__init__.py +0 -0
  48. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/memory/__init__.py +0 -0
  49. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/__init__.py +0 -0
  50. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/__init__.py +0 -0
  51. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/client.py +0 -0
  52. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/config.py +0 -0
  53. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/setup.py +0 -0
  54. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/chunking/__init__.py +0 -0
  55. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/chunking/fixed.py +0 -0
  56. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/chunking/sentence.py +0 -0
  57. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/__init__.py +0 -0
  58. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/__init__.py +0 -0
  59. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/display/__init__.py +0 -0
  60. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/display/service.py +0 -0
  61. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/utils/__init__.py +0 -0
  62. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/__init__.py +0 -0
  63. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/memory.py +0 -0
  64. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/result.py +0 -0
  65. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/events/emitter.py +0 -0
  66. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/__init__.py +0 -0
  67. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/chunker.py +0 -0
  68. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/fetcher.py +0 -0
  69. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/loader.py +0 -0
  70. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/memory.py +0 -0
  71. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/setup.py +0 -0
  72. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/streaming/__init__.py +0 -0
  73. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/streaming/pipeline.py +0 -0
  74. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/http.py +0 -0
  75. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/base.py +0 -0
  76. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/sitemap.py +0 -0
  77. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/sitemap_streaming.py +0 -0
  78. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/website.py +0 -0
  79. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/py.typed +0 -0
  80. {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gnosisllm-knowledge
3
- Version: 0.3.0
3
+ Version: 0.4.3
4
4
  Summary: Enterprise-grade knowledge loading, indexing, and search for Python
5
5
  License: MIT
6
6
  Keywords: knowledge-base,rag,semantic-search,vector-search,opensearch,llm,embeddings,enterprise
@@ -46,7 +46,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
46
46
  - **Multiple Loaders**: Load content from websites, sitemaps, and files
47
47
  - **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
48
48
  - **OpenSearch Backend**: Production-ready with k-NN vector search
49
- - **Multi-Tenancy**: Built-in support for account and collection isolation
49
+ - **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
50
50
  - **Event-Driven**: Observer pattern for progress tracking and monitoring
51
51
  - **SOLID Architecture**: Clean, maintainable, and extensible codebase
52
52
 
@@ -144,14 +144,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
144
144
 
145
145
  Options:
146
146
  --type Source type: website, sitemap (auto-detects)
147
- --index Target index name (default: knowledge)
148
- --account-id Multi-tenant account ID
147
+ --index Target index name (e.g., knowledge-tenant-123)
149
148
  --collection-id Collection grouping ID
150
149
  --batch-size Documents per batch (default: 100)
151
150
  --max-urls Max URLs from sitemap (default: 1000)
152
151
  --dry-run Preview without indexing
153
152
  ```
154
153
 
154
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
155
+
155
156
  ### Search
156
157
 
157
158
  Search indexed content with multiple modes:
@@ -161,14 +162,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
161
162
 
162
163
  Options:
163
164
  --mode Search mode: semantic, keyword, hybrid, agentic
164
- --index Index to search (default: knowledge)
165
+ --index Index to search (e.g., knowledge-tenant-123)
165
166
  --limit Max results (default: 5)
166
- --account-id Filter by account
167
167
  --collection-ids Filter by collections (comma-separated)
168
168
  --json Output as JSON for scripting
169
169
  --interactive Interactive search session
170
170
  ```
171
171
 
172
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
173
+
172
174
  ## Architecture
173
175
 
174
176
  ```
@@ -319,22 +321,40 @@ agent_body = {
319
321
 
320
322
  ## Multi-Tenancy
321
323
 
324
+ This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
325
+
322
326
  ```python
323
- # Load with tenant isolation
327
+ # The calling application (e.g., API) constructs tenant-specific index names
328
+ index_name = f"knowledge-{account_id}"
329
+
330
+ # Create Knowledge instance for the tenant
331
+ knowledge = Knowledge.from_opensearch(
332
+ host="localhost",
333
+ port=9200,
334
+ index_prefix=index_name, # knowledge-tenant-123
335
+ )
336
+
337
+ # Load content to tenant's isolated index
324
338
  await knowledge.load(
325
339
  source="https://docs.example.com/sitemap.xml",
326
- account_id="tenant-123",
327
340
  collection_id="docs",
328
341
  )
329
342
 
330
- # Search within tenant
343
+ # Search within tenant's index (no account_id filter needed)
331
344
  results = await knowledge.search(
332
345
  "query",
333
- account_id="tenant-123",
334
346
  collection_ids=["docs"],
335
347
  )
336
348
  ```
337
349
 
350
+ **Note**: For audit purposes, you can store `account_id` in document metadata:
351
+ ```python
352
+ await knowledge.load(
353
+ source="https://docs.example.com/sitemap.xml",
354
+ document_defaults={"metadata": {"account_id": "tenant-123"}},
355
+ )
356
+ ```
357
+
338
358
  ## Agentic Memory
339
359
 
340
360
  Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
@@ -11,7 +11,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
11
11
  - **Multiple Loaders**: Load content from websites, sitemaps, and files
12
12
  - **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
13
13
  - **OpenSearch Backend**: Production-ready with k-NN vector search
14
- - **Multi-Tenancy**: Built-in support for account and collection isolation
14
+ - **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
15
15
  - **Event-Driven**: Observer pattern for progress tracking and monitoring
16
16
  - **SOLID Architecture**: Clean, maintainable, and extensible codebase
17
17
 
@@ -109,14 +109,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
109
109
 
110
110
  Options:
111
111
  --type Source type: website, sitemap (auto-detects)
112
- --index Target index name (default: knowledge)
113
- --account-id Multi-tenant account ID
112
+ --index Target index name (e.g., knowledge-tenant-123)
114
113
  --collection-id Collection grouping ID
115
114
  --batch-size Documents per batch (default: 100)
116
115
  --max-urls Max URLs from sitemap (default: 1000)
117
116
  --dry-run Preview without indexing
118
117
  ```
119
118
 
119
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
120
+
120
121
  ### Search
121
122
 
122
123
  Search indexed content with multiple modes:
@@ -126,14 +127,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
126
127
 
127
128
  Options:
128
129
  --mode Search mode: semantic, keyword, hybrid, agentic
129
- --index Index to search (default: knowledge)
130
+ --index Index to search (e.g., knowledge-tenant-123)
130
131
  --limit Max results (default: 5)
131
- --account-id Filter by account
132
132
  --collection-ids Filter by collections (comma-separated)
133
133
  --json Output as JSON for scripting
134
134
  --interactive Interactive search session
135
135
  ```
136
136
 
137
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
138
+
137
139
  ## Architecture
138
140
 
139
141
  ```
@@ -284,22 +286,40 @@ agent_body = {
284
286
 
285
287
  ## Multi-Tenancy
286
288
 
289
+ This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
290
+
287
291
  ```python
288
- # Load with tenant isolation
292
+ # The calling application (e.g., API) constructs tenant-specific index names
293
+ index_name = f"knowledge-{account_id}"
294
+
295
+ # Create Knowledge instance for the tenant
296
+ knowledge = Knowledge.from_opensearch(
297
+ host="localhost",
298
+ port=9200,
299
+ index_prefix=index_name, # knowledge-tenant-123
300
+ )
301
+
302
+ # Load content to tenant's isolated index
289
303
  await knowledge.load(
290
304
  source="https://docs.example.com/sitemap.xml",
291
- account_id="tenant-123",
292
305
  collection_id="docs",
293
306
  )
294
307
 
295
- # Search within tenant
308
+ # Search within tenant's index (no account_id filter needed)
296
309
  results = await knowledge.search(
297
310
  "query",
298
- account_id="tenant-123",
299
311
  collection_ids=["docs"],
300
312
  )
301
313
  ```
302
314
 
315
+ **Note**: For audit purposes, you can store `account_id` in document metadata:
316
+ ```python
317
+ await knowledge.load(
318
+ source="https://docs.example.com/sitemap.xml",
319
+ document_defaults={"metadata": {"account_id": "tenant-123"}},
320
+ )
321
+ ```
322
+
303
323
  ## Agentic Memory
304
324
 
305
325
  Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "gnosisllm-knowledge"
3
- version = "0.3.0"
3
+ version = "0.4.3"
4
4
  description = "Enterprise-grade knowledge loading, indexing, and search for Python"
5
5
  authors = [
6
6
  {name = "David Marsa", email = "david.marsa@neomanex.com"},