gnosisllm-knowledge 0.3.0__tar.gz → 0.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/PKG-INFO +30 -10
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/README.md +29 -9
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/pyproject.toml +1 -1
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/api/knowledge.py +233 -35
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/memory/indexer.py +27 -2
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/memory/searcher.py +132 -10
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/agentic.py +14 -9
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/config.py +7 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/indexer.py +48 -3
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/mappings.py +12 -4
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/searcher.py +64 -6
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/setup.py +29 -33
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/app.py +58 -19
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/agentic.py +15 -9
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/load.py +169 -19
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/memory.py +10 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/search.py +9 -10
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/setup.py +25 -1
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/utils/config.py +4 -4
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/__init__.py +13 -0
- gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/core/domain/discovery.py +166 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/document.py +14 -19
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/search.py +10 -25
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/source.py +11 -12
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/events/__init__.py +8 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/events/types.py +122 -5
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/exceptions.py +93 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/searcher.py +30 -1
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/streaming.py +10 -4
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/__init__.py +8 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/config.py +27 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/neoreader.py +31 -3
- gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/__init__.py +5 -1
- gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/loaders/discovery.py +338 -0
- gnosisllm_knowledge-0.4.3/src/gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/factory.py +46 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/indexing.py +51 -21
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/search.py +42 -28
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/streaming_pipeline.py +45 -7
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/api/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/api/memory.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/memory/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/client.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/config.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/backends/opensearch/memory/setup.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/chunking/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/chunking/fixed.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/chunking/sentence.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/commands/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/display/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/display/service.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/cli/utils/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/memory.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/domain/result.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/events/emitter.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/chunker.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/fetcher.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/loader.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/memory.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/interfaces/setup.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/streaming/__init__.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/core/streaming/pipeline.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/fetchers/http.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/base.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/sitemap.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/sitemap_streaming.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/loaders/website.py +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/py.typed +0 -0
- {gnosisllm_knowledge-0.3.0 → gnosisllm_knowledge-0.4.3}/src/gnosisllm_knowledge/services/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: gnosisllm-knowledge
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.4.3
|
|
4
4
|
Summary: Enterprise-grade knowledge loading, indexing, and search for Python
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: knowledge-base,rag,semantic-search,vector-search,opensearch,llm,embeddings,enterprise
|
|
@@ -46,7 +46,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
|
|
|
46
46
|
- **Multiple Loaders**: Load content from websites, sitemaps, and files
|
|
47
47
|
- **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
|
|
48
48
|
- **OpenSearch Backend**: Production-ready with k-NN vector search
|
|
49
|
-
- **Multi-Tenancy**:
|
|
49
|
+
- **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
|
|
50
50
|
- **Event-Driven**: Observer pattern for progress tracking and monitoring
|
|
51
51
|
- **SOLID Architecture**: Clean, maintainable, and extensible codebase
|
|
52
52
|
|
|
@@ -144,14 +144,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
|
|
|
144
144
|
|
|
145
145
|
Options:
|
|
146
146
|
--type Source type: website, sitemap (auto-detects)
|
|
147
|
-
--index Target index name (
|
|
148
|
-
--account-id Multi-tenant account ID
|
|
147
|
+
--index Target index name (e.g., knowledge-tenant-123)
|
|
149
148
|
--collection-id Collection grouping ID
|
|
150
149
|
--batch-size Documents per batch (default: 100)
|
|
151
150
|
--max-urls Max URLs from sitemap (default: 1000)
|
|
152
151
|
--dry-run Preview without indexing
|
|
153
152
|
```
|
|
154
153
|
|
|
154
|
+
Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
|
|
155
|
+
|
|
155
156
|
### Search
|
|
156
157
|
|
|
157
158
|
Search indexed content with multiple modes:
|
|
@@ -161,14 +162,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
|
|
|
161
162
|
|
|
162
163
|
Options:
|
|
163
164
|
--mode Search mode: semantic, keyword, hybrid, agentic
|
|
164
|
-
--index Index to search (
|
|
165
|
+
--index Index to search (e.g., knowledge-tenant-123)
|
|
165
166
|
--limit Max results (default: 5)
|
|
166
|
-
--account-id Filter by account
|
|
167
167
|
--collection-ids Filter by collections (comma-separated)
|
|
168
168
|
--json Output as JSON for scripting
|
|
169
169
|
--interactive Interactive search session
|
|
170
170
|
```
|
|
171
171
|
|
|
172
|
+
Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
|
|
173
|
+
|
|
172
174
|
## Architecture
|
|
173
175
|
|
|
174
176
|
```
|
|
@@ -319,22 +321,40 @@ agent_body = {
|
|
|
319
321
|
|
|
320
322
|
## Multi-Tenancy
|
|
321
323
|
|
|
324
|
+
This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
|
|
325
|
+
|
|
322
326
|
```python
|
|
323
|
-
#
|
|
327
|
+
# The calling application (e.g., API) constructs tenant-specific index names
|
|
328
|
+
index_name = f"knowledge-{account_id}"
|
|
329
|
+
|
|
330
|
+
# Create Knowledge instance for the tenant
|
|
331
|
+
knowledge = Knowledge.from_opensearch(
|
|
332
|
+
host="localhost",
|
|
333
|
+
port=9200,
|
|
334
|
+
index_prefix=index_name, # knowledge-tenant-123
|
|
335
|
+
)
|
|
336
|
+
|
|
337
|
+
# Load content to tenant's isolated index
|
|
324
338
|
await knowledge.load(
|
|
325
339
|
source="https://docs.example.com/sitemap.xml",
|
|
326
|
-
account_id="tenant-123",
|
|
327
340
|
collection_id="docs",
|
|
328
341
|
)
|
|
329
342
|
|
|
330
|
-
# Search within tenant
|
|
343
|
+
# Search within tenant's index (no account_id filter needed)
|
|
331
344
|
results = await knowledge.search(
|
|
332
345
|
"query",
|
|
333
|
-
account_id="tenant-123",
|
|
334
346
|
collection_ids=["docs"],
|
|
335
347
|
)
|
|
336
348
|
```
|
|
337
349
|
|
|
350
|
+
**Note**: For audit purposes, you can store `account_id` in document metadata:
|
|
351
|
+
```python
|
|
352
|
+
await knowledge.load(
|
|
353
|
+
source="https://docs.example.com/sitemap.xml",
|
|
354
|
+
document_defaults={"metadata": {"account_id": "tenant-123"}},
|
|
355
|
+
)
|
|
356
|
+
```
|
|
357
|
+
|
|
338
358
|
## Agentic Memory
|
|
339
359
|
|
|
340
360
|
Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
|
|
@@ -11,7 +11,7 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
|
|
|
11
11
|
- **Multiple Loaders**: Load content from websites, sitemaps, and files
|
|
12
12
|
- **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
|
|
13
13
|
- **OpenSearch Backend**: Production-ready with k-NN vector search
|
|
14
|
-
- **Multi-Tenancy**:
|
|
14
|
+
- **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
|
|
15
15
|
- **Event-Driven**: Observer pattern for progress tracking and monitoring
|
|
16
16
|
- **SOLID Architecture**: Clean, maintainable, and extensible codebase
|
|
17
17
|
|
|
@@ -109,14 +109,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
|
|
|
109
109
|
|
|
110
110
|
Options:
|
|
111
111
|
--type Source type: website, sitemap (auto-detects)
|
|
112
|
-
--index Target index name (
|
|
113
|
-
--account-id Multi-tenant account ID
|
|
112
|
+
--index Target index name (e.g., knowledge-tenant-123)
|
|
114
113
|
--collection-id Collection grouping ID
|
|
115
114
|
--batch-size Documents per batch (default: 100)
|
|
116
115
|
--max-urls Max URLs from sitemap (default: 1000)
|
|
117
116
|
--dry-run Preview without indexing
|
|
118
117
|
```
|
|
119
118
|
|
|
119
|
+
Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
|
|
120
|
+
|
|
120
121
|
### Search
|
|
121
122
|
|
|
122
123
|
Search indexed content with multiple modes:
|
|
@@ -126,14 +127,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
|
|
|
126
127
|
|
|
127
128
|
Options:
|
|
128
129
|
--mode Search mode: semantic, keyword, hybrid, agentic
|
|
129
|
-
--index Index to search (
|
|
130
|
+
--index Index to search (e.g., knowledge-tenant-123)
|
|
130
131
|
--limit Max results (default: 5)
|
|
131
|
-
--account-id Filter by account
|
|
132
132
|
--collection-ids Filter by collections (comma-separated)
|
|
133
133
|
--json Output as JSON for scripting
|
|
134
134
|
--interactive Interactive search session
|
|
135
135
|
```
|
|
136
136
|
|
|
137
|
+
Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
|
|
138
|
+
|
|
137
139
|
## Architecture
|
|
138
140
|
|
|
139
141
|
```
|
|
@@ -284,22 +286,40 @@ agent_body = {
|
|
|
284
286
|
|
|
285
287
|
## Multi-Tenancy
|
|
286
288
|
|
|
289
|
+
This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
|
|
290
|
+
|
|
287
291
|
```python
|
|
288
|
-
#
|
|
292
|
+
# The calling application (e.g., API) constructs tenant-specific index names
|
|
293
|
+
index_name = f"knowledge-{account_id}"
|
|
294
|
+
|
|
295
|
+
# Create Knowledge instance for the tenant
|
|
296
|
+
knowledge = Knowledge.from_opensearch(
|
|
297
|
+
host="localhost",
|
|
298
|
+
port=9200,
|
|
299
|
+
index_prefix=index_name, # knowledge-tenant-123
|
|
300
|
+
)
|
|
301
|
+
|
|
302
|
+
# Load content to tenant's isolated index
|
|
289
303
|
await knowledge.load(
|
|
290
304
|
source="https://docs.example.com/sitemap.xml",
|
|
291
|
-
account_id="tenant-123",
|
|
292
305
|
collection_id="docs",
|
|
293
306
|
)
|
|
294
307
|
|
|
295
|
-
# Search within tenant
|
|
308
|
+
# Search within tenant's index (no account_id filter needed)
|
|
296
309
|
results = await knowledge.search(
|
|
297
310
|
"query",
|
|
298
|
-
account_id="tenant-123",
|
|
299
311
|
collection_ids=["docs"],
|
|
300
312
|
)
|
|
301
313
|
```
|
|
302
314
|
|
|
315
|
+
**Note**: For audit purposes, you can store `account_id` in document metadata:
|
|
316
|
+
```python
|
|
317
|
+
await knowledge.load(
|
|
318
|
+
source="https://docs.example.com/sitemap.xml",
|
|
319
|
+
document_defaults={"metadata": {"account_id": "tenant-123"}},
|
|
320
|
+
)
|
|
321
|
+
```
|
|
322
|
+
|
|
303
323
|
## Agentic Memory
|
|
304
324
|
|
|
305
325
|
Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
|