gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. gnosisllm_knowledge/__init__.py +91 -39
  2. gnosisllm_knowledge/api/__init__.py +3 -2
  3. gnosisllm_knowledge/api/knowledge.py +502 -32
  4. gnosisllm_knowledge/api/memory.py +966 -0
  5. gnosisllm_knowledge/backends/__init__.py +14 -5
  6. gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  7. gnosisllm_knowledge/backends/memory/searcher.py +111 -10
  8. gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
  9. gnosisllm_knowledge/backends/opensearch/config.py +49 -28
  10. gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
  11. gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
  12. gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
  13. gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
  14. gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
  15. gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
  16. gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  17. gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
  18. gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
  19. gnosisllm_knowledge/cli/app.py +436 -31
  20. gnosisllm_knowledge/cli/commands/agentic.py +26 -9
  21. gnosisllm_knowledge/cli/commands/load.py +169 -19
  22. gnosisllm_knowledge/cli/commands/memory.py +733 -0
  23. gnosisllm_knowledge/cli/commands/search.py +9 -10
  24. gnosisllm_knowledge/cli/commands/setup.py +49 -23
  25. gnosisllm_knowledge/cli/display/service.py +43 -0
  26. gnosisllm_knowledge/cli/utils/config.py +62 -4
  27. gnosisllm_knowledge/core/domain/__init__.py +54 -0
  28. gnosisllm_knowledge/core/domain/discovery.py +166 -0
  29. gnosisllm_knowledge/core/domain/document.py +19 -19
  30. gnosisllm_knowledge/core/domain/memory.py +440 -0
  31. gnosisllm_knowledge/core/domain/result.py +11 -3
  32. gnosisllm_knowledge/core/domain/search.py +12 -25
  33. gnosisllm_knowledge/core/domain/source.py +11 -12
  34. gnosisllm_knowledge/core/events/__init__.py +8 -0
  35. gnosisllm_knowledge/core/events/types.py +198 -5
  36. gnosisllm_knowledge/core/exceptions.py +227 -0
  37. gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
  38. gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  39. gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  40. gnosisllm_knowledge/core/interfaces/memory.py +524 -0
  41. gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
  42. gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
  43. gnosisllm_knowledge/core/streaming/__init__.py +36 -0
  44. gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
  45. gnosisllm_knowledge/fetchers/__init__.py +8 -0
  46. gnosisllm_knowledge/fetchers/config.py +27 -0
  47. gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  48. gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  49. gnosisllm_knowledge/loaders/__init__.py +5 -1
  50. gnosisllm_knowledge/loaders/base.py +3 -4
  51. gnosisllm_knowledge/loaders/discovery.py +338 -0
  52. gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  53. gnosisllm_knowledge/loaders/factory.py +46 -0
  54. gnosisllm_knowledge/loaders/sitemap.py +129 -1
  55. gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
  56. gnosisllm_knowledge/services/indexing.py +100 -93
  57. gnosisllm_knowledge/services/search.py +84 -31
  58. gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
  59. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
  60. gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
  62. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
  63. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,334 @@
1
+ """Streaming indexing pipeline with bounded memory.
2
+
3
+ This module provides the StreamingIndexingPipeline that orchestrates
4
+ the load -> index pipeline with guaranteed bounded memory usage.
5
+
6
+ Note:
7
+ This module is tenant-agnostic. Multi-tenancy should be handled at the
8
+ API layer by using separate indices per account (e.g.,
9
+ gnosisllm-{account_id}-knowledge) rather than filtering by account_id.
10
+ The account_id parameters are deprecated and will be ignored.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import time
17
+ import warnings
18
+ from dataclasses import dataclass, field
19
+ from typing import TYPE_CHECKING, Any
20
+
21
+ from gnosisllm_knowledge.core.domain.document import Document, DocumentStatus
22
+ from gnosisllm_knowledge.core.domain.result import IndexResult
23
+ from gnosisllm_knowledge.core.events.emitter import EventEmitter
24
+ from gnosisllm_knowledge.core.events.types import (
25
+ BatchCompletedEvent,
26
+ StreamingCompletedEvent,
27
+ StreamingProgressEvent,
28
+ )
29
+ from gnosisllm_knowledge.core.streaming.pipeline import PipelineConfig
30
+
31
+ if TYPE_CHECKING:
32
+ from gnosisllm_knowledge.core.interfaces.indexer import IDocumentIndexer
33
+ from gnosisllm_knowledge.loaders.sitemap import SitemapLoader
34
+
35
+
36
@dataclass
class StreamingProgress:
    """Mutable counters describing how far a streaming run has advanced.

    Every field has a default, so a bare ``StreamingProgress()`` represents
    a run that has not started yet.

    Attributes:
        urls_discovered: Number of URLs found so far.
        urls_processed: Number of URLs already fetched and processed.
        documents_indexed: Number of documents indexed successfully.
        documents_failed: Number of documents that could not be indexed.
        current_phase: Name of the phase the pipeline is currently in
            (e.g. discovering, fetching, indexing, completed).
        memory_estimate_mb: Estimated memory in use, in megabytes.
    """

    # URL-level counters.
    urls_discovered: int = 0
    urls_processed: int = 0

    # Document-level counters.
    documents_indexed: int = 0
    documents_failed: int = 0

    # Free-form phase label; a fresh instance starts in "initializing".
    current_phase: str = "initializing"

    # Memory estimate in MB.
    memory_estimate_mb: float = 0.0
55
+
56
+
57
@dataclass
class StreamingPipelineResult:
    """Summary record for one run of the streaming pipeline.

    Only ``success`` is required; every other field defaults to an empty
    or zero value.

    Attributes:
        success: True when the pipeline completed successfully.
        indexed_count: Number of documents indexed in total.
        failed_count: Number of documents that failed in total.
        urls_processed: Number of URLs processed in total.
        batches_processed: Number of batches that were processed.
        duration_ms: Overall run time, in milliseconds.
        errors: Errors collected during the run, one dict per error.
    """

    # Required outcome flag (no default, so it must come first).
    success: bool

    # Aggregate counters.
    indexed_count: int = 0
    failed_count: int = 0
    urls_processed: int = 0
    batches_processed: int = 0

    # Total elapsed time in milliseconds.
    duration_ms: float = 0.0

    # default_factory gives each instance its own list.
    errors: list[dict[str, Any]] = field(default_factory=list)
78
+
79
+
80
class StreamingIndexingPipeline:
    """Orchestrates streaming load -> index pipeline with bounded memory.

    This pipeline ensures:
    1. URLs are discovered and processed in batches
    2. Documents are indexed immediately after fetching
    3. Memory is freed between batches
    4. Progress is tracked and emitted as events
    5. Errors don't stop the entire pipeline

    Memory Guarantees:
    - URL storage: O(url_batch_size)
    - Document storage: O(index_batch_size)
    - In-flight fetches: O(fetch_concurrency * avg_page_size)
    - Total: Bounded, independent of sitemap size

    Multi-tenancy is expected to be handled by the caller through index
    isolation (one index per account); ``account_id`` arguments are
    deprecated and ignored.

    Example:
        ```python
        pipeline = StreamingIndexingPipeline(
            loader=sitemap_loader,
            indexer=opensearch_indexer,
            config=PipelineConfig(
                url_batch_size=50,
                fetch_concurrency=10,
                index_batch_size=100,
            ),
        )

        result = await pipeline.execute(
            source="https://example.com/sitemap.xml",
            index_name="knowledge-account123",
        )
        ```
    """

    # Single source of truth for the deprecation text used by both
    # ``execute`` and ``_enrich_document``.
    _ACCOUNT_ID_DEPRECATION = (
        "account_id parameter is deprecated and will be ignored. "
        "Use index isolation (separate index per account) instead."
    )

    def __init__(
        self,
        loader: SitemapLoader,
        indexer: IDocumentIndexer,
        config: PipelineConfig | None = None,
        events: EventEmitter | None = None,
    ) -> None:
        """Initialize the streaming pipeline.

        Args:
            loader: Sitemap loader instance.
            indexer: Document indexer instance.
            config: Pipeline configuration. A default ``PipelineConfig()``
                is created when omitted.
            events: Event emitter for progress events. A fresh
                ``EventEmitter()`` is created when omitted.
        """
        self._loader = loader
        self._indexer = indexer
        self._config = config or PipelineConfig()
        self._events = events or EventEmitter()
        self._logger = logging.getLogger(__name__)
        self._progress = StreamingProgress()

    async def execute(
        self,
        source: str,
        index_name: str,
        *,
        account_id: str | None = None,
        collection_id: str | None = None,
        collection_name: str | None = None,
        source_id: str | None = None,
        **options: Any,
    ) -> IndexResult:
        """Execute the streaming pipeline.

        Note:
            This method is tenant-agnostic. Multi-tenancy should be handled
            at the API layer by using separate indices per account. The
            account_id parameter is deprecated and will be ignored.

        Args:
            source: Sitemap URL.
            index_name: Target OpenSearch index.
            account_id: Deprecated. This parameter is ignored.
                Use index isolation (separate index per account) instead.
            collection_id: Collection identifier attached to each document.
            collection_name: Collection name for display.
            source_id: Source identifier.
            **options: Additional loader options, forwarded unchanged to
                the loader's ``load_streaming_with_indexing``.

        Returns:
            Aggregated index result. If the loader raises, the exception is
            logged and a result with ``success=False`` is returned carrying
            the counts accumulated so far and the error message.
        """
        if account_id is not None:
            warnings.warn(
                self._ACCOUNT_ID_DEPRECATION,
                DeprecationWarning,
                stacklevel=2,
            )

        # perf_counter is monotonic, so measured durations cannot go
        # negative or jump when the system wall clock is adjusted.
        start_time = time.perf_counter()
        self._progress = StreamingProgress(current_phase="starting")
        await self._emit_progress()

        batch_count = 0

        # Callback handed to the loader: enriches each fetched batch with
        # source/collection info, indexes it, and updates progress.
        async def index_batch(documents: list[Document]) -> IndexResult:
            nonlocal batch_count

            enriched = [
                self._enrich_document(
                    doc,
                    source=source,
                    collection_id=collection_id,
                    collection_name=collection_name,
                    source_id=source_id,
                )
                for doc in documents
            ]

            batch_start = time.perf_counter()
            result = await self._indexer.bulk_index(enriched, index_name)
            batch_duration = (time.perf_counter() - batch_start) * 1000

            self._progress.documents_indexed += result.indexed_count
            self._progress.documents_failed += result.failed_count
            self._progress.current_phase = "indexing"
            await self._emit_progress()

            # Emit batch completed event (batch_index is zero-based: the
            # counter is incremented only after the event is emitted).
            self._events.emit(
                BatchCompletedEvent(
                    batch_index=batch_count,
                    success_count=result.indexed_count,
                    failure_count=result.failed_count,
                    duration_ms=batch_duration,
                )
            )
            batch_count += 1

            return result

        # Execute streaming load with indexing
        self._progress.current_phase = "processing"
        await self._emit_progress()

        try:
            result = await self._loader.load_streaming_with_indexing(
                source=source,
                index_callback=index_batch,
                url_batch_size=self._config.url_batch_size,
                doc_batch_size=self._config.index_batch_size,
                config=self._config,
                **options,
            )
        except Exception as e:
            # Top-level boundary: report the failure through the result
            # instead of propagating, keeping the partial counts.
            self._logger.exception("Streaming pipeline failed: %s", e)
            duration_ms = (time.perf_counter() - start_time) * 1000
            return IndexResult(
                success=False,
                indexed_count=self._progress.documents_indexed,
                failed_count=self._progress.documents_failed,
                error_message=str(e),
                duration_ms=duration_ms,
            )

        duration_ms = (time.perf_counter() - start_time) * 1000
        self._progress.current_phase = "completed"
        await self._emit_progress()

        # Emit completion event.
        # NOTE(review): nothing in this class updates urls_discovered or
        # urls_processed, so total_urls reports whatever the progress
        # object holds — confirm whether the loader is expected to set it.
        self._events.emit(
            StreamingCompletedEvent(
                total_urls=self._progress.urls_processed,
                total_documents=result.indexed_count + result.failed_count,
                indexed_count=result.indexed_count,
                failed_count=result.failed_count,
                duration_ms=duration_ms,
            )
        )

        return IndexResult(
            success=result.failed_count == 0,
            indexed_count=result.indexed_count,
            failed_count=result.failed_count,
            errors=result.errors,
            duration_ms=duration_ms,
        )

    def _enrich_document(
        self,
        doc: Document,
        source: str,
        collection_id: str | None,
        collection_name: str | None,
        source_id: str | None,
        account_id: str | None = None,
    ) -> Document:
        """Add source info to document.

        Note:
            This method is tenant-agnostic. Multi-tenancy should be handled
            at the API layer by using separate indices per account. The
            account_id parameter is deprecated and will be ignored.

        Args:
            doc: Original document.
            source: Source URL.
            collection_id: Collection identifier.
            collection_name: Collection name for display.
            source_id: Source identifier.
            account_id: Deprecated. This parameter is ignored.
                Use index isolation (separate index per account) instead.

        Returns:
            New Document with source info; the input document is not
            modified.
        """
        if account_id is not None:
            warnings.warn(
                self._ACCOUNT_ID_DEPRECATION,
                DeprecationWarning,
                stacklevel=2,
            )

        # NOTE(review): status is set to INDEXED here, before bulk_index
        # has run — confirm the indexer does not rely on this field.
        return Document(
            content=doc.content,
            source=source,
            doc_id=doc.doc_id,
            url=doc.url,
            title=doc.title,
            collection_id=collection_id,
            collection_name=collection_name,
            source_id=source_id,
            chunk_index=doc.chunk_index,
            total_chunks=doc.total_chunks,
            parent_doc_id=doc.parent_doc_id,
            status=DocumentStatus.INDEXED,
            metadata=doc.metadata,
        )

    async def _emit_progress(self) -> None:
        """Emit a progress event built from the current progress counters."""
        await self._events.emit_async(
            StreamingProgressEvent(
                urls_discovered=self._progress.urls_discovered,
                urls_processed=self._progress.urls_processed,
                documents_indexed=self._progress.documents_indexed,
                documents_failed=self._progress.documents_failed,
                phase=self._progress.current_phase,
                memory_mb=self._progress.memory_estimate_mb,
            )
        )

    @property
    def progress(self) -> StreamingProgress:
        """Get current progress."""
        return self._progress
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: gnosisllm-knowledge
3
- Version: 0.2.0
3
+ Version: 0.4.0
4
4
  Summary: Enterprise-grade knowledge loading, indexing, and search for Python
5
5
  License: MIT
6
6
  Keywords: knowledge-base,rag,semantic-search,vector-search,opensearch,llm,embeddings,enterprise
@@ -41,10 +41,12 @@ Enterprise-grade knowledge loading, indexing, and semantic search library for Py
41
41
 
42
42
  - **Semantic Search**: Vector-based similarity search using OpenAI embeddings
43
43
  - **Hybrid Search**: Combine semantic and keyword (BM25) search for best results
44
+ - **Agentic Search**: AI-powered search with reasoning and natural language answers
45
+ - **Agentic Memory**: Conversational memory with automatic fact extraction
44
46
  - **Multiple Loaders**: Load content from websites, sitemaps, and files
45
47
  - **Intelligent Chunking**: Sentence-aware text splitting with configurable overlap
46
48
  - **OpenSearch Backend**: Production-ready with k-NN vector search
47
- - **Multi-Tenancy**: Built-in support for account and collection isolation
49
+ - **Multi-Tenancy**: Index isolation for complete tenant separation (tenant-agnostic library)
48
50
  - **Event-Driven**: Observer pattern for progress tracking and monitoring
49
51
  - **SOLID Architecture**: Clean, maintainable, and extensible codebase
50
52
 
@@ -142,14 +144,15 @@ gnosisllm-knowledge load <URL> [OPTIONS]
142
144
 
143
145
  Options:
144
146
  --type Source type: website, sitemap (auto-detects)
145
- --index Target index name (default: knowledge)
146
- --account-id Multi-tenant account ID
147
+ --index Target index name (e.g., knowledge-tenant-123)
147
148
  --collection-id Collection grouping ID
148
149
  --batch-size Documents per batch (default: 100)
149
150
  --max-urls Max URLs from sitemap (default: 1000)
150
151
  --dry-run Preview without indexing
151
152
  ```
152
153
 
154
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names (e.g., `--index knowledge-tenant-123`).
155
+
153
156
  ### Search
154
157
 
155
158
  Search indexed content with multiple modes:
@@ -159,14 +162,15 @@ gnosisllm-knowledge search <QUERY> [OPTIONS]
159
162
 
160
163
  Options:
161
164
  --mode Search mode: semantic, keyword, hybrid, agentic
162
- --index Index to search (default: knowledge)
165
+ --index Index to search (e.g., knowledge-tenant-123)
163
166
  --limit Max results (default: 5)
164
- --account-id Filter by account
165
167
  --collection-ids Filter by collections (comma-separated)
166
168
  --json Output as JSON for scripting
167
169
  --interactive Interactive search session
168
170
  ```
169
171
 
172
+ Multi-tenancy is achieved through index isolation. Use `--index` with tenant-specific names.
173
+
170
174
  ## Architecture
171
175
 
172
176
  ```
@@ -317,22 +321,81 @@ agent_body = {
317
321
 
318
322
  ## Multi-Tenancy
319
323
 
324
+ This library is **tenant-agnostic**. Multi-tenancy is achieved through **index isolation** - each tenant gets their own OpenSearch index.
325
+
320
326
  ```python
321
- # Load with tenant isolation
327
+ # The calling application (e.g., API) constructs tenant-specific index names
328
+ index_name = f"knowledge-{account_id}"
329
+
330
+ # Create Knowledge instance for the tenant
331
+ knowledge = Knowledge.from_opensearch(
332
+ host="localhost",
333
+ port=9200,
334
+ index_prefix=index_name, # knowledge-tenant-123
335
+ )
336
+
337
+ # Load content to tenant's isolated index
322
338
  await knowledge.load(
323
339
  source="https://docs.example.com/sitemap.xml",
324
- account_id="tenant-123",
325
340
  collection_id="docs",
326
341
  )
327
342
 
328
- # Search within tenant
343
+ # Search within tenant's index (no account_id filter needed)
329
344
  results = await knowledge.search(
330
345
  "query",
331
- account_id="tenant-123",
332
346
  collection_ids=["docs"],
333
347
  )
334
348
  ```
335
349
 
350
+ **Note**: For audit purposes, you can store `account_id` in document metadata:
351
+ ```python
352
+ await knowledge.load(
353
+ source="https://docs.example.com/sitemap.xml",
354
+ document_defaults={"metadata": {"account_id": "tenant-123"}},
355
+ )
356
+ ```
357
+
358
+ ## Agentic Memory
359
+
360
+ Conversational memory with automatic fact extraction using OpenSearch's ML Memory plugin.
361
+
362
+ ```bash
363
+ # Setup memory connectors
364
+ gnosisllm-knowledge memory setup --openai-key sk-...
365
+
366
+ # Create container and store conversations
367
+ gnosisllm-knowledge memory container create my-memory
368
+ gnosisllm-knowledge memory store <container-id> --file messages.json --user-id alice
369
+ gnosisllm-knowledge memory recall <container-id> "user preferences" --user-id alice
370
+ ```
371
+
372
+ ```python
373
+ from gnosisllm_knowledge import Memory, MemoryStrategy, StrategyConfig, Message
374
+
375
+ memory = Memory.from_env()
376
+
377
+ # Create container with strategies
378
+ container = await memory.create_container(
379
+ name="agent-memory",
380
+ strategies=[
381
+ StrategyConfig(type=MemoryStrategy.SEMANTIC, namespace=["user_id"]),
382
+ ],
383
+ )
384
+
385
+ # Store conversation with fact extraction
386
+ await memory.store(
387
+ container_id=container.id,
388
+ messages=[Message(role="user", content="I prefer dark mode")],
389
+ user_id="alice",
390
+ infer=True,
391
+ )
392
+
393
+ # Recall memories
394
+ result = await memory.recall(container.id, "preferences", user_id="alice")
395
+ ```
396
+
397
+ See [docs/memory.md](docs/memory.md) for full documentation.
398
+
336
399
  ## Event Tracking
337
400
 
338
401
  ```python
@@ -0,0 +1,81 @@
1
+ gnosisllm_knowledge/__init__.py,sha256=Egfn6ROa9OttWweCBVocNCUivKwWfdeFEFgpwAoo_ww,4809
2
+ gnosisllm_knowledge/api/__init__.py,sha256=xVG6wMjbFa7_qDVRKlp43O4f0EQmPr9w3pAOGmmh32U,200
3
+ gnosisllm_knowledge/api/knowledge.py,sha256=sjBXBe__wRdGoawmLfDKw9W2o8efOTy0aDVPbt4SMyQ,33718
4
+ gnosisllm_knowledge/api/memory.py,sha256=BrYWS3fCPaymwjzlnLqAKiCrUuMlUDMod-DcrTaarkQ,28926
5
+ gnosisllm_knowledge/backends/__init__.py,sha256=QkVcZbr-u_vDyGiOMs2jrV1oA21VyCLd6g6aJZcSJxk,927
6
+ gnosisllm_knowledge/backends/memory/__init__.py,sha256=Q_2Fh0-Xs1f7CsNn9V7z8NLW0P7F1owmxZQ4k3fTSXo,238
7
+ gnosisllm_knowledge/backends/memory/indexer.py,sha256=4BotG-jveyEFjw7F8K-Co-to9NlydvyDA67JncRtD_E,11284
8
+ gnosisllm_knowledge/backends/memory/searcher.py,sha256=wCnt_ssfA4YxASjSQ7QnXidTqb0Fk0cYYoqKEuiZBMQ,18591
9
+ gnosisllm_knowledge/backends/opensearch/__init__.py,sha256=wwg3QPVVMLkq5qHUGw_9m6gvXO3hxa-xaBj3_BRJlN4,679
10
+ gnosisllm_knowledge/backends/opensearch/agentic.py,sha256=VT9ubsgNacgHEIibKtC1mOEAFKV8VjZmf1NhH_exMvk,36861
11
+ gnosisllm_knowledge/backends/opensearch/config.py,sha256=9xHMvr5-vnCcGFOPoO2_bbKuB70Gm979BqrhvcKsKm0,8640
12
+ gnosisllm_knowledge/backends/opensearch/indexer.py,sha256=0KGagbZ6lMXat9DMSDQgBM-hckNZtDn_eStSlUxQpGo,16803
13
+ gnosisllm_knowledge/backends/opensearch/mappings.py,sha256=zyUBzSPanGEDMUCDs98X_4uwLqLfRikLIO8ut03oc-w,8279
14
+ gnosisllm_knowledge/backends/opensearch/memory/__init__.py,sha256=9EganuADfP6-1lky1Mk_oogr2d9pCmOFcNUjO7UMVZo,403
15
+ gnosisllm_knowledge/backends/opensearch/memory/client.py,sha256=ho0Sn1lXK7EDy0RR6HXjTX5pekrfRc7Vuo4YfBiElr4,44636
16
+ gnosisllm_knowledge/backends/opensearch/memory/config.py,sha256=astoJqofwhl4KUZ6vTUG-yiG1h5hfpJjYdsBzhKzx_E,4633
17
+ gnosisllm_knowledge/backends/opensearch/memory/setup.py,sha256=0ILimioesxx2GLQ4I3uCA-Nu9Wy09PI-rSA85t8ssLw,10372
18
+ gnosisllm_knowledge/backends/opensearch/queries.py,sha256=cubU6fUCeSA6Ag6uDCgrF_pZAj2BgVVmAot2t5sgbxY,13590
19
+ gnosisllm_knowledge/backends/opensearch/searcher.py,sha256=3WvvIjjrEr4iZbDoBGdEGsgARJagHEe2CDfJak0_Gbs,20192
20
+ gnosisllm_knowledge/backends/opensearch/setup.py,sha256=G9G0KZkz_ZynJjRIRWJImLDEFvQQE2wWRJfq_mlfpIs,59483
21
+ gnosisllm_knowledge/chunking/__init__.py,sha256=XKrk04DqwhhPxxP_SLGnQrVUo2fexmvzuEN9YzO4Jxk,225
22
+ gnosisllm_knowledge/chunking/fixed.py,sha256=LXZhRbYbtg4hQzqrAKyGrM_zmDhqJWXDi6cHOiJlGrM,3905
23
+ gnosisllm_knowledge/chunking/sentence.py,sha256=EqY2Y1dpfNK_qukXQC9QYrKN8b1dqyyX9TZN5bUYGqs,7271
24
+ gnosisllm_knowledge/cli/__init__.py,sha256=G8AO-LwNQqXSKZCFxLiXggOQf-xmNITWVs25xWl_-Cg,564
25
+ gnosisllm_knowledge/cli/app.py,sha256=RX7p6GlqcwtbxxxNkEpEXf-Bhjg00ZEF3DvG446rAeA,27411
26
+ gnosisllm_knowledge/cli/commands/__init__.py,sha256=TGrGl_AxVVXpAv01iO4JCUtWW8wzSQTBtQFj4vPeE0A,303
27
+ gnosisllm_knowledge/cli/commands/agentic.py,sha256=QRNaHPryAQKpeVKA3oXj-Xs3rtYUqmRFddC0K0MO7nY,20674
28
+ gnosisllm_knowledge/cli/commands/load.py,sha256=wpJ8ltfC-iTNn8UGAtM5jlToGob9Ia9jDkbDhub-iy8,20315
29
+ gnosisllm_knowledge/cli/commands/memory.py,sha256=ae6AVcdVqJY3eeGBfZv7GHKeFwue-rkuCd8kULb6kQw,24503
30
+ gnosisllm_knowledge/cli/commands/search.py,sha256=eiXdjAvco7I3jU68VcWVabUEt8VlRyqoPDE61FEZ7wo,15385
31
+ gnosisllm_knowledge/cli/commands/setup.py,sha256=yNNqQsydOuN8qwP_nu1coNEjP94EqJmRLU8pv3hve7U,9428
32
+ gnosisllm_knowledge/cli/display/__init__.py,sha256=hLQ3XCBxw8C5WOT-wu1lqFyrD7FJVPHSWXUZM24Qnjc,155
33
+ gnosisllm_knowledge/cli/display/service.py,sha256=Aq2-xSjLtpmRo7K41ca64yHCB338C1r2lb3jLjvKOyI,19226
34
+ gnosisllm_knowledge/cli/utils/__init__.py,sha256=BlzSMhfXM_x9EAv304krVb_DHvdJaKLrlbb716LKkQI,106
35
+ gnosisllm_knowledge/cli/utils/config.py,sha256=UZm_28ZvdviOTeP_Lr54zt4reZ88yOLAZ2w1WyNefJQ,9573
36
+ gnosisllm_knowledge/core/__init__.py,sha256=yeZYSnaypTdStzt3_FERALGUgfC5fuudTCGixUm1_z0,1827
37
+ gnosisllm_knowledge/core/domain/__init__.py,sha256=2D6L5lSiKI9mBTymwGVg5FHmryQskVs_xaGCrUZejuM,2048
38
+ gnosisllm_knowledge/core/domain/discovery.py,sha256=j26zJSzrjRut8AOEdk6xBhOlc24Zh6CCnjKEwimrlmM,5315
39
+ gnosisllm_knowledge/core/domain/document.py,sha256=YhIPmUx2WB8kwgetuqeTVTMFrJSbPeHrMt0VgsZkLWc,8003
40
+ gnosisllm_knowledge/core/domain/memory.py,sha256=sDdqX9gf8vdm4UXtQ7RUAzppNXvwqvWrDLisZAB8Fg4,13139
41
+ gnosisllm_knowledge/core/domain/result.py,sha256=W667qa7KLYXr1cDR6-bz4AN4oancLxNnb_CpynP_XJ0,6112
42
+ gnosisllm_knowledge/core/domain/search.py,sha256=qZEiCPaCNo_uMonPItnfgen0GVNCgVsOZneIuLS8vuM,10180
43
+ gnosisllm_knowledge/core/domain/source.py,sha256=heMIDGvPgO2I91YFYhAjzYz9PHeXDxK59NQ3pMeR0XY,4491
44
+ gnosisllm_knowledge/core/events/__init__.py,sha256=JoUuAeglHMEodE3aUriNN-TVKFkI9Fboyh31XutL4CI,768
45
+ gnosisllm_knowledge/core/events/emitter.py,sha256=qFANzBtgdnQMVgLFE76rA-4i6Es_7ItaI-JfOl9vrNc,6940
46
+ gnosisllm_knowledge/core/events/types.py,sha256=cT8AeM1YJC8Qd0PO2iJ06TFa4BRLDZr2mYURQNeKPOc,12151
47
+ gnosisllm_knowledge/core/exceptions.py,sha256=CIoxRgcNt-EefmkzAvjJnvGZqn-qQsCx0e_RawYEKWI,16315
48
+ gnosisllm_knowledge/core/interfaces/__init__.py,sha256=NZQNCTI4xWTnS6RyiHU4POv4RyrtgazQ0XVNDUoeUOs,1169
49
+ gnosisllm_knowledge/core/interfaces/agentic.py,sha256=hvBPw3fEHoNVjLLQj3-ZmB2QLfbQTLrOTYoUcr9KFAM,3896
50
+ gnosisllm_knowledge/core/interfaces/chunker.py,sha256=FUu3B6Ent5Zofs1XLWZ0RqevHBbGd7iNRO7pAg2JqdE,1862
51
+ gnosisllm_knowledge/core/interfaces/fetcher.py,sha256=19u2TKE6rDjX-8l7xIRKUqa6DLmZz_RncPy1p0sN3tk,3318
52
+ gnosisllm_knowledge/core/interfaces/indexer.py,sha256=PMm2cPJhkVKuNcyLA35S9yYBIk-bBGI2KlQiEv6bKcc,6546
53
+ gnosisllm_knowledge/core/interfaces/loader.py,sha256=NKq5Yj1Pmbv1I8AUMDtleUCZO5URZFw6kCdx6uLG7Bk,2903
54
+ gnosisllm_knowledge/core/interfaces/memory.py,sha256=xrgPsqSfmj1-Snmyd35b-HOKipI_9N_FWRQklHj6x8I,13057
55
+ gnosisllm_knowledge/core/interfaces/searcher.py,sha256=iuFZ5moiMCRhkWty8171BlpP7Jf-NjYqeArenD-L_4E,5015
56
+ gnosisllm_knowledge/core/interfaces/setup.py,sha256=9DMKVqoNUrWKbS58QCnfwlGk4sD9R5iGe-WLwlW7z-Q,4457
57
+ gnosisllm_knowledge/core/interfaces/streaming.py,sha256=sQuWTch8gMandFG1AY4xUVbVC-gwijzktq_Pj5QexWc,4160
58
+ gnosisllm_knowledge/core/streaming/__init__.py,sha256=-PKIWbSu9bI3X3H8bucjuoqBGBiH0UOvM9OSn9dh1Rs,814
59
+ gnosisllm_knowledge/core/streaming/pipeline.py,sha256=H3_9gAvemZp5AdsbrZ5OTbY9aDAfwsl58eq7vOhA09k,6429
60
+ gnosisllm_knowledge/fetchers/__init__.py,sha256=zCLxr1q6XUSjJERn4GUZ5-zGvzcAEY3UwnmQdWsHMYA,679
61
+ gnosisllm_knowledge/fetchers/config.py,sha256=Aqkr3zu0dixpAWLeHSkd_3is2d24ea70e6RagO1pBBE,4068
62
+ gnosisllm_knowledge/fetchers/http.py,sha256=nQjH1nKTIR65tmbfW6RWaP3pR5JJe4j33cTzSBWHM-E,5133
63
+ gnosisllm_knowledge/fetchers/neoreader.py,sha256=8gyo88KilndbJaAE-co-v40jlpXwb6QQKoNgnKQDfA8,7728
64
+ gnosisllm_knowledge/fetchers/neoreader_discovery.py,sha256=uyJ1VEl683J2QS7TWaPUQLsWRXMvcYp1HKQqSQ5cFAQ,16656
65
+ gnosisllm_knowledge/loaders/__init__.py,sha256=gCQpuv4Op0sLj_MspGA-gzGTCFUflnF9s3PpqEqfQSU,601
66
+ gnosisllm_knowledge/loaders/base.py,sha256=bqFzWFD-jeBXdovAuiRUgLKvl5ExaBoDrS0L9e5JhqY,13020
67
+ gnosisllm_knowledge/loaders/discovery.py,sha256=I_lhJ01Ho0JxU8wNW9yCvP0SSEMimld_FuIL2R_5Vaw,12487
68
+ gnosisllm_knowledge/loaders/discovery_streaming.py,sha256=u7g0XpBjaTww6Tx10rs72XXvUI2brliK1uslS7RmayI,13042
69
+ gnosisllm_knowledge/loaders/factory.py,sha256=BMaTZry4LRQ2gm3Dt6mariZigdqlHas4yXhoBW_VzGs,8197
70
+ gnosisllm_knowledge/loaders/sitemap.py,sha256=2VZ7iSYjjQ_od-FuHl9x6dFB5rj4XToi6RWfFR45O4Y,14349
71
+ gnosisllm_knowledge/loaders/sitemap_streaming.py,sha256=KkkIg_oJxJrjQP88kS_jvqQvCShGE9vTGVVeiAr0VDE,8728
72
+ gnosisllm_knowledge/loaders/website.py,sha256=i54S7X0wTYNw9jJQfaoWFqMkGJkbAJeBv-OFBNjPQ_g,1587
73
+ gnosisllm_knowledge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
74
+ gnosisllm_knowledge/services/__init__.py,sha256=qOn1CjsB25UQ2K1TQIGMBe9-CUrGxAKhBrNh9Awo8n8,273
75
+ gnosisllm_knowledge/services/indexing.py,sha256=xZDsULtEuQBYyJ8EZvK_Kp-Gpr6oD87DeURtg-CY3A0,13000
76
+ gnosisllm_knowledge/services/search.py,sha256=WcoO9LJIqdpmThrv0iKvs0CvNWnB3fBh8kdOKaBLO9I,11992
77
+ gnosisllm_knowledge/services/streaming_pipeline.py,sha256=WaLAoDLdgfiBCoqh6A_Im-m4NNwDvCVNtd6f5WrKJPs,11500
78
+ gnosisllm_knowledge-0.4.0.dist-info/METADATA,sha256=I1hufeDnDOl_UMdIlPmDgjt_4bv63oDfbmmuQv-grXE,16311
79
+ gnosisllm_knowledge-0.4.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
80
+ gnosisllm_knowledge-0.4.0.dist-info/entry_points.txt,sha256=-4wtWCTX7bT8WessWJyqfkQ99-2aCVuSoTL1S_QpaOo,68
81
+ gnosisllm_knowledge-0.4.0.dist-info/RECORD,,
@@ -1,64 +0,0 @@
1
- gnosisllm_knowledge/__init__.py,sha256=4pqB289D-9CG7nTnzLfGLwZFTkmnl9REtOAhyDhKE-g,3675
2
- gnosisllm_knowledge/api/__init__.py,sha256=YOiYXDvA1UPFKpZve_F-wKxN_NNyyiftHEzqsYmRd0I,129
3
- gnosisllm_knowledge/api/knowledge.py,sha256=on00xHGBe80xq1q2qtmAFTLrdeWGRUKGP8EUiTKVk0o,16791
4
- gnosisllm_knowledge/backends/__init__.py,sha256=8O0Lsi7gDVbviUmHFjy2rV4eZ2R354Amq-G2EL2eZtk,708
5
- gnosisllm_knowledge/backends/memory/__init__.py,sha256=Q_2Fh0-Xs1f7CsNn9V7z8NLW0P7F1owmxZQ4k3fTSXo,238
6
- gnosisllm_knowledge/backends/memory/indexer.py,sha256=R_Yd1gEplzS01o7QfPxjaAZ21VxjfdvatbgrNsyp5lM,10517
7
- gnosisllm_knowledge/backends/memory/searcher.py,sha256=k6OGtppC-UsHB8aIhlK3wSASVOPEokfe6LLV1jbmE5M,15547
8
- gnosisllm_knowledge/backends/opensearch/__init__.py,sha256=wwg3QPVVMLkq5qHUGw_9m6gvXO3hxa-xaBj3_BRJlN4,679
9
- gnosisllm_knowledge/backends/opensearch/agentic.py,sha256=UJdiVDaxQARigOQz0cVfPrduxF8Oa7EH96lorjwji98,25462
10
- gnosisllm_knowledge/backends/opensearch/config.py,sha256=P30mOAVpo4bEKNHHP04lX2ttzNXgDbu1i-SfobcURNk,7256
11
- gnosisllm_knowledge/backends/opensearch/indexer.py,sha256=fkfpCwg7Ka2UwDl2L0V1z9sXlvnQbyYMiFeKk1Q1D1Q,15101
12
- gnosisllm_knowledge/backends/opensearch/mappings.py,sha256=ssev3HFLf82lB1guzqZ0iDQE1C6ppvG1HNGVjG4vwAU,7873
13
- gnosisllm_knowledge/backends/opensearch/queries.py,sha256=IEZ_teGqc80OFrzD4XIwUArXRre-UhQWIH2oZnRGvC8,13301
14
- gnosisllm_knowledge/backends/opensearch/searcher.py,sha256=j6j0kzLSxgDRCk1fkqZ642rAXONLa-hnzZr1qEQ1CKI,11594
15
- gnosisllm_knowledge/backends/opensearch/setup.py,sha256=mZoHBxMK_WZA63F5PviLvGLeH8CqmAn9XM0FnuJXfrQ,52319
16
- gnosisllm_knowledge/chunking/__init__.py,sha256=XKrk04DqwhhPxxP_SLGnQrVUo2fexmvzuEN9YzO4Jxk,225
17
- gnosisllm_knowledge/chunking/fixed.py,sha256=LXZhRbYbtg4hQzqrAKyGrM_zmDhqJWXDi6cHOiJlGrM,3905
18
- gnosisllm_knowledge/chunking/sentence.py,sha256=EqY2Y1dpfNK_qukXQC9QYrKN8b1dqyyX9TZN5bUYGqs,7271
19
- gnosisllm_knowledge/cli/__init__.py,sha256=G8AO-LwNQqXSKZCFxLiXggOQf-xmNITWVs25xWl_-Cg,564
20
- gnosisllm_knowledge/cli/app.py,sha256=4Nm5ccxjNHQw9jy9VH7EJ7-rn138i_fPU1yzaef8ELE,15132
21
- gnosisllm_knowledge/cli/commands/__init__.py,sha256=TGrGl_AxVVXpAv01iO4JCUtWW8wzSQTBtQFj4vPeE0A,303
22
- gnosisllm_knowledge/cli/commands/agentic.py,sha256=XGZyDKu2TkNNhetzbDIp5zFbBeLwgSyfFfR3VZgZRQM,19836
23
- gnosisllm_knowledge/cli/commands/load.py,sha256=RZhLtw2Ba6LCceLTD665QE4QcOm-HBc9D-lEX1ReGLM,13117
24
- gnosisllm_knowledge/cli/commands/search.py,sha256=lj3kUXmvSZRA_z8utbN2Ph_2NczOuzGkuxILreJuj_o,15326
25
- gnosisllm_knowledge/cli/commands/setup.py,sha256=7NtGGulAggVVQVvzcDq-YLPZlqpGtdKDnYVXd1KPpiQ,7596
26
- gnosisllm_knowledge/cli/display/__init__.py,sha256=hLQ3XCBxw8C5WOT-wu1lqFyrD7FJVPHSWXUZM24Qnjc,155
27
- gnosisllm_knowledge/cli/display/service.py,sha256=klGJtc-w7LhNWUaG530QQIazc6c5t8laIM6bnwdyhlg,17537
28
- gnosisllm_knowledge/cli/utils/__init__.py,sha256=BlzSMhfXM_x9EAv304krVb_DHvdJaKLrlbb716LKkQI,106
29
- gnosisllm_knowledge/cli/utils/config.py,sha256=8iGzJfInv83lucAHSDfxA5wbTIzh9MlqB2-xa4vcwJg,7292
30
- gnosisllm_knowledge/core/__init__.py,sha256=yeZYSnaypTdStzt3_FERALGUgfC5fuudTCGixUm1_z0,1827
31
- gnosisllm_knowledge/core/domain/__init__.py,sha256=15IWkFNV-VKQnInFxMtY4FYh7pNmjYXDq4MPpSsAoPk,938
32
- gnosisllm_knowledge/core/domain/document.py,sha256=FYvM4Vi1DQeDIq4OEY94dX9eNMpQCmkDQRHkyYqq_WY,7765
33
- gnosisllm_knowledge/core/domain/result.py,sha256=xyPfselaG7EMhvmcYjepFOEseM4Mfc15Q1Ciq7KxQFE,5843
34
- gnosisllm_knowledge/core/domain/search.py,sha256=uMJZhGCQ_kS_R5kb1hN5VNEyGbWkuRL_ON_BR4BoqSM,10569
35
- gnosisllm_knowledge/core/domain/source.py,sha256=kKmWr3U01sJkfG8jtzAQDzxtVVzUOTSqEJZT5gGlYU8,4493
36
- gnosisllm_knowledge/core/events/__init__.py,sha256=pqWmPSuZIV3_teyYxGgNOLu8yaEw5jTO7A0tKDTydpE,540
37
- gnosisllm_knowledge/core/events/emitter.py,sha256=qFANzBtgdnQMVgLFE76rA-4i6Es_7ItaI-JfOl9vrNc,6940
38
- gnosisllm_knowledge/core/events/types.py,sha256=UtmCxkhSfgpHaeFtl43aarmw-eg84KdLuXMBAttICh0,6636
39
- gnosisllm_knowledge/core/exceptions.py,sha256=fOnE_w2fqX6uS7PGkyXJxiIWcySXpRhfQtUJVA4AFdw,10367
40
- gnosisllm_knowledge/core/interfaces/__init__.py,sha256=LpBUW0ABmIMUscNIaX-dVkkarqK2pXr0Gf1rg0TSrtU,775
41
- gnosisllm_knowledge/core/interfaces/agentic.py,sha256=OrS-3YRJ6Z6j1yAngsAv3B9fN9otmwlEyZ_G_fjDJPY,3600
42
- gnosisllm_knowledge/core/interfaces/chunker.py,sha256=FUu3B6Ent5Zofs1XLWZ0RqevHBbGd7iNRO7pAg2JqdE,1862
43
- gnosisllm_knowledge/core/interfaces/fetcher.py,sha256=19u2TKE6rDjX-8l7xIRKUqa6DLmZz_RncPy1p0sN3tk,3318
44
- gnosisllm_knowledge/core/interfaces/indexer.py,sha256=4a33oyhYetAiQ6TF2u7ICPyjjidGwsrjILJ0i0YZCqY,6166
45
- gnosisllm_knowledge/core/interfaces/loader.py,sha256=NKq5Yj1Pmbv1I8AUMDtleUCZO5URZFw6kCdx6uLG7Bk,2903
46
- gnosisllm_knowledge/core/interfaces/searcher.py,sha256=c76SH2DD3EEVmaD6TuVjZu5hP6HjoyawUoiT9aANZz0,4634
47
- gnosisllm_knowledge/core/interfaces/setup.py,sha256=9DMKVqoNUrWKbS58QCnfwlGk4sD9R5iGe-WLwlW7z-Q,4457
48
- gnosisllm_knowledge/fetchers/__init__.py,sha256=CiupP4NdIPzWme6SKp--a_wDX1qAy-QBUO_bjoVDFBQ,393
49
- gnosisllm_knowledge/fetchers/config.py,sha256=msUHhTzCGa-cASzoXeAhXFNrDpY9E1XaDo_gqs2CtrU,2611
50
- gnosisllm_knowledge/fetchers/http.py,sha256=nQjH1nKTIR65tmbfW6RWaP3pR5JJe4j33cTzSBWHM-E,5133
51
- gnosisllm_knowledge/fetchers/neoreader.py,sha256=Gc5SqHr9e3zvP9TE_xHmW_iwXcYq0UqDCzob7frkaqM,6637
52
- gnosisllm_knowledge/loaders/__init__.py,sha256=4XlrhFapi9uWsHUAG2eeQEfc47T75a9GRUXde1dmom4,387
53
- gnosisllm_knowledge/loaders/base.py,sha256=T5m0zrtn-4lA3nYR6mzswQfPwCnYiRhVogzNBvYK2xA,13089
54
- gnosisllm_knowledge/loaders/factory.py,sha256=gmUqYT3yTijMLP0XRldFWlgLFWdmdvKrj5WvD3RMZw0,6374
55
- gnosisllm_knowledge/loaders/sitemap.py,sha256=sbsLYs_EugwJRWXOOawpCy7iB2lu46ZW9_3eNymp33I,9524
56
- gnosisllm_knowledge/loaders/website.py,sha256=i54S7X0wTYNw9jJQfaoWFqMkGJkbAJeBv-OFBNjPQ_g,1587
57
- gnosisllm_knowledge/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- gnosisllm_knowledge/services/__init__.py,sha256=qOn1CjsB25UQ2K1TQIGMBe9-CUrGxAKhBrNh9Awo8n8,273
59
- gnosisllm_knowledge/services/indexing.py,sha256=ZVDDV2_Zlv6zmCWg-VSNeczieR7LyNaqzXcaFP-uvsE,12471
60
- gnosisllm_knowledge/services/search.py,sha256=ft8Q7rtRKqXVZ0c_bVqa01jDaV5UQF51LnrY2ICfC1Q,10156
61
- gnosisllm_knowledge-0.2.0.dist-info/METADATA,sha256=Q5bOjQAt93BXqPFdSF_3XlZh_6ystmXCX8-hcSIuwpg,14152
62
- gnosisllm_knowledge-0.2.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
63
- gnosisllm_knowledge-0.2.0.dist-info/entry_points.txt,sha256=-4wtWCTX7bT8WessWJyqfkQ99-2aCVuSoTL1S_QpaOo,68
64
- gnosisllm_knowledge-0.2.0.dist-info/RECORD,,