gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/__init__.py +91 -39
- gnosisllm_knowledge/api/__init__.py +3 -2
- gnosisllm_knowledge/api/knowledge.py +502 -32
- gnosisllm_knowledge/api/memory.py +966 -0
- gnosisllm_knowledge/backends/__init__.py +14 -5
- gnosisllm_knowledge/backends/memory/indexer.py +27 -2
- gnosisllm_knowledge/backends/memory/searcher.py +111 -10
- gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
- gnosisllm_knowledge/backends/opensearch/config.py +49 -28
- gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
- gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
- gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
- gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
- gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
- gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
- gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
- gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
- gnosisllm_knowledge/cli/app.py +436 -31
- gnosisllm_knowledge/cli/commands/agentic.py +26 -9
- gnosisllm_knowledge/cli/commands/load.py +169 -19
- gnosisllm_knowledge/cli/commands/memory.py +733 -0
- gnosisllm_knowledge/cli/commands/search.py +9 -10
- gnosisllm_knowledge/cli/commands/setup.py +49 -23
- gnosisllm_knowledge/cli/display/service.py +43 -0
- gnosisllm_knowledge/cli/utils/config.py +62 -4
- gnosisllm_knowledge/core/domain/__init__.py +54 -0
- gnosisllm_knowledge/core/domain/discovery.py +166 -0
- gnosisllm_knowledge/core/domain/document.py +19 -19
- gnosisllm_knowledge/core/domain/memory.py +440 -0
- gnosisllm_knowledge/core/domain/result.py +11 -3
- gnosisllm_knowledge/core/domain/search.py +12 -25
- gnosisllm_knowledge/core/domain/source.py +11 -12
- gnosisllm_knowledge/core/events/__init__.py +8 -0
- gnosisllm_knowledge/core/events/types.py +198 -5
- gnosisllm_knowledge/core/exceptions.py +227 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
- gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
- gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
- gnosisllm_knowledge/core/interfaces/memory.py +524 -0
- gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
- gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
- gnosisllm_knowledge/core/streaming/__init__.py +36 -0
- gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
- gnosisllm_knowledge/fetchers/__init__.py +8 -0
- gnosisllm_knowledge/fetchers/config.py +27 -0
- gnosisllm_knowledge/fetchers/neoreader.py +31 -3
- gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
- gnosisllm_knowledge/loaders/__init__.py +5 -1
- gnosisllm_knowledge/loaders/base.py +3 -4
- gnosisllm_knowledge/loaders/discovery.py +338 -0
- gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
- gnosisllm_knowledge/loaders/factory.py +46 -0
- gnosisllm_knowledge/loaders/sitemap.py +129 -1
- gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
- gnosisllm_knowledge/services/indexing.py +100 -93
- gnosisllm_knowledge/services/search.py +84 -31
- gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
- gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
gnosisllm_knowledge/core/domain/source.py

@@ -10,6 +10,11 @@ from typing import Any
 class SourceConfig:
     """Configuration for a content source.
 
+    Note:
+        This library is tenant-agnostic. Multi-tenancy is achieved through index
+        isolation (e.g., `knowledge-{account_id}`). Tenant information should be
+        managed by the caller, not embedded in source configuration.
+
     Attributes:
         url: The source URL or path.
         source_type: Type of source (website, sitemap, file, etc.).
@@ -26,8 +31,7 @@ class SourceConfig:
         remove_selector: CSS selector for elements to remove.
         timeout: Request timeout in seconds.
 
-
-        account_id: Account/tenant identifier.
+    Collection:
         collection_id: Collection identifier.
         source_id: Source identifier within collection.
     """
@@ -47,8 +51,7 @@
     remove_selector: str | None = None
     timeout: int | None = None
 
-    #
-    account_id: str | None = None
+    # Collection
     collection_id: str | None = None
     source_id: str | None = None
 
@@ -73,26 +76,23 @@
             target_selector=self.target_selector,
             remove_selector=self.remove_selector,
             timeout=self.timeout,
-            account_id=self.account_id,
             collection_id=self.collection_id,
             source_id=self.source_id,
         )
 
-    def
+    def with_collection(
         self,
-        account_id: str | None = None,
-        collection_id: str | None = None,
+        collection_id: str,
         source_id: str | None = None,
     ) -> SourceConfig:
-        """Create a copy with
+        """Create a copy with collection information.
 
         Args:
-            account_id: Account/tenant identifier.
             collection_id: Collection identifier.
             source_id: Source identifier.
 
         Returns:
-            New SourceConfig with
+            New SourceConfig with collection information.
         """
         return SourceConfig(
             url=self.url,
@@ -105,7 +105,6 @@
             target_selector=self.target_selector,
             remove_selector=self.remove_selector,
             timeout=self.timeout,
-            account_id=account_id,
             collection_id=collection_id,
             source_id=source_id,
         )
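
The net effect of the source.py changes above is that tenant scoping moves out of SourceConfig and into the index name chosen by the caller. A minimal sketch of the caller-side pattern (the field names and the `with_collection` signature come from the diff; treating the dataclass constructor as keyword-friendly and the `source_type` value are assumptions):

    from gnosisllm_knowledge.core.domain.source import SourceConfig

    account_id = "acct-42"                  # owned by the caller, not the library
    index_name = f"knowledge-{account_id}"  # tenant isolation via index naming

    config = SourceConfig(url="https://docs.example.com", source_type="website")
    config = config.with_collection(collection_id="docs", source_id="docs-main")
    # index_name is handed to the indexer/searcher; config itself stays tenant-free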
gnosisllm_knowledge/core/events/__init__.py

@@ -4,6 +4,10 @@ from gnosisllm_knowledge.core.events.emitter import EventEmitter
 from gnosisllm_knowledge.core.events.types import (
     BatchCompletedEvent,
     BatchStartedEvent,
+    DiscoveryCompletedEvent,
+    DiscoveryFailedEvent,
+    DiscoveryProgressEvent,
+    DiscoveryStartedEvent,
     DocumentIndexedEvent,
     DocumentLoadedEvent,
     Event,
@@ -20,4 +24,8 @@ __all__ = [
     "SitemapDiscoveryEvent",
     "BatchStartedEvent",
     "BatchCompletedEvent",
+    "DiscoveryStartedEvent",
+    "DiscoveryProgressEvent",
+    "DiscoveryCompletedEvent",
+    "DiscoveryFailedEvent",
 ]
gnosisllm_knowledge/core/events/types.py

@@ -14,6 +14,7 @@ class EventType(str, Enum):
     Events are organized by category:
     - Loading events: Document and content loading
     - Indexing events: Document indexing operations
+    - Streaming events: Streaming pipeline progress
     - Search events: Search and retrieval operations
     - Agentic events: AI-powered operations
     - Setup events: Backend setup operations
@@ -33,6 +34,17 @@
     LOAD_FAILED = "load_failed"
     SITEMAP_DISCOVERED = "sitemap_discovered"
 
+    # Discovery events
+    DISCOVERY_STARTED = "discovery_started"
+    DISCOVERY_PROGRESS = "discovery_progress"
+    DISCOVERY_COMPLETED = "discovery_completed"
+    DISCOVERY_FAILED = "discovery_failed"
+
+    # Streaming events
+    STREAMING_PROGRESS = "streaming_progress"
+    URL_BATCH_PROCESSED = "url_batch_processed"
+    STREAMING_COMPLETED = "streaming_completed"
+
     # Indexing events
     INDEX_STARTED = "index_started"
     DOCUMENT_INDEXED = "document_indexed"
@@ -85,11 +97,14 @@
 class Event:
     """Base event class.
 
+    Note:
+        This library is tenant-agnostic. Multi-tenancy is achieved through index
+        isolation. Any tenant-specific context should be passed in the data dict.
+
     Attributes:
         event_type: The type of event.
         timestamp: When the event occurred.
-        data: Additional event data.
-        account_id: Account ID for multi-tenant context.
+        data: Additional event data (can include tenant context for audit).
         user_id: User ID if applicable.
         request_id: Request ID for tracing.
         trace_id: Distributed trace ID.
@@ -101,7 +116,6 @@
     data: dict[str, Any] = field(default_factory=dict)
 
     # Context
-    account_id: str | None = None
     user_id: str | None = None
     request_id: str | None = None
 
@@ -111,7 +125,6 @@
 
     def with_context(
         self,
-        account_id: str | None = None,
         user_id: str | None = None,
         request_id: str | None = None,
     ) -> Event:
@@ -120,7 +133,6 @@
             event_type=self.event_type,
             timestamp=self.timestamp,
             data=self.data.copy(),
-            account_id=account_id or self.account_id,
             user_id=user_id or self.user_id,
             request_id=request_id or self.request_id,
             trace_id=self.trace_id,
@@ -224,3 +236,184 @@ class BatchCompletedEvent(Event):
             "failure_count": self.failure_count,
             "duration_ms": self.duration_ms,
         }
+
+
+@dataclass
+class StreamingProgressEvent(Event):
+    """Progress event for streaming operations.
+
+    Emitted periodically during streaming pipeline execution to
+    provide visibility into progress.
+    """
+
+    urls_discovered: int = 0
+    urls_processed: int = 0
+    documents_indexed: int = 0
+    documents_failed: int = 0
+    phase: str = "unknown"
+    memory_mb: float | None = None
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.STREAMING_PROGRESS
+        self.data = {
+            "urls_discovered": self.urls_discovered,
+            "urls_processed": self.urls_processed,
+            "documents_indexed": self.documents_indexed,
+            "documents_failed": self.documents_failed,
+            "phase": self.phase,
+            "memory_mb": self.memory_mb,
+        }
+
+
+@dataclass
+class UrlBatchProcessedEvent(Event):
+    """Event emitted when a batch of URLs is processed."""
+
+    batch_index: int = 0
+    urls_in_batch: int = 0
+    documents_created: int = 0
+    total_urls_processed: int = 0
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.URL_BATCH_PROCESSED
+        self.data = {
+            "batch_index": self.batch_index,
+            "urls_in_batch": self.urls_in_batch,
+            "documents_created": self.documents_created,
+            "total_urls_processed": self.total_urls_processed,
+        }
+
+
+@dataclass
+class StreamingCompletedEvent(Event):
+    """Event emitted when streaming pipeline completes."""
+
+    total_urls: int = 0
+    total_documents: int = 0
+    indexed_count: int = 0
+    failed_count: int = 0
+    duration_ms: float = 0.0
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.STREAMING_COMPLETED
+        self.data = {
+            "total_urls": self.total_urls,
+            "total_documents": self.total_documents,
+            "indexed_count": self.indexed_count,
+            "failed_count": self.failed_count,
+            "duration_ms": self.duration_ms,
+        }
+
+
+# === Discovery Events ===
+
+
+@dataclass
+class DiscoveryStartedEvent(Event):
+    """Event emitted when a discovery job starts.
+
+    Attributes:
+        url: The starting URL for discovery.
+        job_id: The discovery job ID.
+        config: Discovery configuration as dictionary.
+    """
+
+    url: str = ""
+    job_id: str = ""
+    config: dict[str, Any] = field(default_factory=dict)
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.DISCOVERY_STARTED
+        self.data = {
+            "url": self.url,
+            "job_id": self.job_id,
+            "config": self.config,
+        }
+
+
+@dataclass
+class DiscoveryProgressEvent(Event):
+    """Event emitted during discovery progress updates.
+
+    Attributes:
+        job_id: The discovery job ID.
+        percent: Progress percentage (0-100).
+        pages_crawled: Number of pages crawled so far.
+        urls_discovered: Number of URLs discovered so far.
+        current_depth: Current crawl depth.
+        message: Human-readable progress message.
+    """
+
+    job_id: str = ""
+    percent: int = 0
+    pages_crawled: int = 0
+    urls_discovered: int = 0
+    current_depth: int = 0
+    message: str = ""
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.DISCOVERY_PROGRESS
+        self.data = {
+            "job_id": self.job_id,
+            "percent": self.percent,
+            "pages_crawled": self.pages_crawled,
+            "urls_discovered": self.urls_discovered,
+            "current_depth": self.current_depth,
+            "message": self.message,
+        }
+
+
+@dataclass
+class DiscoveryCompletedEvent(Event):
+    """Event emitted when discovery completes successfully.
+
+    Attributes:
+        job_id: The discovery job ID.
+        urls_count: Total number of URLs discovered.
+        pages_crawled: Total number of pages crawled.
+        duration_seconds: Total discovery duration.
+        errors: Number of errors encountered during discovery.
+    """
+
+    job_id: str = ""
+    urls_count: int = 0
+    pages_crawled: int = 0
+    duration_seconds: float = 0.0
+    errors: int = 0
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.DISCOVERY_COMPLETED
+        self.data = {
+            "job_id": self.job_id,
+            "urls_count": self.urls_count,
+            "pages_crawled": self.pages_crawled,
+            "duration_seconds": self.duration_seconds,
+            "errors": self.errors,
+        }
+
+
+@dataclass
+class DiscoveryFailedEvent(Event):
+    """Event emitted when discovery fails.
+
+    Attributes:
+        job_id: The discovery job ID.
+        error: Error message describing the failure.
+    """
+
+    job_id: str = ""
+    error: str = ""
+
+    def __post_init__(self) -> None:
+        """Set event type."""
+        self.event_type = EventType.DISCOVERY_FAILED
+        self.data = {
+            "job_id": self.job_id,
+            "error": self.error,
+        }
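
Each new event mirrors its typed fields into the generic `data` dict in `__post_init__`, so downstream consumers can handle all events uniformly. A small sketch (it assumes the base `Event` fields such as `timestamp` and `data` all carry defaults, which the dataclass definitions above imply):

    from gnosisllm_knowledge.core.events.types import DiscoveryProgressEvent, EventType

    event = DiscoveryProgressEvent(
        job_id="job-123",
        percent=40,
        pages_crawled=20,
        urls_discovered=150,
        current_depth=2,
        message="crawling depth 2",
    )
    assert event.event_type is EventType.DISCOVERY_PROGRESS
    # __post_init__ copied the typed fields into the generic payload
    assert event.data["urls_discovered"] == 150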
gnosisllm_knowledge/core/exceptions.py

@@ -405,3 +405,230 @@ class DocumentNotFoundError(KnowledgeError):
             self.details["doc_id"] = doc_id
         if index_name:
             self.details["index_name"] = index_name
+
+
+# === Memory Exceptions ===
+
+
+class MemoryError(KnowledgeError):
+    """Base exception for memory operations.
+
+    Raised when memory operations fail.
+    """
+
+    pass
+
+
+class ContainerNotFoundError(MemoryError):
+    """Container does not exist.
+
+    Raised when a memory container cannot be found.
+    """
+
+    def __init__(
+        self,
+        message: str = "Container not found",
+        *,
+        container_id: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.container_id = container_id
+        if container_id:
+            self.details["container_id"] = container_id
+
+
+class ContainerExistsError(MemoryError):
+    """Container already exists.
+
+    Raised when attempting to create a container that already exists.
+    """
+
+    def __init__(
+        self,
+        message: str = "Container already exists",
+        *,
+        container_name: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.container_name = container_name
+        if container_name:
+            self.details["container_name"] = container_name
+
+
+class SessionNotFoundError(MemoryError):
+    """Session does not exist.
+
+    Raised when a session cannot be found.
+    """
+
+    def __init__(
+        self,
+        message: str = "Session not found",
+        *,
+        session_id: str | None = None,
+        container_id: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.session_id = session_id
+        self.container_id = container_id
+        if session_id:
+            self.details["session_id"] = session_id
+        if container_id:
+            self.details["container_id"] = container_id
+
+
+class InferenceError(MemoryError):
+    """LLM inference failed.
+
+    Raised when LLM inference for memory extraction fails.
+    """
+
+    def __init__(
+        self,
+        message: str = "LLM inference failed",
+        *,
+        model_id: str | None = None,
+        strategy: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.model_id = model_id
+        self.strategy = strategy
+        if model_id:
+            self.details["model_id"] = model_id
+        if strategy:
+            self.details["strategy"] = strategy
+
+
+class InferenceTimeoutError(InferenceError):
+    """LLM inference timed out.
+
+    Raised when LLM inference exceeds the configured timeout.
+    """
+
+    def __init__(
+        self,
+        message: str = "LLM inference timed out",
+        *,
+        timeout_seconds: float | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.timeout_seconds = timeout_seconds
+        if timeout_seconds:
+            self.details["timeout_seconds"] = timeout_seconds
+
+
+class MemoryConfigurationError(MemoryError):
+    """Memory is not properly configured.
+
+    Raised when memory configuration is missing or invalid.
+    """
+
+    def __init__(
+        self,
+        message: str = "Memory configuration error",
+        *,
+        missing_config: list[str] | None = None,
+        **kwargs: Any,
+    ) -> None:
+        super().__init__(message, **kwargs)
+        self.missing_config = missing_config
+        if missing_config:
+            self.details["missing_config"] = missing_config
+
+
+# === Discovery Exceptions ===
+
+
+class DiscoveryError(KnowledgeError):
+    """Base exception for discovery operations.
+
+    Raised when website discovery fails.
+    All discovery-related exceptions inherit from this class.
+    """
+
+    def __init__(
+        self,
+        message: str = "Discovery error",
+        *,
+        job_id: str | None = None,
+        source: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the exception.
+
+        Args:
+            message: Human-readable error message.
+            job_id: The discovery job ID if available.
+            source: The source URL being discovered.
+            **kwargs: Additional arguments for parent class.
+        """
+        super().__init__(message, **kwargs)
+        self.job_id = job_id
+        self.source = source
+        if job_id:
+            self.details["job_id"] = job_id
+        if source:
+            self.details["source"] = source
+
+
+class DiscoveryTimeoutError(DiscoveryError):
+    """Discovery job timed out.
+
+    Raised when a discovery job exceeds its configured timeout
+    while waiting for completion.
+    """
+
+    def __init__(
+        self,
+        message: str = "Discovery job timed out",
+        *,
+        elapsed: float | None = None,
+        timeout: float | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the exception.
+
+        Args:
+            message: Human-readable error message.
+            elapsed: Time elapsed before timeout.
+            timeout: The timeout value that was exceeded.
+            **kwargs: Additional arguments for parent class.
+        """
+        super().__init__(message, **kwargs)
+        self.elapsed = elapsed
+        self.timeout = timeout
+        if elapsed is not None:
+            self.details["elapsed"] = elapsed
+        if timeout is not None:
+            self.details["timeout"] = timeout
+
+
+class DiscoveryJobFailedError(DiscoveryError):
+    """Discovery job failed on the server.
+
+    Raised when a discovery job completes with a failed or cancelled status.
+    """
+
+    def __init__(
+        self,
+        message: str = "Discovery job failed",
+        *,
+        status: str | None = None,
+        **kwargs: Any,
+    ) -> None:
+        """Initialize the exception.
+
+        Args:
+            message: Human-readable error message.
+            status: The final job status.
+            **kwargs: Additional arguments for parent class.
+        """
+        super().__init__(message, **kwargs)
+        self.status = status
+        if status:
+            self.details["status"] = status
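
All of the new exception constructors funnel structured context into a `details` dict, so callers can log failures uniformly and catch whole families via the base classes. A sketch (it assumes `KnowledgeError.__init__` initializes `details`, which the constructors above rely on):

    from gnosisllm_knowledge.core.exceptions import DiscoveryError, DiscoveryTimeoutError

    try:
        raise DiscoveryTimeoutError(elapsed=305.0, timeout=300.0, job_id="job-123")
    except DiscoveryError as exc:
        # job_id is recorded by DiscoveryError, elapsed/timeout by the subclass
        print(exc, exc.details)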
gnosisllm_knowledge/core/interfaces/__init__.py

@@ -5,16 +5,33 @@ from gnosisllm_knowledge.core.interfaces.chunker import ITextChunker
 from gnosisllm_knowledge.core.interfaces.fetcher import FetchResult, IContentFetcher
 from gnosisllm_knowledge.core.interfaces.indexer import IDocumentIndexer
 from gnosisllm_knowledge.core.interfaces.loader import IContentLoader
+from gnosisllm_knowledge.core.interfaces.memory import (
+    IHistoryRetriever,
+    IMemoryContainerManager,
+    IMemoryRetriever,
+    IMemoryStats,
+    IMemoryStore,
+    ISessionManager,
+)
 from gnosisllm_knowledge.core.interfaces.searcher import IKnowledgeSearcher
 from gnosisllm_knowledge.core.interfaces.setup import ISetupAdapter
 
 __all__ = [
+    # Content loading
     "IContentLoader",
     "IContentFetcher",
     "FetchResult",
     "ITextChunker",
+    # Indexing and search
     "IDocumentIndexer",
     "IKnowledgeSearcher",
     "IAgenticSearcher",
     "ISetupAdapter",
+    # Memory
+    "IMemoryContainerManager",
+    "IMemoryStore",
+    "IMemoryRetriever",
+    "IHistoryRetriever",
+    "ISessionManager",
+    "IMemoryStats",
 ]
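
Re-exporting the memory protocols from the interfaces package lets application code depend on interfaces rather than a concrete backend. A minimal sketch using only names confirmed by the diff:

    from gnosisllm_knowledge.core.interfaces import IMemoryStore, ISessionManager

    class ConversationService:
        """Depends on the memory protocols, not on a specific backend."""

        def __init__(self, store: IMemoryStore, sessions: ISessionManager) -> None:
            self._store = store
            self._sessions = sessions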
gnosisllm_knowledge/core/interfaces/agentic.py

@@ -1,4 +1,11 @@
-"""Agentic searcher protocol - Interface for AI-powered search operations."""
+"""Agentic searcher protocol - Interface for AI-powered search operations.
+
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation (e.g., `knowledge-{account_id}`). Agentic searcher implementations
+    should not include tenant filtering logic - callers should use tenant-specific
+    indices.
+"""
 
 from __future__ import annotations
 
@@ -15,6 +22,9 @@ if TYPE_CHECKING:
 class IAgenticSearcher(Protocol):
     """Protocol for agentic search operations using AI agents.
 
+    This protocol is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation by using tenant-specific index names.
+
     Agentic searchers are responsible for:
     - Understanding natural language queries
     - Automatically constructing optimal search strategies
@@ -107,13 +117,11 @@
 
     async def list_conversations(
         self,
-        account_id: str | None = None,
         limit: int = 100,
     ) -> list[dict[str, Any]]:
         """List active conversations.
 
         Args:
-            account_id: Filter by account (multi-tenant).
             limit: Maximum number of conversations.
 
         Returns:
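
After this change, tenant scoping for `list_conversations` happens when the searcher is constructed, not per call. A sketch of the calling pattern; `build_searcher` is a hypothetical stand-in for whatever backend-specific code produces an `IAgenticSearcher`:

    from typing import Any

    def build_searcher(index_name: str) -> Any:
        """Hypothetical factory; real construction is backend-specific."""
        raise NotImplementedError

    async def recent_conversations(account_id: str) -> list[dict[str, Any]]:
        searcher = build_searcher(index_name=f"knowledge-{account_id}")
        # list_conversations no longer takes account_id; the index is tenant-scoped
        return await searcher.list_conversations(limit=50)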
gnosisllm_knowledge/core/interfaces/indexer.py

@@ -1,4 +1,10 @@
-"""Document indexer protocol - Interface Segregation Principle."""
+"""Document indexer protocol - Interface Segregation Principle.
+
+Note:
+    This library is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation (e.g., `knowledge-{account_id}`). Indexer implementations should
+    not include tenant filtering logic - callers should use tenant-specific indices.
+"""
 
 from __future__ import annotations
 
@@ -14,6 +20,9 @@ if TYPE_CHECKING:
 class IDocumentIndexer(Protocol):
     """Protocol for indexing documents into a search backend.
 
+    This protocol is tenant-agnostic. Multi-tenancy is achieved through index
+    isolation by using tenant-specific index names.
+
     Document indexers are responsible for:
     - Generating embeddings for documents
     - Storing documents in the search backend