gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. gnosisllm_knowledge/__init__.py +91 -39
  2. gnosisllm_knowledge/api/__init__.py +3 -2
  3. gnosisllm_knowledge/api/knowledge.py +502 -32
  4. gnosisllm_knowledge/api/memory.py +966 -0
  5. gnosisllm_knowledge/backends/__init__.py +14 -5
  6. gnosisllm_knowledge/backends/memory/indexer.py +27 -2
  7. gnosisllm_knowledge/backends/memory/searcher.py +111 -10
  8. gnosisllm_knowledge/backends/opensearch/agentic.py +355 -48
  9. gnosisllm_knowledge/backends/opensearch/config.py +49 -28
  10. gnosisllm_knowledge/backends/opensearch/indexer.py +49 -3
  11. gnosisllm_knowledge/backends/opensearch/mappings.py +14 -5
  12. gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
  13. gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
  14. gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
  15. gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
  16. gnosisllm_knowledge/backends/opensearch/queries.py +33 -33
  17. gnosisllm_knowledge/backends/opensearch/searcher.py +238 -0
  18. gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
  19. gnosisllm_knowledge/cli/app.py +436 -31
  20. gnosisllm_knowledge/cli/commands/agentic.py +26 -9
  21. gnosisllm_knowledge/cli/commands/load.py +169 -19
  22. gnosisllm_knowledge/cli/commands/memory.py +733 -0
  23. gnosisllm_knowledge/cli/commands/search.py +9 -10
  24. gnosisllm_knowledge/cli/commands/setup.py +49 -23
  25. gnosisllm_knowledge/cli/display/service.py +43 -0
  26. gnosisllm_knowledge/cli/utils/config.py +62 -4
  27. gnosisllm_knowledge/core/domain/__init__.py +54 -0
  28. gnosisllm_knowledge/core/domain/discovery.py +166 -0
  29. gnosisllm_knowledge/core/domain/document.py +19 -19
  30. gnosisllm_knowledge/core/domain/memory.py +440 -0
  31. gnosisllm_knowledge/core/domain/result.py +11 -3
  32. gnosisllm_knowledge/core/domain/search.py +12 -25
  33. gnosisllm_knowledge/core/domain/source.py +11 -12
  34. gnosisllm_knowledge/core/events/__init__.py +8 -0
  35. gnosisllm_knowledge/core/events/types.py +198 -5
  36. gnosisllm_knowledge/core/exceptions.py +227 -0
  37. gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
  38. gnosisllm_knowledge/core/interfaces/agentic.py +11 -3
  39. gnosisllm_knowledge/core/interfaces/indexer.py +10 -1
  40. gnosisllm_knowledge/core/interfaces/memory.py +524 -0
  41. gnosisllm_knowledge/core/interfaces/searcher.py +10 -1
  42. gnosisllm_knowledge/core/interfaces/streaming.py +133 -0
  43. gnosisllm_knowledge/core/streaming/__init__.py +36 -0
  44. gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
  45. gnosisllm_knowledge/fetchers/__init__.py +8 -0
  46. gnosisllm_knowledge/fetchers/config.py +27 -0
  47. gnosisllm_knowledge/fetchers/neoreader.py +31 -3
  48. gnosisllm_knowledge/fetchers/neoreader_discovery.py +505 -0
  49. gnosisllm_knowledge/loaders/__init__.py +5 -1
  50. gnosisllm_knowledge/loaders/base.py +3 -4
  51. gnosisllm_knowledge/loaders/discovery.py +338 -0
  52. gnosisllm_knowledge/loaders/discovery_streaming.py +343 -0
  53. gnosisllm_knowledge/loaders/factory.py +46 -0
  54. gnosisllm_knowledge/loaders/sitemap.py +129 -1
  55. gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
  56. gnosisllm_knowledge/services/indexing.py +100 -93
  57. gnosisllm_knowledge/services/search.py +84 -31
  58. gnosisllm_knowledge/services/streaming_pipeline.py +334 -0
  59. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/METADATA +73 -10
  60. gnosisllm_knowledge-0.4.0.dist-info/RECORD +81 -0
  61. gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
  62. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/WHEEL +0 -0
  63. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -10,6 +10,11 @@ from typing import Any
10
10
  class SourceConfig:
11
11
  """Configuration for a content source.
12
12
 
13
+ Note:
14
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
15
+ isolation (e.g., `knowledge-{account_id}`). Tenant information should be
16
+ managed by the caller, not embedded in source configuration.
17
+
13
18
  Attributes:
14
19
  url: The source URL or path.
15
20
  source_type: Type of source (website, sitemap, file, etc.).
@@ -26,8 +31,7 @@ class SourceConfig:
26
31
  remove_selector: CSS selector for elements to remove.
27
32
  timeout: Request timeout in seconds.
28
33
 
29
- Multi-tenancy:
30
- account_id: Account/tenant identifier.
34
+ Collection:
31
35
  collection_id: Collection identifier.
32
36
  source_id: Source identifier within collection.
33
37
  """
@@ -47,8 +51,7 @@ class SourceConfig:
47
51
  remove_selector: str | None = None
48
52
  timeout: int | None = None
49
53
 
50
- # Multi-tenancy
51
- account_id: str | None = None
54
+ # Collection
52
55
  collection_id: str | None = None
53
56
  source_id: str | None = None
54
57
 
@@ -73,26 +76,23 @@ class SourceConfig:
73
76
  target_selector=self.target_selector,
74
77
  remove_selector=self.remove_selector,
75
78
  timeout=self.timeout,
76
- account_id=self.account_id,
77
79
  collection_id=self.collection_id,
78
80
  source_id=self.source_id,
79
81
  )
80
82
 
81
- def with_tenant(
83
+ def with_collection(
82
84
  self,
83
- account_id: str,
84
- collection_id: str | None = None,
85
+ collection_id: str,
85
86
  source_id: str | None = None,
86
87
  ) -> SourceConfig:
87
- """Create a copy with tenant information.
88
+ """Create a copy with collection information.
88
89
 
89
90
  Args:
90
- account_id: Account/tenant identifier.
91
91
  collection_id: Collection identifier.
92
92
  source_id: Source identifier.
93
93
 
94
94
  Returns:
95
- New SourceConfig with tenant information.
95
+ New SourceConfig with collection information.
96
96
  """
97
97
  return SourceConfig(
98
98
  url=self.url,
@@ -105,7 +105,6 @@ class SourceConfig:
105
105
  target_selector=self.target_selector,
106
106
  remove_selector=self.remove_selector,
107
107
  timeout=self.timeout,
108
- account_id=account_id,
109
108
  collection_id=collection_id,
110
109
  source_id=source_id,
111
110
  )
@@ -4,6 +4,10 @@ from gnosisllm_knowledge.core.events.emitter import EventEmitter
4
4
  from gnosisllm_knowledge.core.events.types import (
5
5
  BatchCompletedEvent,
6
6
  BatchStartedEvent,
7
+ DiscoveryCompletedEvent,
8
+ DiscoveryFailedEvent,
9
+ DiscoveryProgressEvent,
10
+ DiscoveryStartedEvent,
7
11
  DocumentIndexedEvent,
8
12
  DocumentLoadedEvent,
9
13
  Event,
@@ -20,4 +24,8 @@ __all__ = [
20
24
  "SitemapDiscoveryEvent",
21
25
  "BatchStartedEvent",
22
26
  "BatchCompletedEvent",
27
+ "DiscoveryStartedEvent",
28
+ "DiscoveryProgressEvent",
29
+ "DiscoveryCompletedEvent",
30
+ "DiscoveryFailedEvent",
23
31
  ]
@@ -14,6 +14,7 @@ class EventType(str, Enum):
14
14
  Events are organized by category:
15
15
  - Loading events: Document and content loading
16
16
  - Indexing events: Document indexing operations
17
+ - Streaming events: Streaming pipeline progress
17
18
  - Search events: Search and retrieval operations
18
19
  - Agentic events: AI-powered operations
19
20
  - Setup events: Backend setup operations
@@ -33,6 +34,17 @@ class EventType(str, Enum):
33
34
  LOAD_FAILED = "load_failed"
34
35
  SITEMAP_DISCOVERED = "sitemap_discovered"
35
36
 
37
+ # Discovery events
38
+ DISCOVERY_STARTED = "discovery_started"
39
+ DISCOVERY_PROGRESS = "discovery_progress"
40
+ DISCOVERY_COMPLETED = "discovery_completed"
41
+ DISCOVERY_FAILED = "discovery_failed"
42
+
43
+ # Streaming events
44
+ STREAMING_PROGRESS = "streaming_progress"
45
+ URL_BATCH_PROCESSED = "url_batch_processed"
46
+ STREAMING_COMPLETED = "streaming_completed"
47
+
36
48
  # Indexing events
37
49
  INDEX_STARTED = "index_started"
38
50
  DOCUMENT_INDEXED = "document_indexed"
@@ -85,11 +97,14 @@ class EventType(str, Enum):
85
97
  class Event:
86
98
  """Base event class.
87
99
 
100
+ Note:
101
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
102
+ isolation. Any tenant-specific context should be passed in the data dict.
103
+
88
104
  Attributes:
89
105
  event_type: The type of event.
90
106
  timestamp: When the event occurred.
91
- data: Additional event data.
92
- account_id: Account ID for multi-tenant context.
107
+ data: Additional event data (can include tenant context for audit).
93
108
  user_id: User ID if applicable.
94
109
  request_id: Request ID for tracing.
95
110
  trace_id: Distributed trace ID.
@@ -101,7 +116,6 @@ class Event:
101
116
  data: dict[str, Any] = field(default_factory=dict)
102
117
 
103
118
  # Context
104
- account_id: str | None = None
105
119
  user_id: str | None = None
106
120
  request_id: str | None = None
107
121
 
@@ -111,7 +125,6 @@ class Event:
111
125
 
112
126
  def with_context(
113
127
  self,
114
- account_id: str | None = None,
115
128
  user_id: str | None = None,
116
129
  request_id: str | None = None,
117
130
  ) -> Event:
@@ -120,7 +133,6 @@ class Event:
120
133
  event_type=self.event_type,
121
134
  timestamp=self.timestamp,
122
135
  data=self.data.copy(),
123
- account_id=account_id or self.account_id,
124
136
  user_id=user_id or self.user_id,
125
137
  request_id=request_id or self.request_id,
126
138
  trace_id=self.trace_id,
@@ -224,3 +236,184 @@ class BatchCompletedEvent(Event):
224
236
  "failure_count": self.failure_count,
225
237
  "duration_ms": self.duration_ms,
226
238
  }
239
+
240
+
241
+ @dataclass
242
+ class StreamingProgressEvent(Event):
243
+ """Progress event for streaming operations.
244
+
245
+ Emitted periodically during streaming pipeline execution to
246
+ provide visibility into progress.
247
+ """
248
+
249
+ urls_discovered: int = 0
250
+ urls_processed: int = 0
251
+ documents_indexed: int = 0
252
+ documents_failed: int = 0
253
+ phase: str = "unknown"
254
+ memory_mb: float | None = None
255
+
256
+ def __post_init__(self) -> None:
257
+ """Set event type."""
258
+ self.event_type = EventType.STREAMING_PROGRESS
259
+ self.data = {
260
+ "urls_discovered": self.urls_discovered,
261
+ "urls_processed": self.urls_processed,
262
+ "documents_indexed": self.documents_indexed,
263
+ "documents_failed": self.documents_failed,
264
+ "phase": self.phase,
265
+ "memory_mb": self.memory_mb,
266
+ }
267
+
268
+
269
+ @dataclass
270
+ class UrlBatchProcessedEvent(Event):
271
+ """Event emitted when a batch of URLs is processed."""
272
+
273
+ batch_index: int = 0
274
+ urls_in_batch: int = 0
275
+ documents_created: int = 0
276
+ total_urls_processed: int = 0
277
+
278
+ def __post_init__(self) -> None:
279
+ """Set event type."""
280
+ self.event_type = EventType.URL_BATCH_PROCESSED
281
+ self.data = {
282
+ "batch_index": self.batch_index,
283
+ "urls_in_batch": self.urls_in_batch,
284
+ "documents_created": self.documents_created,
285
+ "total_urls_processed": self.total_urls_processed,
286
+ }
287
+
288
+
289
+ @dataclass
290
+ class StreamingCompletedEvent(Event):
291
+ """Event emitted when streaming pipeline completes."""
292
+
293
+ total_urls: int = 0
294
+ total_documents: int = 0
295
+ indexed_count: int = 0
296
+ failed_count: int = 0
297
+ duration_ms: float = 0.0
298
+
299
+ def __post_init__(self) -> None:
300
+ """Set event type."""
301
+ self.event_type = EventType.STREAMING_COMPLETED
302
+ self.data = {
303
+ "total_urls": self.total_urls,
304
+ "total_documents": self.total_documents,
305
+ "indexed_count": self.indexed_count,
306
+ "failed_count": self.failed_count,
307
+ "duration_ms": self.duration_ms,
308
+ }
309
+
310
+
311
+ # === Discovery Events ===
312
+
313
+
314
+ @dataclass
315
+ class DiscoveryStartedEvent(Event):
316
+ """Event emitted when a discovery job starts.
317
+
318
+ Attributes:
319
+ url: The starting URL for discovery.
320
+ job_id: The discovery job ID.
321
+ config: Discovery configuration as dictionary.
322
+ """
323
+
324
+ url: str = ""
325
+ job_id: str = ""
326
+ config: dict[str, Any] = field(default_factory=dict)
327
+
328
+ def __post_init__(self) -> None:
329
+ """Set event type."""
330
+ self.event_type = EventType.DISCOVERY_STARTED
331
+ self.data = {
332
+ "url": self.url,
333
+ "job_id": self.job_id,
334
+ "config": self.config,
335
+ }
336
+
337
+
338
+ @dataclass
339
+ class DiscoveryProgressEvent(Event):
340
+ """Event emitted during discovery progress updates.
341
+
342
+ Attributes:
343
+ job_id: The discovery job ID.
344
+ percent: Progress percentage (0-100).
345
+ pages_crawled: Number of pages crawled so far.
346
+ urls_discovered: Number of URLs discovered so far.
347
+ current_depth: Current crawl depth.
348
+ message: Human-readable progress message.
349
+ """
350
+
351
+ job_id: str = ""
352
+ percent: int = 0
353
+ pages_crawled: int = 0
354
+ urls_discovered: int = 0
355
+ current_depth: int = 0
356
+ message: str = ""
357
+
358
+ def __post_init__(self) -> None:
359
+ """Set event type."""
360
+ self.event_type = EventType.DISCOVERY_PROGRESS
361
+ self.data = {
362
+ "job_id": self.job_id,
363
+ "percent": self.percent,
364
+ "pages_crawled": self.pages_crawled,
365
+ "urls_discovered": self.urls_discovered,
366
+ "current_depth": self.current_depth,
367
+ "message": self.message,
368
+ }
369
+
370
+
371
+ @dataclass
372
+ class DiscoveryCompletedEvent(Event):
373
+ """Event emitted when discovery completes successfully.
374
+
375
+ Attributes:
376
+ job_id: The discovery job ID.
377
+ urls_count: Total number of URLs discovered.
378
+ pages_crawled: Total number of pages crawled.
379
+ duration_seconds: Total discovery duration.
380
+ errors: Number of errors encountered during discovery.
381
+ """
382
+
383
+ job_id: str = ""
384
+ urls_count: int = 0
385
+ pages_crawled: int = 0
386
+ duration_seconds: float = 0.0
387
+ errors: int = 0
388
+
389
+ def __post_init__(self) -> None:
390
+ """Set event type."""
391
+ self.event_type = EventType.DISCOVERY_COMPLETED
392
+ self.data = {
393
+ "job_id": self.job_id,
394
+ "urls_count": self.urls_count,
395
+ "pages_crawled": self.pages_crawled,
396
+ "duration_seconds": self.duration_seconds,
397
+ "errors": self.errors,
398
+ }
399
+
400
+
401
+ @dataclass
402
+ class DiscoveryFailedEvent(Event):
403
+ """Event emitted when discovery fails.
404
+
405
+ Attributes:
406
+ job_id: The discovery job ID.
407
+ error: Error message describing the failure.
408
+ """
409
+
410
+ job_id: str = ""
411
+ error: str = ""
412
+
413
+ def __post_init__(self) -> None:
414
+ """Set event type."""
415
+ self.event_type = EventType.DISCOVERY_FAILED
416
+ self.data = {
417
+ "job_id": self.job_id,
418
+ "error": self.error,
419
+ }
@@ -405,3 +405,230 @@ class DocumentNotFoundError(KnowledgeError):
405
405
  self.details["doc_id"] = doc_id
406
406
  if index_name:
407
407
  self.details["index_name"] = index_name
408
+
409
+
410
+ # === Memory Exceptions ===
411
+
412
+
413
+ class MemoryError(KnowledgeError):
414
+ """Base exception for memory operations.
415
+
416
+ Raised when memory operations fail.
417
+ """
418
+
419
+ pass
420
+
421
+
422
+ class ContainerNotFoundError(MemoryError):
423
+ """Container does not exist.
424
+
425
+ Raised when a memory container cannot be found.
426
+ """
427
+
428
+ def __init__(
429
+ self,
430
+ message: str = "Container not found",
431
+ *,
432
+ container_id: str | None = None,
433
+ **kwargs: Any,
434
+ ) -> None:
435
+ super().__init__(message, **kwargs)
436
+ self.container_id = container_id
437
+ if container_id:
438
+ self.details["container_id"] = container_id
439
+
440
+
441
+ class ContainerExistsError(MemoryError):
442
+ """Container already exists.
443
+
444
+ Raised when attempting to create a container that already exists.
445
+ """
446
+
447
+ def __init__(
448
+ self,
449
+ message: str = "Container already exists",
450
+ *,
451
+ container_name: str | None = None,
452
+ **kwargs: Any,
453
+ ) -> None:
454
+ super().__init__(message, **kwargs)
455
+ self.container_name = container_name
456
+ if container_name:
457
+ self.details["container_name"] = container_name
458
+
459
+
460
+ class SessionNotFoundError(MemoryError):
461
+ """Session does not exist.
462
+
463
+ Raised when a session cannot be found.
464
+ """
465
+
466
+ def __init__(
467
+ self,
468
+ message: str = "Session not found",
469
+ *,
470
+ session_id: str | None = None,
471
+ container_id: str | None = None,
472
+ **kwargs: Any,
473
+ ) -> None:
474
+ super().__init__(message, **kwargs)
475
+ self.session_id = session_id
476
+ self.container_id = container_id
477
+ if session_id:
478
+ self.details["session_id"] = session_id
479
+ if container_id:
480
+ self.details["container_id"] = container_id
481
+
482
+
483
+ class InferenceError(MemoryError):
484
+ """LLM inference failed.
485
+
486
+ Raised when LLM inference for memory extraction fails.
487
+ """
488
+
489
+ def __init__(
490
+ self,
491
+ message: str = "LLM inference failed",
492
+ *,
493
+ model_id: str | None = None,
494
+ strategy: str | None = None,
495
+ **kwargs: Any,
496
+ ) -> None:
497
+ super().__init__(message, **kwargs)
498
+ self.model_id = model_id
499
+ self.strategy = strategy
500
+ if model_id:
501
+ self.details["model_id"] = model_id
502
+ if strategy:
503
+ self.details["strategy"] = strategy
504
+
505
+
506
+ class InferenceTimeoutError(InferenceError):
507
+ """LLM inference timed out.
508
+
509
+ Raised when LLM inference exceeds the configured timeout.
510
+ """
511
+
512
+ def __init__(
513
+ self,
514
+ message: str = "LLM inference timed out",
515
+ *,
516
+ timeout_seconds: float | None = None,
517
+ **kwargs: Any,
518
+ ) -> None:
519
+ super().__init__(message, **kwargs)
520
+ self.timeout_seconds = timeout_seconds
521
+ if timeout_seconds:
522
+ self.details["timeout_seconds"] = timeout_seconds
523
+
524
+
525
+ class MemoryConfigurationError(MemoryError):
526
+ """Memory is not properly configured.
527
+
528
+ Raised when memory configuration is missing or invalid.
529
+ """
530
+
531
+ def __init__(
532
+ self,
533
+ message: str = "Memory configuration error",
534
+ *,
535
+ missing_config: list[str] | None = None,
536
+ **kwargs: Any,
537
+ ) -> None:
538
+ super().__init__(message, **kwargs)
539
+ self.missing_config = missing_config
540
+ if missing_config:
541
+ self.details["missing_config"] = missing_config
542
+
543
+
544
+ # === Discovery Exceptions ===
545
+
546
+
547
+ class DiscoveryError(KnowledgeError):
548
+ """Base exception for discovery operations.
549
+
550
+ Raised when website discovery fails.
551
+ All discovery-related exceptions inherit from this class.
552
+ """
553
+
554
+ def __init__(
555
+ self,
556
+ message: str = "Discovery error",
557
+ *,
558
+ job_id: str | None = None,
559
+ source: str | None = None,
560
+ **kwargs: Any,
561
+ ) -> None:
562
+ """Initialize the exception.
563
+
564
+ Args:
565
+ message: Human-readable error message.
566
+ job_id: The discovery job ID if available.
567
+ source: The source URL being discovered.
568
+ **kwargs: Additional arguments for parent class.
569
+ """
570
+ super().__init__(message, **kwargs)
571
+ self.job_id = job_id
572
+ self.source = source
573
+ if job_id:
574
+ self.details["job_id"] = job_id
575
+ if source:
576
+ self.details["source"] = source
577
+
578
+
579
+ class DiscoveryTimeoutError(DiscoveryError):
580
+ """Discovery job timed out.
581
+
582
+ Raised when a discovery job exceeds its configured timeout
583
+ while waiting for completion.
584
+ """
585
+
586
+ def __init__(
587
+ self,
588
+ message: str = "Discovery job timed out",
589
+ *,
590
+ elapsed: float | None = None,
591
+ timeout: float | None = None,
592
+ **kwargs: Any,
593
+ ) -> None:
594
+ """Initialize the exception.
595
+
596
+ Args:
597
+ message: Human-readable error message.
598
+ elapsed: Time elapsed before timeout.
599
+ timeout: The timeout value that was exceeded.
600
+ **kwargs: Additional arguments for parent class.
601
+ """
602
+ super().__init__(message, **kwargs)
603
+ self.elapsed = elapsed
604
+ self.timeout = timeout
605
+ if elapsed is not None:
606
+ self.details["elapsed"] = elapsed
607
+ if timeout is not None:
608
+ self.details["timeout"] = timeout
609
+
610
+
611
+ class DiscoveryJobFailedError(DiscoveryError):
612
+ """Discovery job failed on the server.
613
+
614
+ Raised when a discovery job completes with a failed or cancelled status.
615
+ """
616
+
617
+ def __init__(
618
+ self,
619
+ message: str = "Discovery job failed",
620
+ *,
621
+ status: str | None = None,
622
+ **kwargs: Any,
623
+ ) -> None:
624
+ """Initialize the exception.
625
+
626
+ Args:
627
+ message: Human-readable error message.
628
+ status: The final job status.
629
+ **kwargs: Additional arguments for parent class.
630
+ """
631
+ super().__init__(message, **kwargs)
632
+ self.status = status
633
+ if status:
634
+ self.details["status"] = status
@@ -5,16 +5,33 @@ from gnosisllm_knowledge.core.interfaces.chunker import ITextChunker
5
5
  from gnosisllm_knowledge.core.interfaces.fetcher import FetchResult, IContentFetcher
6
6
  from gnosisllm_knowledge.core.interfaces.indexer import IDocumentIndexer
7
7
  from gnosisllm_knowledge.core.interfaces.loader import IContentLoader
8
+ from gnosisllm_knowledge.core.interfaces.memory import (
9
+ IHistoryRetriever,
10
+ IMemoryContainerManager,
11
+ IMemoryRetriever,
12
+ IMemoryStats,
13
+ IMemoryStore,
14
+ ISessionManager,
15
+ )
8
16
  from gnosisllm_knowledge.core.interfaces.searcher import IKnowledgeSearcher
9
17
  from gnosisllm_knowledge.core.interfaces.setup import ISetupAdapter
10
18
 
11
19
  __all__ = [
20
+ # Content loading
12
21
  "IContentLoader",
13
22
  "IContentFetcher",
14
23
  "FetchResult",
15
24
  "ITextChunker",
25
+ # Indexing and search
16
26
  "IDocumentIndexer",
17
27
  "IKnowledgeSearcher",
18
28
  "IAgenticSearcher",
19
29
  "ISetupAdapter",
30
+ # Memory
31
+ "IMemoryContainerManager",
32
+ "IMemoryStore",
33
+ "IMemoryRetriever",
34
+ "IHistoryRetriever",
35
+ "ISessionManager",
36
+ "IMemoryStats",
20
37
  ]
@@ -1,4 +1,11 @@
1
- """Agentic searcher protocol - Interface for AI-powered search operations."""
1
+ """Agentic searcher protocol - Interface for AI-powered search operations.
2
+
3
+ Note:
4
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
5
+ isolation (e.g., `knowledge-{account_id}`). Agentic searcher implementations
6
+ should not include tenant filtering logic - callers should use tenant-specific
7
+ indices.
8
+ """
2
9
 
3
10
  from __future__ import annotations
4
11
 
@@ -15,6 +22,9 @@ if TYPE_CHECKING:
15
22
  class IAgenticSearcher(Protocol):
16
23
  """Protocol for agentic search operations using AI agents.
17
24
 
25
+ This protocol is tenant-agnostic. Multi-tenancy is achieved through index
26
+ isolation by using tenant-specific index names.
27
+
18
28
  Agentic searchers are responsible for:
19
29
  - Understanding natural language queries
20
30
  - Automatically constructing optimal search strategies
@@ -107,13 +117,11 @@ class IAgenticSearcher(Protocol):
107
117
 
108
118
  async def list_conversations(
109
119
  self,
110
- account_id: str | None = None,
111
120
  limit: int = 100,
112
121
  ) -> list[dict[str, Any]]:
113
122
  """List active conversations.
114
123
 
115
124
  Args:
116
- account_id: Filter by account (multi-tenant).
117
125
  limit: Maximum number of conversations.
118
126
 
119
127
  Returns:
@@ -1,4 +1,10 @@
1
- """Document indexer protocol - Interface Segregation Principle."""
1
+ """Document indexer protocol - Interface Segregation Principle.
2
+
3
+ Note:
4
+ This library is tenant-agnostic. Multi-tenancy is achieved through index
5
+ isolation (e.g., `knowledge-{account_id}`). Indexer implementations should
6
+ not include tenant filtering logic - callers should use tenant-specific indices.
7
+ """
2
8
 
3
9
  from __future__ import annotations
4
10
 
@@ -14,6 +20,9 @@ if TYPE_CHECKING:
14
20
  class IDocumentIndexer(Protocol):
15
21
  """Protocol for indexing documents into a search backend.
16
22
 
23
+ This protocol is tenant-agnostic. Multi-tenancy is achieved through index
24
+ isolation by using tenant-specific index names.
25
+
17
26
  Document indexers are responsible for:
18
27
  - Generating embeddings for documents
19
28
  - Storing documents in the search backend