gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. gnosisllm_knowledge/__init__.py +91 -39
  2. gnosisllm_knowledge/api/__init__.py +3 -2
  3. gnosisllm_knowledge/api/knowledge.py +287 -7
  4. gnosisllm_knowledge/api/memory.py +966 -0
  5. gnosisllm_knowledge/backends/__init__.py +14 -5
  6. gnosisllm_knowledge/backends/opensearch/agentic.py +341 -39
  7. gnosisllm_knowledge/backends/opensearch/config.py +49 -28
  8. gnosisllm_knowledge/backends/opensearch/indexer.py +1 -0
  9. gnosisllm_knowledge/backends/opensearch/mappings.py +2 -1
  10. gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
  11. gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
  12. gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
  13. gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
  14. gnosisllm_knowledge/backends/opensearch/searcher.py +235 -0
  15. gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
  16. gnosisllm_knowledge/cli/app.py +378 -12
  17. gnosisllm_knowledge/cli/commands/agentic.py +11 -0
  18. gnosisllm_knowledge/cli/commands/memory.py +723 -0
  19. gnosisllm_knowledge/cli/commands/setup.py +24 -22
  20. gnosisllm_knowledge/cli/display/service.py +43 -0
  21. gnosisllm_knowledge/cli/utils/config.py +58 -0
  22. gnosisllm_knowledge/core/domain/__init__.py +41 -0
  23. gnosisllm_knowledge/core/domain/document.py +5 -0
  24. gnosisllm_knowledge/core/domain/memory.py +440 -0
  25. gnosisllm_knowledge/core/domain/result.py +11 -3
  26. gnosisllm_knowledge/core/domain/search.py +2 -0
  27. gnosisllm_knowledge/core/events/types.py +76 -0
  28. gnosisllm_knowledge/core/exceptions.py +134 -0
  29. gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
  30. gnosisllm_knowledge/core/interfaces/memory.py +524 -0
  31. gnosisllm_knowledge/core/interfaces/streaming.py +127 -0
  32. gnosisllm_knowledge/core/streaming/__init__.py +36 -0
  33. gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
  34. gnosisllm_knowledge/loaders/base.py +3 -4
  35. gnosisllm_knowledge/loaders/sitemap.py +129 -1
  36. gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
  37. gnosisllm_knowledge/services/indexing.py +67 -75
  38. gnosisllm_knowledge/services/search.py +47 -11
  39. gnosisllm_knowledge/services/streaming_pipeline.py +302 -0
  40. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/METADATA +44 -1
  41. gnosisllm_knowledge-0.3.0.dist-info/RECORD +77 -0
  42. gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
  43. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/WHEEL +0 -0
  44. {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
@@ -26,12 +26,12 @@ if TYPE_CHECKING:
26
26
 
27
27
  async def setup_command(
28
28
  display: RichDisplayService,
29
- host: str = "localhost",
30
- port: int = 9200,
29
+ host: str | None = None,
30
+ port: int | None = None,
31
31
  username: str | None = None,
32
32
  password: str | None = None,
33
- use_ssl: bool = False,
34
- verify_certs: bool = False,
33
+ use_ssl: bool | None = None,
34
+ verify_certs: bool | None = None,
35
35
  force: bool = False,
36
36
  no_sample_data: bool = False,
37
37
  no_hybrid: bool = False,
@@ -40,24 +40,26 @@ async def setup_command(
40
40
 
41
41
  Args:
42
42
  display: Display service for output.
43
- host: OpenSearch host.
44
- port: OpenSearch port.
45
- username: OpenSearch username.
46
- password: OpenSearch password.
47
- use_ssl: Enable SSL.
48
- verify_certs: Verify SSL certificates.
43
+ host: OpenSearch host (overrides env).
44
+ port: OpenSearch port (overrides env).
45
+ username: OpenSearch username (overrides env).
46
+ password: OpenSearch password (overrides env).
47
+ use_ssl: Enable SSL (overrides env).
48
+ verify_certs: Verify SSL certificates (overrides env).
49
49
  force: Clean up existing resources first.
50
50
  no_sample_data: Skip sample data ingestion.
51
51
  no_hybrid: Skip hybrid search pipeline.
52
52
  """
53
- # Load configuration
53
+ # Load configuration from environment
54
54
  cli_config = CliConfig.from_env()
55
55
 
56
- # Override with CLI arguments
57
- final_host = host or cli_config.opensearch_host
58
- final_port = port or cli_config.opensearch_port
59
- final_username = username or cli_config.opensearch_username
60
- final_password = password or cli_config.opensearch_password
56
+ # CLI arguments override environment variables (only if explicitly provided)
57
+ final_host = host if host is not None else cli_config.opensearch_host
58
+ final_port = port if port is not None else cli_config.opensearch_port
59
+ final_username = username if username is not None else cli_config.opensearch_username
60
+ final_password = password if password is not None else cli_config.opensearch_password
61
+ final_use_ssl = use_ssl if use_ssl is not None else cli_config.opensearch_use_ssl
62
+ final_verify_certs = verify_certs if verify_certs is not None else cli_config.opensearch_verify_certs
61
63
 
62
64
  # Validate required config
63
65
  if not cli_config.openai_api_key:
@@ -79,7 +81,7 @@ async def setup_command(
79
81
  "Configuration",
80
82
  [
81
83
  ("Host", f"{final_host}:{final_port}"),
82
- ("SSL", "Enabled" if use_ssl else "Disabled"),
84
+ ("SSL", "Enabled" if final_use_ssl else "Disabled"),
83
85
  ("Auth", "Configured" if final_username else "None"),
84
86
  ("Hybrid Search", "Disabled" if no_hybrid else "Enabled"),
85
87
  ("Force Recreate", "Yes" if force else "No"),
@@ -94,8 +96,8 @@ async def setup_command(
94
96
  port=final_port,
95
97
  username=final_username,
96
98
  password=final_password,
97
- use_ssl=use_ssl,
98
- verify_certs=verify_certs,
99
+ use_ssl=final_use_ssl,
100
+ verify_certs=final_verify_certs,
99
101
  openai_api_key=cli_config.openai_api_key,
100
102
  embedding_model=cli_config.openai_embedding_model,
101
103
  embedding_dimension=cli_config.openai_embedding_dimension,
@@ -109,8 +111,8 @@ async def setup_command(
109
111
  client = AsyncOpenSearch(
110
112
  hosts=[{"host": final_host, "port": final_port}],
111
113
  http_auth=http_auth,
112
- use_ssl=use_ssl,
113
- verify_certs=verify_certs,
114
+ use_ssl=final_use_ssl,
115
+ verify_certs=final_verify_certs,
114
116
  ssl_show_warn=False,
115
117
  )
116
118
 
@@ -124,7 +126,7 @@ async def setup_command(
124
126
  display.format_error_with_suggestion(
125
127
  error=f"Cannot connect to OpenSearch at {final_host}:{final_port}",
126
128
  suggestion="Ensure OpenSearch is running and accessible.",
127
- command=f"curl http{'s' if use_ssl else ''}://{final_host}:{final_port}",
129
+ command=f"curl http{'s' if final_use_ssl else ''}://{final_host}:{final_port}",
128
130
  )
129
131
  sys.exit(1)
130
132
 
@@ -553,3 +553,46 @@ class RichDisplayService:
553
553
  suggestion="Run agentic setup to create agents.",
554
554
  command="gnosisllm-knowledge agentic setup",
555
555
  )
556
+
557
+ def memory_status(
558
+ self,
559
+ llm_model_id: str | None,
560
+ embedding_model_id: str | None,
561
+ llm_model: str = "gpt-4o",
562
+ embedding_model: str = "text-embedding-3-small",
563
+ ) -> None:
564
+ """Display agentic memory configuration status.
565
+
566
+ Args:
567
+ llm_model_id: LLM model ID if configured.
568
+ embedding_model_id: Embedding model ID if configured.
569
+ llm_model: LLM model name for fact extraction.
570
+ embedding_model: Embedding model name.
571
+ """
572
+ status_rows = []
573
+
574
+ # LLM Model
575
+ if llm_model_id:
576
+ status_rows.append(("LLM Model", "[green]Configured[/green]"))
577
+ status_rows.append((" ID", f"[dim]{llm_model_id}[/dim]"))
578
+ status_rows.append((" Model", llm_model))
579
+ else:
580
+ status_rows.append(("LLM Model", "[red]Not configured[/red]"))
581
+
582
+ # Embedding Model
583
+ if embedding_model_id:
584
+ status_rows.append(("Embedding Model", "[green]Configured[/green]"))
585
+ status_rows.append((" ID", f"[dim]{embedding_model_id}[/dim]"))
586
+ status_rows.append((" Model", embedding_model))
587
+ else:
588
+ status_rows.append(("Embedding Model", "[red]Not configured[/red]"))
589
+
590
+ self.table("Agentic Memory Configuration", status_rows)
591
+
592
+ if not llm_model_id or not embedding_model_id:
593
+ self.newline()
594
+ self.format_error_with_suggestion(
595
+ error="Memory models not configured.",
596
+ suggestion="Run memory setup to create connectors and models.",
597
+ command="gnosisllm-knowledge memory setup --openai-key sk-...",
598
+ )
@@ -42,6 +42,13 @@ class CliConfig:
42
42
  agentic_max_iterations: int = 5
43
43
  agentic_timeout_seconds: int = 60
44
44
 
45
+ # Agentic Memory
46
+ memory_llm_model_id: str | None = None
47
+ memory_embedding_model_id: str | None = None
48
+ memory_llm_model: str = "gpt-4o"
49
+ memory_embedding_model: str = "text-embedding-3-small"
50
+ memory_embedding_dimension: int = 1536
51
+
45
52
  # Neoreader
46
53
  neoreader_host: str = "https://api.neoreader.dev"
47
54
 
@@ -86,6 +93,12 @@ class CliConfig:
86
93
  agentic_llm_model=os.getenv("AGENTIC_LLM_MODEL", "gpt-4o"),
87
94
  agentic_max_iterations=int(os.getenv("AGENTIC_MAX_ITERATIONS", "5")),
88
95
  agentic_timeout_seconds=int(os.getenv("AGENTIC_TIMEOUT_SECONDS", "60")),
96
+ # Agentic Memory configuration
97
+ memory_llm_model_id=os.getenv("OPENSEARCH_MEMORY_LLM_MODEL_ID"),
98
+ memory_embedding_model_id=os.getenv("OPENSEARCH_MEMORY_EMBEDDING_MODEL_ID"),
99
+ memory_llm_model=os.getenv("MEMORY_LLM_MODEL", "gpt-4o"),
100
+ memory_embedding_model=os.getenv("MEMORY_EMBEDDING_MODEL", "text-embedding-3-small"),
101
+ memory_embedding_dimension=int(os.getenv("MEMORY_EMBEDDING_DIMENSION", "1536")),
89
102
  neoreader_host=os.getenv("NEOREADER_HOST", "https://api.neoreader.dev"),
90
103
  )
91
104
 
@@ -205,3 +218,48 @@ class CliConfig:
205
218
  def has_conversational_agent(self) -> bool:
206
219
  """Check if conversational agent is configured."""
207
220
  return bool(self.opensearch_conversational_agent_id)
221
+
222
+ # === Memory Configuration ===
223
+
224
+ def validate_for_memory(self) -> list[str]:
225
+ """Validate configuration for memory commands.
226
+
227
+ Returns:
228
+ List of validation errors (empty if valid).
229
+ """
230
+ errors = []
231
+ if not self.memory_llm_model_id:
232
+ errors.append(
233
+ "OPENSEARCH_MEMORY_LLM_MODEL_ID is required for memory operations. "
234
+ "Run 'gnosisllm-knowledge memory setup' first."
235
+ )
236
+ if not self.memory_embedding_model_id:
237
+ errors.append(
238
+ "OPENSEARCH_MEMORY_EMBEDDING_MODEL_ID is required for memory operations. "
239
+ "Run 'gnosisllm-knowledge memory setup' first."
240
+ )
241
+ return errors
242
+
243
+ def validate_for_memory_setup(self) -> list[str]:
244
+ """Validate configuration for memory setup command.
245
+
246
+ Returns:
247
+ List of validation errors (empty if valid).
248
+ """
249
+ errors = []
250
+ if not self.openai_api_key:
251
+ errors.append(
252
+ "OPENAI_API_KEY is required for memory setup. "
253
+ "Use --openai-key or set the environment variable."
254
+ )
255
+ return errors
256
+
257
+ @property
258
+ def has_memory_models(self) -> bool:
259
+ """Check if memory models are configured."""
260
+ return bool(self.memory_llm_model_id and self.memory_embedding_model_id)
261
+
262
+ @property
263
+ def memory_is_configured(self) -> bool:
264
+ """Check if memory is fully configured for operations."""
265
+ return self.has_memory_models
@@ -1,6 +1,27 @@
1
1
  """Domain models - Value objects and entities."""
2
2
 
3
3
  from gnosisllm_knowledge.core.domain.document import Document, DocumentStatus, TextChunk
4
+ from gnosisllm_knowledge.core.domain.memory import (
5
+ ContainerConfig,
6
+ ContainerIndexSettings,
7
+ ContainerInfo,
8
+ EmbeddingModelType,
9
+ HistoryAction,
10
+ HistoryEntry,
11
+ IndexSettings,
12
+ MemoryEntry,
13
+ MemoryStats,
14
+ MemoryStrategy,
15
+ MemoryType,
16
+ Message,
17
+ Namespace,
18
+ PayloadType,
19
+ RecallResult,
20
+ SessionInfo,
21
+ StoreRequest,
22
+ StoreResult,
23
+ StrategyConfig,
24
+ )
4
25
  from gnosisllm_knowledge.core.domain.result import (
5
26
  BatchResult,
6
27
  IndexResult,
@@ -24,6 +45,26 @@ __all__ = [
24
45
  "Document",
25
46
  "DocumentStatus",
26
47
  "TextChunk",
48
+ # Memory
49
+ "MemoryStrategy",
50
+ "MemoryType",
51
+ "PayloadType",
52
+ "EmbeddingModelType",
53
+ "HistoryAction",
54
+ "StrategyConfig",
55
+ "IndexSettings",
56
+ "ContainerIndexSettings",
57
+ "ContainerConfig",
58
+ "ContainerInfo",
59
+ "Message",
60
+ "Namespace",
61
+ "StoreRequest",
62
+ "StoreResult",
63
+ "MemoryEntry",
64
+ "RecallResult",
65
+ "SessionInfo",
66
+ "HistoryEntry",
67
+ "MemoryStats",
27
68
  # Result
28
69
  "LoadResult",
29
70
  "IndexResult",
@@ -73,6 +73,7 @@ class Document:
73
73
  # Multi-tenancy fields
74
74
  account_id: str | None = None
75
75
  collection_id: str | None = None
76
+ collection_name: str | None = None # For display in aggregations
76
77
  source_id: str | None = None
77
78
 
78
79
  # Chunking info
@@ -140,6 +141,7 @@ class Document:
140
141
  metadata=self.metadata.copy(),
141
142
  account_id=self.account_id,
142
143
  collection_id=self.collection_id,
144
+ collection_name=self.collection_name,
143
145
  source_id=self.source_id,
144
146
  chunk_index=chunk_index,
145
147
  total_chunks=total_chunks,
@@ -156,6 +158,7 @@ class Document:
156
158
  self,
157
159
  account_id: str,
158
160
  collection_id: str | None = None,
161
+ collection_name: str | None = None,
159
162
  source_id: str | None = None,
160
163
  ) -> Document:
161
164
  """Create a new document with tenant information.
@@ -163,6 +166,7 @@ class Document:
163
166
  Args:
164
167
  account_id: Account/tenant identifier.
165
168
  collection_id: Collection identifier.
169
+ collection_name: Collection name for display.
166
170
  source_id: Source identifier.
167
171
 
168
172
  Returns:
@@ -177,6 +181,7 @@ class Document:
177
181
  metadata=self.metadata.copy(),
178
182
  account_id=account_id,
179
183
  collection_id=collection_id or self.collection_id,
184
+ collection_name=collection_name or self.collection_name,
180
185
  source_id=source_id or self.source_id,
181
186
  chunk_index=self.chunk_index,
182
187
  total_chunks=self.total_chunks,