gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/__init__.py +91 -39
- gnosisllm_knowledge/api/__init__.py +3 -2
- gnosisllm_knowledge/api/knowledge.py +287 -7
- gnosisllm_knowledge/api/memory.py +966 -0
- gnosisllm_knowledge/backends/__init__.py +14 -5
- gnosisllm_knowledge/backends/opensearch/agentic.py +341 -39
- gnosisllm_knowledge/backends/opensearch/config.py +49 -28
- gnosisllm_knowledge/backends/opensearch/indexer.py +1 -0
- gnosisllm_knowledge/backends/opensearch/mappings.py +2 -1
- gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
- gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
- gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
- gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
- gnosisllm_knowledge/backends/opensearch/searcher.py +235 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
- gnosisllm_knowledge/cli/app.py +378 -12
- gnosisllm_knowledge/cli/commands/agentic.py +11 -0
- gnosisllm_knowledge/cli/commands/memory.py +723 -0
- gnosisllm_knowledge/cli/commands/setup.py +24 -22
- gnosisllm_knowledge/cli/display/service.py +43 -0
- gnosisllm_knowledge/cli/utils/config.py +58 -0
- gnosisllm_knowledge/core/domain/__init__.py +41 -0
- gnosisllm_knowledge/core/domain/document.py +5 -0
- gnosisllm_knowledge/core/domain/memory.py +440 -0
- gnosisllm_knowledge/core/domain/result.py +11 -3
- gnosisllm_knowledge/core/domain/search.py +2 -0
- gnosisllm_knowledge/core/events/types.py +76 -0
- gnosisllm_knowledge/core/exceptions.py +134 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
- gnosisllm_knowledge/core/interfaces/memory.py +524 -0
- gnosisllm_knowledge/core/interfaces/streaming.py +127 -0
- gnosisllm_knowledge/core/streaming/__init__.py +36 -0
- gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
- gnosisllm_knowledge/loaders/base.py +3 -4
- gnosisllm_knowledge/loaders/sitemap.py +129 -1
- gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
- gnosisllm_knowledge/services/indexing.py +67 -75
- gnosisllm_knowledge/services/search.py +47 -11
- gnosisllm_knowledge/services/streaming_pipeline.py +302 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/METADATA +44 -1
- gnosisllm_knowledge-0.3.0.dist-info/RECORD +77 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -26,12 +26,12 @@ if TYPE_CHECKING:
|
|
|
26
26
|
|
|
27
27
|
async def setup_command(
|
|
28
28
|
display: RichDisplayService,
|
|
29
|
-
host: str =
|
|
30
|
-
port: int =
|
|
29
|
+
host: str | None = None,
|
|
30
|
+
port: int | None = None,
|
|
31
31
|
username: str | None = None,
|
|
32
32
|
password: str | None = None,
|
|
33
|
-
use_ssl: bool =
|
|
34
|
-
verify_certs: bool =
|
|
33
|
+
use_ssl: bool | None = None,
|
|
34
|
+
verify_certs: bool | None = None,
|
|
35
35
|
force: bool = False,
|
|
36
36
|
no_sample_data: bool = False,
|
|
37
37
|
no_hybrid: bool = False,
|
|
@@ -40,24 +40,26 @@ async def setup_command(
|
|
|
40
40
|
|
|
41
41
|
Args:
|
|
42
42
|
display: Display service for output.
|
|
43
|
-
host: OpenSearch host.
|
|
44
|
-
port: OpenSearch port.
|
|
45
|
-
username: OpenSearch username.
|
|
46
|
-
password: OpenSearch password.
|
|
47
|
-
use_ssl: Enable SSL.
|
|
48
|
-
verify_certs: Verify SSL certificates.
|
|
43
|
+
host: OpenSearch host (overrides env).
|
|
44
|
+
port: OpenSearch port (overrides env).
|
|
45
|
+
username: OpenSearch username (overrides env).
|
|
46
|
+
password: OpenSearch password (overrides env).
|
|
47
|
+
use_ssl: Enable SSL (overrides env).
|
|
48
|
+
verify_certs: Verify SSL certificates (overrides env).
|
|
49
49
|
force: Clean up existing resources first.
|
|
50
50
|
no_sample_data: Skip sample data ingestion.
|
|
51
51
|
no_hybrid: Skip hybrid search pipeline.
|
|
52
52
|
"""
|
|
53
|
-
# Load configuration
|
|
53
|
+
# Load configuration from environment
|
|
54
54
|
cli_config = CliConfig.from_env()
|
|
55
55
|
|
|
56
|
-
#
|
|
57
|
-
final_host = host
|
|
58
|
-
final_port = port
|
|
59
|
-
final_username = username
|
|
60
|
-
final_password = password
|
|
56
|
+
# CLI arguments override environment variables (only if explicitly provided)
|
|
57
|
+
final_host = host if host is not None else cli_config.opensearch_host
|
|
58
|
+
final_port = port if port is not None else cli_config.opensearch_port
|
|
59
|
+
final_username = username if username is not None else cli_config.opensearch_username
|
|
60
|
+
final_password = password if password is not None else cli_config.opensearch_password
|
|
61
|
+
final_use_ssl = use_ssl if use_ssl is not None else cli_config.opensearch_use_ssl
|
|
62
|
+
final_verify_certs = verify_certs if verify_certs is not None else cli_config.opensearch_verify_certs
|
|
61
63
|
|
|
62
64
|
# Validate required config
|
|
63
65
|
if not cli_config.openai_api_key:
|
|
@@ -79,7 +81,7 @@ async def setup_command(
|
|
|
79
81
|
"Configuration",
|
|
80
82
|
[
|
|
81
83
|
("Host", f"{final_host}:{final_port}"),
|
|
82
|
-
("SSL", "Enabled" if
|
|
84
|
+
("SSL", "Enabled" if final_use_ssl else "Disabled"),
|
|
83
85
|
("Auth", "Configured" if final_username else "None"),
|
|
84
86
|
("Hybrid Search", "Disabled" if no_hybrid else "Enabled"),
|
|
85
87
|
("Force Recreate", "Yes" if force else "No"),
|
|
@@ -94,8 +96,8 @@ async def setup_command(
|
|
|
94
96
|
port=final_port,
|
|
95
97
|
username=final_username,
|
|
96
98
|
password=final_password,
|
|
97
|
-
use_ssl=
|
|
98
|
-
verify_certs=
|
|
99
|
+
use_ssl=final_use_ssl,
|
|
100
|
+
verify_certs=final_verify_certs,
|
|
99
101
|
openai_api_key=cli_config.openai_api_key,
|
|
100
102
|
embedding_model=cli_config.openai_embedding_model,
|
|
101
103
|
embedding_dimension=cli_config.openai_embedding_dimension,
|
|
@@ -109,8 +111,8 @@ async def setup_command(
|
|
|
109
111
|
client = AsyncOpenSearch(
|
|
110
112
|
hosts=[{"host": final_host, "port": final_port}],
|
|
111
113
|
http_auth=http_auth,
|
|
112
|
-
use_ssl=
|
|
113
|
-
verify_certs=
|
|
114
|
+
use_ssl=final_use_ssl,
|
|
115
|
+
verify_certs=final_verify_certs,
|
|
114
116
|
ssl_show_warn=False,
|
|
115
117
|
)
|
|
116
118
|
|
|
@@ -124,7 +126,7 @@ async def setup_command(
|
|
|
124
126
|
display.format_error_with_suggestion(
|
|
125
127
|
error=f"Cannot connect to OpenSearch at {final_host}:{final_port}",
|
|
126
128
|
suggestion="Ensure OpenSearch is running and accessible.",
|
|
127
|
-
command=f"curl http{'s' if
|
|
129
|
+
command=f"curl http{'s' if final_use_ssl else ''}://{final_host}:{final_port}",
|
|
128
130
|
)
|
|
129
131
|
sys.exit(1)
|
|
130
132
|
|
|
@@ -553,3 +553,46 @@ class RichDisplayService:
|
|
|
553
553
|
suggestion="Run agentic setup to create agents.",
|
|
554
554
|
command="gnosisllm-knowledge agentic setup",
|
|
555
555
|
)
|
|
556
|
+
|
|
557
|
+
def memory_status(
|
|
558
|
+
self,
|
|
559
|
+
llm_model_id: str | None,
|
|
560
|
+
embedding_model_id: str | None,
|
|
561
|
+
llm_model: str = "gpt-4o",
|
|
562
|
+
embedding_model: str = "text-embedding-3-small",
|
|
563
|
+
) -> None:
|
|
564
|
+
"""Display agentic memory configuration status.
|
|
565
|
+
|
|
566
|
+
Args:
|
|
567
|
+
llm_model_id: LLM model ID if configured.
|
|
568
|
+
embedding_model_id: Embedding model ID if configured.
|
|
569
|
+
llm_model: LLM model name for fact extraction.
|
|
570
|
+
embedding_model: Embedding model name.
|
|
571
|
+
"""
|
|
572
|
+
status_rows = []
|
|
573
|
+
|
|
574
|
+
# LLM Model
|
|
575
|
+
if llm_model_id:
|
|
576
|
+
status_rows.append(("LLM Model", "[green]Configured[/green]"))
|
|
577
|
+
status_rows.append((" ID", f"[dim]{llm_model_id}[/dim]"))
|
|
578
|
+
status_rows.append((" Model", llm_model))
|
|
579
|
+
else:
|
|
580
|
+
status_rows.append(("LLM Model", "[red]Not configured[/red]"))
|
|
581
|
+
|
|
582
|
+
# Embedding Model
|
|
583
|
+
if embedding_model_id:
|
|
584
|
+
status_rows.append(("Embedding Model", "[green]Configured[/green]"))
|
|
585
|
+
status_rows.append((" ID", f"[dim]{embedding_model_id}[/dim]"))
|
|
586
|
+
status_rows.append((" Model", embedding_model))
|
|
587
|
+
else:
|
|
588
|
+
status_rows.append(("Embedding Model", "[red]Not configured[/red]"))
|
|
589
|
+
|
|
590
|
+
self.table("Agentic Memory Configuration", status_rows)
|
|
591
|
+
|
|
592
|
+
if not llm_model_id or not embedding_model_id:
|
|
593
|
+
self.newline()
|
|
594
|
+
self.format_error_with_suggestion(
|
|
595
|
+
error="Memory models not configured.",
|
|
596
|
+
suggestion="Run memory setup to create connectors and models.",
|
|
597
|
+
command="gnosisllm-knowledge memory setup --openai-key sk-...",
|
|
598
|
+
)
|
|
@@ -42,6 +42,13 @@ class CliConfig:
|
|
|
42
42
|
agentic_max_iterations: int = 5
|
|
43
43
|
agentic_timeout_seconds: int = 60
|
|
44
44
|
|
|
45
|
+
# Agentic Memory
|
|
46
|
+
memory_llm_model_id: str | None = None
|
|
47
|
+
memory_embedding_model_id: str | None = None
|
|
48
|
+
memory_llm_model: str = "gpt-4o"
|
|
49
|
+
memory_embedding_model: str = "text-embedding-3-small"
|
|
50
|
+
memory_embedding_dimension: int = 1536
|
|
51
|
+
|
|
45
52
|
# Neoreader
|
|
46
53
|
neoreader_host: str = "https://api.neoreader.dev"
|
|
47
54
|
|
|
@@ -86,6 +93,12 @@ class CliConfig:
|
|
|
86
93
|
agentic_llm_model=os.getenv("AGENTIC_LLM_MODEL", "gpt-4o"),
|
|
87
94
|
agentic_max_iterations=int(os.getenv("AGENTIC_MAX_ITERATIONS", "5")),
|
|
88
95
|
agentic_timeout_seconds=int(os.getenv("AGENTIC_TIMEOUT_SECONDS", "60")),
|
|
96
|
+
# Agentic Memory configuration
|
|
97
|
+
memory_llm_model_id=os.getenv("OPENSEARCH_MEMORY_LLM_MODEL_ID"),
|
|
98
|
+
memory_embedding_model_id=os.getenv("OPENSEARCH_MEMORY_EMBEDDING_MODEL_ID"),
|
|
99
|
+
memory_llm_model=os.getenv("MEMORY_LLM_MODEL", "gpt-4o"),
|
|
100
|
+
memory_embedding_model=os.getenv("MEMORY_EMBEDDING_MODEL", "text-embedding-3-small"),
|
|
101
|
+
memory_embedding_dimension=int(os.getenv("MEMORY_EMBEDDING_DIMENSION", "1536")),
|
|
89
102
|
neoreader_host=os.getenv("NEOREADER_HOST", "https://api.neoreader.dev"),
|
|
90
103
|
)
|
|
91
104
|
|
|
@@ -205,3 +218,48 @@ class CliConfig:
|
|
|
205
218
|
def has_conversational_agent(self) -> bool:
|
|
206
219
|
"""Check if conversational agent is configured."""
|
|
207
220
|
return bool(self.opensearch_conversational_agent_id)
|
|
221
|
+
|
|
222
|
+
# === Memory Configuration ===
|
|
223
|
+
|
|
224
|
+
def validate_for_memory(self) -> list[str]:
|
|
225
|
+
"""Validate configuration for memory commands.
|
|
226
|
+
|
|
227
|
+
Returns:
|
|
228
|
+
List of validation errors (empty if valid).
|
|
229
|
+
"""
|
|
230
|
+
errors = []
|
|
231
|
+
if not self.memory_llm_model_id:
|
|
232
|
+
errors.append(
|
|
233
|
+
"OPENSEARCH_MEMORY_LLM_MODEL_ID is required for memory operations. "
|
|
234
|
+
"Run 'gnosisllm-knowledge memory setup' first."
|
|
235
|
+
)
|
|
236
|
+
if not self.memory_embedding_model_id:
|
|
237
|
+
errors.append(
|
|
238
|
+
"OPENSEARCH_MEMORY_EMBEDDING_MODEL_ID is required for memory operations. "
|
|
239
|
+
"Run 'gnosisllm-knowledge memory setup' first."
|
|
240
|
+
)
|
|
241
|
+
return errors
|
|
242
|
+
|
|
243
|
+
def validate_for_memory_setup(self) -> list[str]:
|
|
244
|
+
"""Validate configuration for memory setup command.
|
|
245
|
+
|
|
246
|
+
Returns:
|
|
247
|
+
List of validation errors (empty if valid).
|
|
248
|
+
"""
|
|
249
|
+
errors = []
|
|
250
|
+
if not self.openai_api_key:
|
|
251
|
+
errors.append(
|
|
252
|
+
"OPENAI_API_KEY is required for memory setup. "
|
|
253
|
+
"Use --openai-key or set the environment variable."
|
|
254
|
+
)
|
|
255
|
+
return errors
|
|
256
|
+
|
|
257
|
+
@property
|
|
258
|
+
def has_memory_models(self) -> bool:
|
|
259
|
+
"""Check if memory models are configured."""
|
|
260
|
+
return bool(self.memory_llm_model_id and self.memory_embedding_model_id)
|
|
261
|
+
|
|
262
|
+
@property
|
|
263
|
+
def memory_is_configured(self) -> bool:
|
|
264
|
+
"""Check if memory is fully configured for operations."""
|
|
265
|
+
return self.has_memory_models
|
|
@@ -1,6 +1,27 @@
|
|
|
1
1
|
"""Domain models - Value objects and entities."""
|
|
2
2
|
|
|
3
3
|
from gnosisllm_knowledge.core.domain.document import Document, DocumentStatus, TextChunk
|
|
4
|
+
from gnosisllm_knowledge.core.domain.memory import (
|
|
5
|
+
ContainerConfig,
|
|
6
|
+
ContainerIndexSettings,
|
|
7
|
+
ContainerInfo,
|
|
8
|
+
EmbeddingModelType,
|
|
9
|
+
HistoryAction,
|
|
10
|
+
HistoryEntry,
|
|
11
|
+
IndexSettings,
|
|
12
|
+
MemoryEntry,
|
|
13
|
+
MemoryStats,
|
|
14
|
+
MemoryStrategy,
|
|
15
|
+
MemoryType,
|
|
16
|
+
Message,
|
|
17
|
+
Namespace,
|
|
18
|
+
PayloadType,
|
|
19
|
+
RecallResult,
|
|
20
|
+
SessionInfo,
|
|
21
|
+
StoreRequest,
|
|
22
|
+
StoreResult,
|
|
23
|
+
StrategyConfig,
|
|
24
|
+
)
|
|
4
25
|
from gnosisllm_knowledge.core.domain.result import (
|
|
5
26
|
BatchResult,
|
|
6
27
|
IndexResult,
|
|
@@ -24,6 +45,26 @@ __all__ = [
|
|
|
24
45
|
"Document",
|
|
25
46
|
"DocumentStatus",
|
|
26
47
|
"TextChunk",
|
|
48
|
+
# Memory
|
|
49
|
+
"MemoryStrategy",
|
|
50
|
+
"MemoryType",
|
|
51
|
+
"PayloadType",
|
|
52
|
+
"EmbeddingModelType",
|
|
53
|
+
"HistoryAction",
|
|
54
|
+
"StrategyConfig",
|
|
55
|
+
"IndexSettings",
|
|
56
|
+
"ContainerIndexSettings",
|
|
57
|
+
"ContainerConfig",
|
|
58
|
+
"ContainerInfo",
|
|
59
|
+
"Message",
|
|
60
|
+
"Namespace",
|
|
61
|
+
"StoreRequest",
|
|
62
|
+
"StoreResult",
|
|
63
|
+
"MemoryEntry",
|
|
64
|
+
"RecallResult",
|
|
65
|
+
"SessionInfo",
|
|
66
|
+
"HistoryEntry",
|
|
67
|
+
"MemoryStats",
|
|
27
68
|
# Result
|
|
28
69
|
"LoadResult",
|
|
29
70
|
"IndexResult",
|
|
@@ -73,6 +73,7 @@ class Document:
|
|
|
73
73
|
# Multi-tenancy fields
|
|
74
74
|
account_id: str | None = None
|
|
75
75
|
collection_id: str | None = None
|
|
76
|
+
collection_name: str | None = None # For display in aggregations
|
|
76
77
|
source_id: str | None = None
|
|
77
78
|
|
|
78
79
|
# Chunking info
|
|
@@ -140,6 +141,7 @@ class Document:
|
|
|
140
141
|
metadata=self.metadata.copy(),
|
|
141
142
|
account_id=self.account_id,
|
|
142
143
|
collection_id=self.collection_id,
|
|
144
|
+
collection_name=self.collection_name,
|
|
143
145
|
source_id=self.source_id,
|
|
144
146
|
chunk_index=chunk_index,
|
|
145
147
|
total_chunks=total_chunks,
|
|
@@ -156,6 +158,7 @@ class Document:
|
|
|
156
158
|
self,
|
|
157
159
|
account_id: str,
|
|
158
160
|
collection_id: str | None = None,
|
|
161
|
+
collection_name: str | None = None,
|
|
159
162
|
source_id: str | None = None,
|
|
160
163
|
) -> Document:
|
|
161
164
|
"""Create a new document with tenant information.
|
|
@@ -163,6 +166,7 @@ class Document:
|
|
|
163
166
|
Args:
|
|
164
167
|
account_id: Account/tenant identifier.
|
|
165
168
|
collection_id: Collection identifier.
|
|
169
|
+
collection_name: Collection name for display.
|
|
166
170
|
source_id: Source identifier.
|
|
167
171
|
|
|
168
172
|
Returns:
|
|
@@ -177,6 +181,7 @@ class Document:
|
|
|
177
181
|
metadata=self.metadata.copy(),
|
|
178
182
|
account_id=account_id,
|
|
179
183
|
collection_id=collection_id or self.collection_id,
|
|
184
|
+
collection_name=collection_name or self.collection_name,
|
|
180
185
|
source_id=source_id or self.source_id,
|
|
181
186
|
chunk_index=self.chunk_index,
|
|
182
187
|
total_chunks=self.total_chunks,
|