gnosisllm-knowledge 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gnosisllm_knowledge/__init__.py +91 -39
- gnosisllm_knowledge/api/__init__.py +3 -2
- gnosisllm_knowledge/api/knowledge.py +287 -7
- gnosisllm_knowledge/api/memory.py +966 -0
- gnosisllm_knowledge/backends/__init__.py +14 -5
- gnosisllm_knowledge/backends/opensearch/agentic.py +341 -39
- gnosisllm_knowledge/backends/opensearch/config.py +49 -28
- gnosisllm_knowledge/backends/opensearch/indexer.py +1 -0
- gnosisllm_knowledge/backends/opensearch/mappings.py +2 -1
- gnosisllm_knowledge/backends/opensearch/memory/__init__.py +12 -0
- gnosisllm_knowledge/backends/opensearch/memory/client.py +1380 -0
- gnosisllm_knowledge/backends/opensearch/memory/config.py +127 -0
- gnosisllm_knowledge/backends/opensearch/memory/setup.py +322 -0
- gnosisllm_knowledge/backends/opensearch/searcher.py +235 -0
- gnosisllm_knowledge/backends/opensearch/setup.py +308 -148
- gnosisllm_knowledge/cli/app.py +378 -12
- gnosisllm_knowledge/cli/commands/agentic.py +11 -0
- gnosisllm_knowledge/cli/commands/memory.py +723 -0
- gnosisllm_knowledge/cli/commands/setup.py +24 -22
- gnosisllm_knowledge/cli/display/service.py +43 -0
- gnosisllm_knowledge/cli/utils/config.py +58 -0
- gnosisllm_knowledge/core/domain/__init__.py +41 -0
- gnosisllm_knowledge/core/domain/document.py +5 -0
- gnosisllm_knowledge/core/domain/memory.py +440 -0
- gnosisllm_knowledge/core/domain/result.py +11 -3
- gnosisllm_knowledge/core/domain/search.py +2 -0
- gnosisllm_knowledge/core/events/types.py +76 -0
- gnosisllm_knowledge/core/exceptions.py +134 -0
- gnosisllm_knowledge/core/interfaces/__init__.py +17 -0
- gnosisllm_knowledge/core/interfaces/memory.py +524 -0
- gnosisllm_knowledge/core/interfaces/streaming.py +127 -0
- gnosisllm_knowledge/core/streaming/__init__.py +36 -0
- gnosisllm_knowledge/core/streaming/pipeline.py +228 -0
- gnosisllm_knowledge/loaders/base.py +3 -4
- gnosisllm_knowledge/loaders/sitemap.py +129 -1
- gnosisllm_knowledge/loaders/sitemap_streaming.py +258 -0
- gnosisllm_knowledge/services/indexing.py +67 -75
- gnosisllm_knowledge/services/search.py +47 -11
- gnosisllm_knowledge/services/streaming_pipeline.py +302 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/METADATA +44 -1
- gnosisllm_knowledge-0.3.0.dist-info/RECORD +77 -0
- gnosisllm_knowledge-0.2.0.dist-info/RECORD +0 -64
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/WHEEL +0 -0
- {gnosisllm_knowledge-0.2.0.dist-info → gnosisllm_knowledge-0.3.0.dist-info}/entry_points.txt +0 -0
|
@@ -249,7 +249,10 @@ class OpenSearchSetupAdapter:
|
|
|
249
249
|
self._model_id = self._config.model_id
|
|
250
250
|
|
|
251
251
|
# Step 4: Create ingest pipeline
|
|
252
|
-
|
|
252
|
+
# Only create ingest pipeline for global setup (not per-account)
|
|
253
|
+
# Account indices should use the global pipeline to ensure consistent model
|
|
254
|
+
is_global_setup = self._config.index_prefix == "gnosisllm"
|
|
255
|
+
if self._model_id and is_global_setup:
|
|
253
256
|
try:
|
|
254
257
|
await self._create_ingest_pipeline()
|
|
255
258
|
pipeline_name = self._config.ingest_pipeline_name or f"{self._config.index_prefix}-ingest-pipeline"
|
|
@@ -258,28 +261,35 @@ class OpenSearchSetupAdapter:
|
|
|
258
261
|
errors.append(f"Failed to create ingest pipeline: {e}")
|
|
259
262
|
logger.error(f"Failed to create ingest pipeline: {e}")
|
|
260
263
|
|
|
261
|
-
# Step 5: Create search pipeline
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
264
|
+
# Step 5: Create search pipeline (only for global setup)
|
|
265
|
+
if is_global_setup:
|
|
266
|
+
try:
|
|
267
|
+
await self._create_search_pipeline()
|
|
268
|
+
pipeline_name = self._config.search_pipeline_name or f"{self._config.index_prefix}-search-pipeline"
|
|
269
|
+
steps_completed.append(f"Created search pipeline: {pipeline_name}")
|
|
270
|
+
except Exception as e:
|
|
271
|
+
errors.append(f"Failed to create search pipeline: {e}")
|
|
272
|
+
logger.error(f"Failed to create search pipeline: {e}")
|
|
269
273
|
|
|
270
|
-
# Step 6: Create index template
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
+
# Step 6: Create index template (only for global setup)
|
|
275
|
+
# Template covers all gnosisllm-* indices including per-account indices
|
|
276
|
+
if is_global_setup:
|
|
277
|
+
try:
|
|
278
|
+
template_name = f"{self._config.index_prefix}-template"
|
|
279
|
+
template_body = get_index_template(self._config)
|
|
274
280
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
281
|
+
# Ensure template has global pipeline for auto-index creation
|
|
282
|
+
global_pipeline = self._config.ingest_pipeline_name or "gnosisllm-ingest-pipeline"
|
|
283
|
+
template_body["template"]["settings"]["index"]["default_pipeline"] = global_pipeline
|
|
284
|
+
|
|
285
|
+
await self._client.indices.put_index_template(
|
|
286
|
+
name=template_name,
|
|
287
|
+
body=template_body,
|
|
288
|
+
)
|
|
289
|
+
steps_completed.append(f"Created index template: {template_name}")
|
|
290
|
+
except Exception as e:
|
|
291
|
+
errors.append(f"Failed to create index template: {e}")
|
|
292
|
+
logger.error(f"Failed to create index template: {e}")
|
|
283
293
|
|
|
284
294
|
# Step 7: Create knowledge index
|
|
285
295
|
try:
|
|
@@ -288,8 +298,9 @@ class OpenSearchSetupAdapter:
|
|
|
288
298
|
|
|
289
299
|
if not exists:
|
|
290
300
|
settings = get_knowledge_index_settings(self._config)
|
|
291
|
-
# Add default pipeline
|
|
292
|
-
|
|
301
|
+
# Add default pipeline - always use global pipeline for consistency
|
|
302
|
+
# This ensures all accounts use the same embedding model
|
|
303
|
+
pipeline_name = self._config.ingest_pipeline_name or "gnosisllm-ingest-pipeline"
|
|
293
304
|
settings["index"]["default_pipeline"] = pipeline_name
|
|
294
305
|
|
|
295
306
|
await self._client.indices.create(
|
|
@@ -346,6 +357,7 @@ class OpenSearchSetupAdapter:
|
|
|
346
357
|
if not self._config.openai_api_key:
|
|
347
358
|
raise SetupError(
|
|
348
359
|
message="OPENAI_API_KEY required to create connector",
|
|
360
|
+
step="connector",
|
|
349
361
|
details={"hint": "Set OPENAI_API_KEY environment variable"},
|
|
350
362
|
)
|
|
351
363
|
|
|
@@ -442,12 +454,18 @@ class OpenSearchSetupAdapter:
|
|
|
442
454
|
|
|
443
455
|
task_id = response.get("task_id")
|
|
444
456
|
if not task_id:
|
|
445
|
-
raise SetupError(
|
|
457
|
+
raise SetupError(
|
|
458
|
+
message="No task_id returned from model registration",
|
|
459
|
+
step="model_deployment",
|
|
460
|
+
)
|
|
446
461
|
|
|
447
462
|
# Wait for registration
|
|
448
463
|
model_id = await self._wait_for_task(task_id, "model registration")
|
|
449
464
|
if not model_id:
|
|
450
|
-
raise SetupError(
|
|
465
|
+
raise SetupError(
|
|
466
|
+
message="Model registration timed out",
|
|
467
|
+
step="model_deployment",
|
|
468
|
+
)
|
|
451
469
|
|
|
452
470
|
self._model_id = model_id
|
|
453
471
|
|
|
@@ -512,37 +530,44 @@ class OpenSearchSetupAdapter:
|
|
|
512
530
|
async def cleanup(self) -> SetupResult:
|
|
513
531
|
"""Clean up all resources in correct order.
|
|
514
532
|
|
|
533
|
+
Deletes all indices and templates matching the index prefix pattern.
|
|
534
|
+
|
|
515
535
|
Returns:
|
|
516
536
|
Cleanup result.
|
|
517
537
|
"""
|
|
518
538
|
steps_completed: list[str] = []
|
|
519
539
|
errors: list[str] = []
|
|
540
|
+
prefix = self._config.index_prefix
|
|
520
541
|
|
|
521
|
-
# Delete
|
|
522
|
-
try:
|
|
523
|
-
index_name = self._config.knowledge_index_name
|
|
524
|
-
if await self._client.indices.exists(index=index_name):
|
|
525
|
-
await self._client.indices.delete(index=index_name)
|
|
526
|
-
steps_completed.append(f"Deleted index: {index_name}")
|
|
527
|
-
except Exception as e:
|
|
528
|
-
errors.append(f"Failed to delete knowledge index: {e}")
|
|
529
|
-
|
|
530
|
-
# Delete memory index
|
|
542
|
+
# Delete all indices matching prefix-*
|
|
531
543
|
try:
|
|
532
|
-
|
|
533
|
-
if
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
544
|
+
index_pattern = f"{prefix}-*"
|
|
545
|
+
# Check if any indices match the pattern
|
|
546
|
+
indices_response = await self._client.indices.get(index=index_pattern)
|
|
547
|
+
if indices_response:
|
|
548
|
+
for index_name in indices_response.keys():
|
|
549
|
+
try:
|
|
550
|
+
await self._client.indices.delete(index=index_name)
|
|
551
|
+
steps_completed.append(f"Deleted index: {index_name}")
|
|
552
|
+
except Exception as e:
|
|
553
|
+
logger.warning(f"Failed to delete index {index_name}: {e}")
|
|
554
|
+
except Exception:
|
|
555
|
+
pass # No matching indices
|
|
538
556
|
|
|
539
|
-
# Delete index
|
|
557
|
+
# Delete all index templates matching prefix-*
|
|
540
558
|
try:
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
559
|
+
templates_response = await self._client.indices.get_index_template(name=f"{prefix}-*")
|
|
560
|
+
if templates_response and "index_templates" in templates_response:
|
|
561
|
+
for template_info in templates_response["index_templates"]:
|
|
562
|
+
template_name = template_info.get("name")
|
|
563
|
+
if template_name:
|
|
564
|
+
try:
|
|
565
|
+
await self._client.indices.delete_index_template(name=template_name)
|
|
566
|
+
steps_completed.append(f"Deleted template: {template_name}")
|
|
567
|
+
except Exception as e:
|
|
568
|
+
logger.warning(f"Failed to delete template {template_name}: {e}")
|
|
544
569
|
except Exception:
|
|
545
|
-
pass #
|
|
570
|
+
pass # No matching templates
|
|
546
571
|
|
|
547
572
|
# Delete search pipeline
|
|
548
573
|
try:
|
|
@@ -835,6 +860,7 @@ class OpenSearchSetupAdapter:
|
|
|
835
860
|
except Exception as e:
|
|
836
861
|
raise SetupError(
|
|
837
862
|
message=f"Failed to get cluster stats: {e}",
|
|
863
|
+
step="cluster_stats",
|
|
838
864
|
cause=e,
|
|
839
865
|
) from e
|
|
840
866
|
|
|
@@ -865,17 +891,71 @@ class OpenSearchSetupAdapter:
|
|
|
865
891
|
except Exception as e:
|
|
866
892
|
raise SetupError(
|
|
867
893
|
message=f"Failed to get index stats: {e}",
|
|
894
|
+
step="index_stats",
|
|
868
895
|
details={"index_name": index_name},
|
|
869
896
|
cause=e,
|
|
870
897
|
) from e
|
|
871
898
|
|
|
872
899
|
# === Agentic Search Setup Methods ===
|
|
873
900
|
|
|
901
|
+
async def enable_agentic_search(self) -> None:
|
|
902
|
+
"""Enable agentic search cluster settings.
|
|
903
|
+
|
|
904
|
+
The agent framework is enabled by default in OpenSearch 3.x.
|
|
905
|
+
This method verifies that required settings are enabled.
|
|
906
|
+
|
|
907
|
+
Note: The settings plugins.ml_commons.agentic_search_enabled and
|
|
908
|
+
plugins.neural_search.agentic_search_enabled do not exist in
|
|
909
|
+
OpenSearch 3.4+. The agent_framework_enabled and rag_pipeline_feature_enabled
|
|
910
|
+
settings are used instead and are enabled by default.
|
|
911
|
+
|
|
912
|
+
Raises:
|
|
913
|
+
SetupError: If required settings are not enabled.
|
|
914
|
+
"""
|
|
915
|
+
try:
|
|
916
|
+
# Check if agent framework is enabled (required for agents)
|
|
917
|
+
settings = await self._client.cluster.get_settings(
|
|
918
|
+
include_defaults=True,
|
|
919
|
+
flat_settings=True,
|
|
920
|
+
)
|
|
921
|
+
defaults = settings.get("defaults", {})
|
|
922
|
+
|
|
923
|
+
agent_enabled = defaults.get(
|
|
924
|
+
"plugins.ml_commons.agent_framework_enabled", "false"
|
|
925
|
+
)
|
|
926
|
+
rag_enabled = defaults.get(
|
|
927
|
+
"plugins.ml_commons.rag_pipeline_feature_enabled", "false"
|
|
928
|
+
)
|
|
929
|
+
|
|
930
|
+
if agent_enabled != "true":
|
|
931
|
+
raise SetupError(
|
|
932
|
+
message="Agent framework is not enabled. Set plugins.ml_commons.agent_framework_enabled=true",
|
|
933
|
+
step="enable_agentic_search",
|
|
934
|
+
)
|
|
935
|
+
|
|
936
|
+
if rag_enabled != "true":
|
|
937
|
+
logger.warning("RAG pipeline feature is not enabled, some features may be limited")
|
|
938
|
+
|
|
939
|
+
logger.info("Agent framework is enabled (agentic search ready)")
|
|
940
|
+
except SetupError:
|
|
941
|
+
raise
|
|
942
|
+
except Exception as e:
|
|
943
|
+
raise SetupError(
|
|
944
|
+
message=f"Failed to verify agentic search settings: {e}",
|
|
945
|
+
step="enable_agentic_search",
|
|
946
|
+
cause=e,
|
|
947
|
+
) from e
|
|
948
|
+
|
|
874
949
|
async def setup_flow_agent(self) -> str:
|
|
875
|
-
"""Create
|
|
950
|
+
"""Create flow agent with RAGTool for agentic search.
|
|
951
|
+
|
|
952
|
+
Flow agents use RAGTool to perform retrieval-augmented generation:
|
|
953
|
+
1. Search the knowledge base using neural/semantic search
|
|
954
|
+
2. Pass results to LLM for answer generation
|
|
955
|
+
3. Return AI-generated answer with source citations
|
|
876
956
|
|
|
877
|
-
|
|
878
|
-
|
|
957
|
+
This provides a conversational experience where users get natural
|
|
958
|
+
language answers instead of raw search results.
|
|
879
959
|
|
|
880
960
|
Returns:
|
|
881
961
|
Agent ID of the created/existing flow agent.
|
|
@@ -891,27 +971,38 @@ class OpenSearchSetupAdapter:
|
|
|
891
971
|
logger.info(f"Using existing flow agent: {existing}")
|
|
892
972
|
return existing
|
|
893
973
|
|
|
894
|
-
#
|
|
895
|
-
|
|
974
|
+
# Enable agentic search if not already enabled
|
|
975
|
+
await self.enable_agentic_search()
|
|
976
|
+
|
|
977
|
+
# Create LLM model for answer generation
|
|
978
|
+
llm_model_id = await self._setup_llm_model()
|
|
979
|
+
|
|
980
|
+
# Get embedding model ID for neural search
|
|
981
|
+
embedding_model_id = self._model_id or self._config.model_id
|
|
982
|
+
if not embedding_model_id:
|
|
896
983
|
raise SetupError(
|
|
897
|
-
message="Embedding model
|
|
984
|
+
message="Embedding model ID is required for RAGTool. Run 'gnosisllm-knowledge setup' first.",
|
|
898
985
|
step="flow_agent",
|
|
899
|
-
details={"hint": "Run 'gnosisllm-knowledge setup' first to deploy the embedding model."},
|
|
900
986
|
)
|
|
901
987
|
|
|
902
|
-
#
|
|
903
|
-
|
|
988
|
+
# Index pattern for multi-tenant knowledge bases
|
|
989
|
+
# Matches: gnosisllm-<account_id>-knowledge
|
|
990
|
+
index_pattern = f"{self._config.index_prefix}-*-knowledge"
|
|
904
991
|
|
|
905
|
-
# Create
|
|
906
|
-
|
|
907
|
-
|
|
992
|
+
# Create RAGTool configuration
|
|
993
|
+
rag_tool = self._create_rag_tool_config(
|
|
994
|
+
embedding_model_id=embedding_model_id,
|
|
995
|
+
llm_model_id=llm_model_id,
|
|
996
|
+
index_pattern=index_pattern,
|
|
997
|
+
)
|
|
908
998
|
|
|
909
|
-
# Register flow agent with
|
|
999
|
+
# Register flow agent with RAGTool
|
|
1000
|
+
# Flow agents execute tools sequentially and return the last tool's output
|
|
910
1001
|
agent_body = {
|
|
911
1002
|
"name": agent_name,
|
|
912
1003
|
"type": "flow",
|
|
913
|
-
"description": "
|
|
914
|
-
"tools": [
|
|
1004
|
+
"description": "Agentic search agent for GnosisLLM Knowledge - uses RAGTool for conversational AI answers",
|
|
1005
|
+
"tools": [rag_tool],
|
|
915
1006
|
}
|
|
916
1007
|
|
|
917
1008
|
try:
|
|
@@ -921,7 +1012,7 @@ class OpenSearchSetupAdapter:
|
|
|
921
1012
|
body=agent_body,
|
|
922
1013
|
)
|
|
923
1014
|
agent_id = response.get("agent_id")
|
|
924
|
-
logger.info(f"Created flow agent: {agent_id}")
|
|
1015
|
+
logger.info(f"Created flow agent with RAGTool: {agent_id}")
|
|
925
1016
|
return agent_id
|
|
926
1017
|
except Exception as e:
|
|
927
1018
|
raise SetupError(
|
|
@@ -931,10 +1022,11 @@ class OpenSearchSetupAdapter:
|
|
|
931
1022
|
) from e
|
|
932
1023
|
|
|
933
1024
|
async def setup_conversational_agent(self) -> str:
|
|
934
|
-
"""Create
|
|
1025
|
+
"""Create conversational agent with RAGTool and memory.
|
|
935
1026
|
|
|
936
1027
|
Conversational agents support multi-turn dialogue with memory
|
|
937
|
-
persistence. They
|
|
1028
|
+
persistence. They use RAGTool to search and generate AI answers,
|
|
1029
|
+
providing a chat-like experience with context from previous turns.
|
|
938
1030
|
|
|
939
1031
|
Returns:
|
|
940
1032
|
Agent ID of the created/existing conversational agent.
|
|
@@ -950,41 +1042,43 @@ class OpenSearchSetupAdapter:
|
|
|
950
1042
|
logger.info(f"Using existing conversational agent: {existing}")
|
|
951
1043
|
return existing
|
|
952
1044
|
|
|
953
|
-
#
|
|
954
|
-
|
|
1045
|
+
# Enable agentic search if not already enabled
|
|
1046
|
+
await self.enable_agentic_search()
|
|
1047
|
+
|
|
1048
|
+
# Create LLM model for answer generation
|
|
1049
|
+
llm_model_id = await self._setup_llm_model()
|
|
1050
|
+
|
|
1051
|
+
# Get embedding model ID for neural search
|
|
1052
|
+
embedding_model_id = self._model_id or self._config.model_id
|
|
1053
|
+
if not embedding_model_id:
|
|
955
1054
|
raise SetupError(
|
|
956
|
-
message="Embedding model
|
|
1055
|
+
message="Embedding model ID is required for RAGTool. Run 'gnosisllm-knowledge setup' first.",
|
|
957
1056
|
step="conversational_agent",
|
|
958
|
-
details={"hint": "Run 'gnosisllm-knowledge setup' first to deploy the embedding model."},
|
|
959
1057
|
)
|
|
960
1058
|
|
|
961
|
-
#
|
|
962
|
-
|
|
1059
|
+
# Index pattern for multi-tenant knowledge bases
|
|
1060
|
+
index_pattern = f"{self._config.index_prefix}-*-knowledge"
|
|
963
1061
|
|
|
964
|
-
# Create
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
1062
|
+
# Create RAGTool configuration
|
|
1063
|
+
rag_tool = self._create_rag_tool_config(
|
|
1064
|
+
embedding_model_id=embedding_model_id,
|
|
1065
|
+
llm_model_id=llm_model_id,
|
|
1066
|
+
index_pattern=index_pattern,
|
|
969
1067
|
)
|
|
970
1068
|
|
|
971
|
-
# Register conversational
|
|
972
|
-
#
|
|
973
|
-
# - Executes tools sequentially like flow agent
|
|
974
|
-
# - Supports conversation memory for multi-turn dialogue
|
|
975
|
-
# - Injects chat_history from memory when message_history_limit > 0
|
|
1069
|
+
# Register conversational agent with memory support
|
|
1070
|
+
# Use conversational_flow type for simpler tool execution without ReAct prompting
|
|
976
1071
|
agent_body = {
|
|
977
1072
|
"name": agent_name,
|
|
978
1073
|
"type": "conversational_flow",
|
|
979
|
-
"
|
|
980
|
-
"description": "Conversational agent with memory for GnosisLLM Knowledge - supports multi-turn dialogue",
|
|
1074
|
+
"description": "Conversational agentic search for GnosisLLM Knowledge - multi-turn dialogue with memory and AI answers",
|
|
981
1075
|
"llm": {
|
|
982
1076
|
"model_id": llm_model_id,
|
|
983
1077
|
"parameters": {
|
|
984
|
-
"
|
|
1078
|
+
"max_iteration": str(self._config.agentic_max_iterations),
|
|
985
1079
|
},
|
|
986
1080
|
},
|
|
987
|
-
"tools": [
|
|
1081
|
+
"tools": [rag_tool],
|
|
988
1082
|
"memory": {
|
|
989
1083
|
"type": "conversation_index",
|
|
990
1084
|
},
|
|
@@ -997,7 +1091,7 @@ class OpenSearchSetupAdapter:
|
|
|
997
1091
|
body=agent_body,
|
|
998
1092
|
)
|
|
999
1093
|
agent_id = response.get("agent_id")
|
|
1000
|
-
logger.info(f"Created conversational agent: {agent_id}")
|
|
1094
|
+
logger.info(f"Created conversational agent with RAGTool: {agent_id}")
|
|
1001
1095
|
return agent_id
|
|
1002
1096
|
except Exception as e:
|
|
1003
1097
|
raise SetupError(
|
|
@@ -1007,14 +1101,18 @@ class OpenSearchSetupAdapter:
|
|
|
1007
1101
|
) from e
|
|
1008
1102
|
|
|
1009
1103
|
async def setup_agents(self, agent_types: list[str] | None = None) -> dict[str, str]:
|
|
1010
|
-
"""Setup agentic search agents.
|
|
1104
|
+
"""Setup agentic search agents with RAGTool.
|
|
1105
|
+
|
|
1106
|
+
Creates agents using RAGTool for retrieval-augmented generation.
|
|
1107
|
+
Agents search the knowledge base and generate AI-powered answers.
|
|
1108
|
+
Also creates an agentic search pipeline for the flow agent.
|
|
1011
1109
|
|
|
1012
1110
|
Args:
|
|
1013
1111
|
agent_types: List of agent types to setup ('flow', 'conversational').
|
|
1014
1112
|
If None, sets up all agent types.
|
|
1015
1113
|
|
|
1016
1114
|
Returns:
|
|
1017
|
-
Dictionary mapping agent type to agent ID.
|
|
1115
|
+
Dictionary mapping agent type to agent ID, plus agentic_pipeline_name.
|
|
1018
1116
|
|
|
1019
1117
|
Raises:
|
|
1020
1118
|
SetupError: If any agent creation fails.
|
|
@@ -1025,7 +1123,13 @@ class OpenSearchSetupAdapter:
|
|
|
1025
1123
|
results: dict[str, str] = {}
|
|
1026
1124
|
|
|
1027
1125
|
if "flow" in agent_types:
|
|
1028
|
-
|
|
1126
|
+
flow_agent_id = await self.setup_flow_agent()
|
|
1127
|
+
results["flow_agent_id"] = flow_agent_id
|
|
1128
|
+
|
|
1129
|
+
# Create agentic search pipeline for the flow agent
|
|
1130
|
+
# This allows using agentic search via standard _search API
|
|
1131
|
+
pipeline_name = await self.setup_agentic_search_pipeline(flow_agent_id)
|
|
1132
|
+
results["agentic_pipeline_name"] = pipeline_name
|
|
1029
1133
|
|
|
1030
1134
|
if "conversational" in agent_types:
|
|
1031
1135
|
results["conversational_agent_id"] = await self.setup_conversational_agent()
|
|
@@ -1033,7 +1137,7 @@ class OpenSearchSetupAdapter:
|
|
|
1033
1137
|
return results
|
|
1034
1138
|
|
|
1035
1139
|
async def cleanup_agents(self) -> SetupResult:
|
|
1036
|
-
"""Clean up agentic search agents.
|
|
1140
|
+
"""Clean up agentic search agents and pipelines.
|
|
1037
1141
|
|
|
1038
1142
|
Returns:
|
|
1039
1143
|
Cleanup result with steps completed.
|
|
@@ -1041,6 +1145,17 @@ class OpenSearchSetupAdapter:
|
|
|
1041
1145
|
steps_completed: list[str] = []
|
|
1042
1146
|
errors: list[str] = []
|
|
1043
1147
|
|
|
1148
|
+
# Delete agentic search pipeline first
|
|
1149
|
+
pipeline_name = f"{self._config.index_prefix}-agentic-pipeline"
|
|
1150
|
+
try:
|
|
1151
|
+
await self._client.transport.perform_request(
|
|
1152
|
+
"DELETE",
|
|
1153
|
+
f"/_search/pipeline/{pipeline_name}",
|
|
1154
|
+
)
|
|
1155
|
+
steps_completed.append(f"Deleted agentic pipeline: {pipeline_name}")
|
|
1156
|
+
except Exception:
|
|
1157
|
+
pass # Pipeline may not exist
|
|
1158
|
+
|
|
1044
1159
|
# Delete flow agent
|
|
1045
1160
|
flow_agent_name = f"{self._config.index_prefix}-flow-agent"
|
|
1046
1161
|
flow_agent_id = await self._find_agent_by_name(flow_agent_name)
|
|
@@ -1104,93 +1219,135 @@ class OpenSearchSetupAdapter:
|
|
|
1104
1219
|
errors=errors if errors else None,
|
|
1105
1220
|
)
|
|
1106
1221
|
|
|
1107
|
-
def
|
|
1108
|
-
|
|
1222
|
+
def _create_rag_tool_config(
|
|
1223
|
+
self,
|
|
1224
|
+
embedding_model_id: str,
|
|
1225
|
+
llm_model_id: str,
|
|
1226
|
+
index_pattern: str,
|
|
1227
|
+
) -> dict[str, Any]:
|
|
1228
|
+
"""Create RAGTool configuration for agentic search.
|
|
1229
|
+
|
|
1230
|
+
RAGTool (OpenSearch 2.13+) performs retrieval-augmented generation:
|
|
1231
|
+
1. Searches the index using neural/semantic search
|
|
1232
|
+
2. Passes results to LLM for answer generation
|
|
1233
|
+
3. Returns AI-generated answer with source citations
|
|
1234
|
+
|
|
1235
|
+
This provides a conversational experience where users get natural
|
|
1236
|
+
language answers instead of raw search results.
|
|
1109
1237
|
|
|
1110
|
-
|
|
1111
|
-
|
|
1238
|
+
Args:
|
|
1239
|
+
embedding_model_id: Embedding model ID for neural search.
|
|
1240
|
+
llm_model_id: LLM model ID for answer generation.
|
|
1241
|
+
index_pattern: Index pattern to search (supports wildcards).
|
|
1112
1242
|
|
|
1113
1243
|
Returns:
|
|
1114
1244
|
Tool configuration dictionary.
|
|
1115
1245
|
"""
|
|
1116
|
-
|
|
1117
|
-
#
|
|
1118
|
-
|
|
1119
|
-
|
|
1246
|
+
# Prompt template for RAGTool - instructs LLM how to use retrieved context
|
|
1247
|
+
# RAGTool fills ${parameters.output:-} with search results
|
|
1248
|
+
prompt_template = (
|
|
1249
|
+
"You are a helpful assistant. Use the following context to answer the question. "
|
|
1250
|
+
"If the context doesn't contain enough information, say so.\n\n"
|
|
1251
|
+
"Context:\n${parameters.output:-}\n\n"
|
|
1252
|
+
"Question: ${parameters.question}\n\n"
|
|
1253
|
+
"Answer:"
|
|
1254
|
+
)
|
|
1255
|
+
|
|
1120
1256
|
return {
|
|
1121
|
-
"type": "
|
|
1257
|
+
"type": "RAGTool",
|
|
1122
1258
|
"name": "knowledge_search",
|
|
1123
|
-
"description": "Search
|
|
1124
|
-
"
|
|
1259
|
+
"description": "Search knowledge base and generate AI answer. "
|
|
1260
|
+
"Retrieves relevant documents and synthesizes a natural language response.",
|
|
1125
1261
|
"parameters": {
|
|
1126
|
-
"
|
|
1262
|
+
"embedding_model_id": embedding_model_id,
|
|
1263
|
+
"inference_model_id": llm_model_id,
|
|
1127
1264
|
"index": index_pattern,
|
|
1128
1265
|
"embedding_field": self._config.embedding_field,
|
|
1129
|
-
"source_field": '["content", "title", "url"
|
|
1130
|
-
"
|
|
1266
|
+
"source_field": '["content", "title", "url"]',
|
|
1267
|
+
"doc_size": "5",
|
|
1268
|
+
"query_type": "neural",
|
|
1269
|
+
"input": "${parameters.question}",
|
|
1270
|
+
"prompt": prompt_template,
|
|
1131
1271
|
},
|
|
1132
1272
|
}
|
|
1133
1273
|
|
|
1134
|
-
def
|
|
1135
|
-
self, llm_model_id: str
|
|
1274
|
+
def _create_query_planning_tool_config(
|
|
1275
|
+
self, llm_model_id: str
|
|
1136
1276
|
) -> dict[str, Any]:
|
|
1137
|
-
"""Create
|
|
1277
|
+
"""Create QueryPlanningTool configuration for agentic search.
|
|
1278
|
+
|
|
1279
|
+
QueryPlanningTool (OpenSearch 3.2+) translates natural language queries
|
|
1280
|
+
into OpenSearch DSL. The LLM decides the optimal query type based on
|
|
1281
|
+
user intent - keyword, neural, hybrid, or complex aggregations.
|
|
1282
|
+
|
|
1283
|
+
NOTE: QueryPlanningTool only generates DSL - it does NOT generate answers.
|
|
1284
|
+
Use RAGTool for conversational experience with AI-generated answers.
|
|
1138
1285
|
|
|
1139
1286
|
Args:
|
|
1140
|
-
llm_model_id: LLM model ID for
|
|
1141
|
-
include_chat_history: Include chat_history placeholder for conversational agents.
|
|
1287
|
+
llm_model_id: LLM model ID for query generation.
|
|
1142
1288
|
|
|
1143
1289
|
Returns:
|
|
1144
1290
|
Tool configuration dictionary.
|
|
1145
1291
|
"""
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
prompt = """You are a helpful assistant that answers questions based on provided context and conversation history.
|
|
1150
|
-
|
|
1151
|
-
Context from knowledge base:
|
|
1152
|
-
${parameters.knowledge_search.output}
|
|
1292
|
+
# Response filter extracts generated DSL from OpenAI chat completions format
|
|
1293
|
+
# Format: {"choices": [{"message": {"content": "<DSL JSON>"}}]}
|
|
1294
|
+
response_filter = "$.choices[0].message.content"
|
|
1153
1295
|
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1163
|
-
|
|
1164
|
-
|
|
1296
|
+
return {
|
|
1297
|
+
"type": "QueryPlanningTool",
|
|
1298
|
+
"name": "query_planner",
|
|
1299
|
+
"description": "Generate OpenSearch DSL queries from natural language. "
|
|
1300
|
+
"Supports keyword search, neural/semantic search, hybrid search, "
|
|
1301
|
+
"and complex aggregations based on user intent.",
|
|
1302
|
+
"parameters": {
|
|
1303
|
+
"model_id": llm_model_id,
|
|
1304
|
+
"response_filter": response_filter,
|
|
1305
|
+
},
|
|
1306
|
+
}
|
|
1165
1307
|
|
|
1166
|
-
|
|
1167
|
-
|
|
1168
|
-
# Simple prompt for flow agents (no conversation history)
|
|
1169
|
-
prompt = """You are a helpful assistant that answers questions based on provided context.
|
|
1308
|
+
async def setup_agentic_search_pipeline(self, agent_id: str) -> str:
|
|
1309
|
+
"""Create search pipeline with agentic query translator.
|
|
1170
1310
|
|
|
1171
|
-
|
|
1172
|
-
|
|
1311
|
+
This pipeline allows using agentic search via the standard
|
|
1312
|
+
_search API by translating natural language to DSL.
|
|
1173
1313
|
|
|
1174
|
-
|
|
1314
|
+
Args:
|
|
1315
|
+
agent_id: Agent ID to use for query translation.
|
|
1175
1316
|
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
- If the answer is not in the context, say "I don't have enough information to answer this question."
|
|
1179
|
-
- Be concise and accurate
|
|
1180
|
-
- Cite sources when possible using the URLs provided
|
|
1317
|
+
Returns:
|
|
1318
|
+
Pipeline name.
|
|
1181
1319
|
|
|
1182
|
-
|
|
1320
|
+
Raises:
|
|
1321
|
+
SetupError: If pipeline creation fails.
|
|
1322
|
+
"""
|
|
1323
|
+
pipeline_name = f"{self._config.index_prefix}-agentic-pipeline"
|
|
1183
1324
|
|
|
1184
|
-
|
|
1185
|
-
"
|
|
1186
|
-
"
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1325
|
+
pipeline_body = {
|
|
1326
|
+
"description": "GnosisLLM agentic search pipeline - translates natural language to DSL",
|
|
1327
|
+
"request_processors": [
|
|
1328
|
+
{
|
|
1329
|
+
"agentic_query_translator": {
|
|
1330
|
+
"agent_id": agent_id,
|
|
1331
|
+
}
|
|
1332
|
+
}
|
|
1333
|
+
],
|
|
1192
1334
|
}
|
|
1193
1335
|
|
|
1336
|
+
try:
|
|
1337
|
+
await self._client.transport.perform_request(
|
|
1338
|
+
"PUT",
|
|
1339
|
+
f"/_search/pipeline/{pipeline_name}",
|
|
1340
|
+
body=pipeline_body,
|
|
1341
|
+
)
|
|
1342
|
+
logger.info(f"Created agentic search pipeline: {pipeline_name}")
|
|
1343
|
+
return pipeline_name
|
|
1344
|
+
except Exception as e:
|
|
1345
|
+
raise SetupError(
|
|
1346
|
+
message=f"Failed to create agentic search pipeline: {e}",
|
|
1347
|
+
step="agentic_pipeline",
|
|
1348
|
+
cause=e,
|
|
1349
|
+
) from e
|
|
1350
|
+
|
|
1194
1351
|
async def _setup_llm_model(self) -> str:
|
|
1195
1352
|
"""Setup LLM model for agent reasoning.
|
|
1196
1353
|
|
|
@@ -1275,6 +1432,8 @@ Answer:"""
|
|
|
1275
1432
|
details={"hint": "Set OPENAI_API_KEY environment variable"},
|
|
1276
1433
|
)
|
|
1277
1434
|
|
|
1435
|
+
# Connector for RAGTool uses 'prompt' parameter
|
|
1436
|
+
# See: https://docs.opensearch.org/latest/ml-commons-plugin/agents-tools/tools/rag-tool/
|
|
1278
1437
|
connector_body = {
|
|
1279
1438
|
"name": connector_name,
|
|
1280
1439
|
"description": f"OpenAI {self._config.agentic_llm_model} connector for agent reasoning",
|
|
@@ -1295,7 +1454,8 @@ Answer:"""
|
|
|
1295
1454
|
"Authorization": "Bearer ${credential.openAI_key}",
|
|
1296
1455
|
"Content-Type": "application/json",
|
|
1297
1456
|
},
|
|
1298
|
-
|
|
1457
|
+
# RAGTool sends 'prompt' containing question + retrieved context
|
|
1458
|
+
"request_body": '{ "model": "${parameters.model}", "messages": [{"role": "user", "content": "${parameters.prompt}"}] }',
|
|
1299
1459
|
},
|
|
1300
1460
|
],
|
|
1301
1461
|
}
|