aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
aiecs/config/config.py
ADDED
|
@@ -0,0 +1,498 @@
|
|
|
1
|
+
from pydantic import Field, ConfigDict, field_validator
|
|
2
|
+
from pydantic_settings import BaseSettings
|
|
3
|
+
from functools import lru_cache
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
import logging
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class Settings(BaseSettings):
|
|
12
|
+
# LLM Provider Configuration (optional until used)
|
|
13
|
+
openai_api_key: str = Field(default="", alias="OPENAI_API_KEY")
|
|
14
|
+
googleai_api_key: str = Field(default="", alias="GOOGLEAI_API_KEY")
|
|
15
|
+
vertex_project_id: str = Field(default="", alias="VERTEX_PROJECT_ID")
|
|
16
|
+
vertex_location: str = Field(default="us-central1", alias="VERTEX_LOCATION")
|
|
17
|
+
google_application_credentials: str = Field(default="", alias="GOOGLE_APPLICATION_CREDENTIALS")
|
|
18
|
+
google_api_key: str = Field(default="", alias="GOOGLE_API_KEY")
|
|
19
|
+
google_cse_id: str = Field(default="", alias="GOOGLE_CSE_ID")
|
|
20
|
+
xai_api_key: str = Field(default="", alias="XAI_API_KEY")
|
|
21
|
+
grok_api_key: str = Field(default="", alias="GROK_API_KEY") # Backward compatibility
|
|
22
|
+
|
|
23
|
+
# LLM Models Configuration
|
|
24
|
+
llm_models_config_path: str = Field(
|
|
25
|
+
default="",
|
|
26
|
+
alias="LLM_MODELS_CONFIG",
|
|
27
|
+
description="Path to LLM models YAML configuration file",
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
# Infrastructure Configuration (with sensible defaults)
|
|
31
|
+
celery_broker_url: str = Field(default="redis://localhost:6379/0", alias="CELERY_BROKER_URL")
|
|
32
|
+
cors_allowed_origins: str = Field(
|
|
33
|
+
default="http://localhost:3000,http://express-gateway:3001",
|
|
34
|
+
alias="CORS_ALLOWED_ORIGINS",
|
|
35
|
+
)
|
|
36
|
+
|
|
37
|
+
# PostgreSQL Database Configuration (with defaults)
|
|
38
|
+
db_host: str = Field(default="localhost", alias="DB_HOST")
|
|
39
|
+
db_user: str = Field(default="postgres", alias="DB_USER")
|
|
40
|
+
db_password: str = Field(default="", alias="DB_PASSWORD")
|
|
41
|
+
db_name: str = Field(default="aiecs", alias="DB_NAME")
|
|
42
|
+
db_port: int = Field(default=5432, alias="DB_PORT")
|
|
43
|
+
postgres_url: str = Field(default="", alias="POSTGRES_URL")
|
|
44
|
+
# Connection mode: "local" (use individual parameters) or "cloud" (use POSTGRES_URL)
|
|
45
|
+
# If "cloud" is set, POSTGRES_URL will be used; otherwise individual
|
|
46
|
+
# parameters are used
|
|
47
|
+
db_connection_mode: str = Field(default="local", alias="DB_CONNECTION_MODE")
|
|
48
|
+
|
|
49
|
+
# Google Cloud Storage Configuration (optional)
|
|
50
|
+
google_cloud_project_id: str = Field(default="", alias="GOOGLE_CLOUD_PROJECT_ID")
|
|
51
|
+
google_cloud_storage_bucket: str = Field(default="", alias="GOOGLE_CLOUD_STORAGE_BUCKET")
|
|
52
|
+
|
|
53
|
+
# Qdrant configuration (legacy)
|
|
54
|
+
qdrant_url: str = Field("http://qdrant:6333", alias="QDRANT_URL")
|
|
55
|
+
qdrant_collection: str = Field("documents", alias="QDRANT_COLLECTION")
|
|
56
|
+
|
|
57
|
+
# Vertex AI Vector Search configuration
|
|
58
|
+
vertex_index_id: str | None = Field(default=None, alias="VERTEX_INDEX_ID")
|
|
59
|
+
vertex_endpoint_id: str | None = Field(default=None, alias="VERTEX_ENDPOINT_ID")
|
|
60
|
+
vertex_deployed_index_id: str | None = Field(default=None, alias="VERTEX_DEPLOYED_INDEX_ID")
|
|
61
|
+
|
|
62
|
+
# Vector store backend selection (Qdrant deprecated, using Vertex AI by
|
|
63
|
+
# default)
|
|
64
|
+
vector_store_backend: str = Field(
|
|
65
|
+
"vertex", alias="VECTOR_STORE_BACKEND"
|
|
66
|
+
) # "vertex" (qdrant deprecated)
|
|
67
|
+
|
|
68
|
+
# Development/Server Configuration
|
|
69
|
+
reload: bool = Field(default=False, alias="RELOAD")
|
|
70
|
+
port: int = Field(default=8000, alias="PORT")
|
|
71
|
+
|
|
72
|
+
# Knowledge Graph Configuration
|
|
73
|
+
# Storage backend selection
|
|
74
|
+
kg_storage_backend: Literal["inmemory", "sqlite", "postgresql"] = Field(
|
|
75
|
+
default="inmemory",
|
|
76
|
+
alias="KG_STORAGE_BACKEND",
|
|
77
|
+
description="Knowledge graph storage backend: inmemory (default), sqlite (file-based), or postgresql (production)",
|
|
78
|
+
)
|
|
79
|
+
|
|
80
|
+
# SQLite configuration (for file-based persistence)
|
|
81
|
+
kg_sqlite_db_path: str = Field(
|
|
82
|
+
default="./storage/knowledge_graph.db",
|
|
83
|
+
alias="KG_SQLITE_DB_PATH",
|
|
84
|
+
description="Path to SQLite database file for knowledge graph storage",
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# PostgreSQL configuration (uses main database config by default)
|
|
88
|
+
# If you want a separate database for knowledge graph, set these:
|
|
89
|
+
kg_db_host: str = Field(default="", alias="KG_DB_HOST")
|
|
90
|
+
kg_db_port: int = Field(default=5432, alias="KG_DB_PORT")
|
|
91
|
+
kg_db_user: str = Field(default="", alias="KG_DB_USER")
|
|
92
|
+
kg_db_password: str = Field(default="", alias="KG_DB_PASSWORD")
|
|
93
|
+
kg_db_name: str = Field(default="", alias="KG_DB_NAME")
|
|
94
|
+
kg_postgres_url: str = Field(default="", alias="KG_POSTGRES_URL")
|
|
95
|
+
|
|
96
|
+
# PostgreSQL connection pool settings
|
|
97
|
+
kg_min_pool_size: int = Field(
|
|
98
|
+
default=5,
|
|
99
|
+
alias="KG_MIN_POOL_SIZE",
|
|
100
|
+
description="Minimum number of connections in PostgreSQL pool",
|
|
101
|
+
)
|
|
102
|
+
kg_max_pool_size: int = Field(
|
|
103
|
+
default=20,
|
|
104
|
+
alias="KG_MAX_POOL_SIZE",
|
|
105
|
+
description="Maximum number of connections in PostgreSQL pool",
|
|
106
|
+
)
|
|
107
|
+
|
|
108
|
+
# PostgreSQL pgvector support
|
|
109
|
+
kg_enable_pgvector: bool = Field(
|
|
110
|
+
default=False,
|
|
111
|
+
alias="KG_ENABLE_PGVECTOR",
|
|
112
|
+
description="Enable pgvector extension for optimized vector search (requires pgvector installed)",
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# In-memory configuration
|
|
116
|
+
kg_inmemory_max_nodes: int = Field(
|
|
117
|
+
default=100000,
|
|
118
|
+
alias="KG_INMEMORY_MAX_NODES",
|
|
119
|
+
description="Maximum number of nodes for in-memory storage",
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
# Vector search configuration
|
|
123
|
+
kg_vector_dimension: int = Field(
|
|
124
|
+
default=1536,
|
|
125
|
+
alias="KG_VECTOR_DIMENSION",
|
|
126
|
+
description="Dimension of embedding vectors (default 1536 for OpenAI ada-002)",
|
|
127
|
+
)
|
|
128
|
+
|
|
129
|
+
# Query configuration
|
|
130
|
+
kg_default_search_limit: int = Field(
|
|
131
|
+
default=10,
|
|
132
|
+
alias="KG_DEFAULT_SEARCH_LIMIT",
|
|
133
|
+
description="Default number of results to return in searches",
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
kg_max_traversal_depth: int = Field(
|
|
137
|
+
default=5,
|
|
138
|
+
alias="KG_MAX_TRAVERSAL_DEPTH",
|
|
139
|
+
description="Maximum depth for graph traversal queries",
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
# Cache configuration
|
|
143
|
+
kg_enable_query_cache: bool = Field(
|
|
144
|
+
default=True,
|
|
145
|
+
alias="KG_ENABLE_QUERY_CACHE",
|
|
146
|
+
description="Enable caching of query results",
|
|
147
|
+
)
|
|
148
|
+
|
|
149
|
+
kg_cache_ttl_seconds: int = Field(
|
|
150
|
+
default=300,
|
|
151
|
+
alias="KG_CACHE_TTL_SECONDS",
|
|
152
|
+
description="Time-to-live for cached query results (seconds)",
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
# Feature flags for new capabilities
|
|
156
|
+
kg_enable_runnable_pattern: bool = Field(
|
|
157
|
+
default=True,
|
|
158
|
+
alias="KG_ENABLE_RUNNABLE_PATTERN",
|
|
159
|
+
description="Enable Runnable pattern for composable graph operations",
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
kg_enable_knowledge_fusion: bool = Field(
|
|
163
|
+
default=True,
|
|
164
|
+
alias="KG_ENABLE_KNOWLEDGE_FUSION",
|
|
165
|
+
description="Enable knowledge fusion for cross-document entity merging",
|
|
166
|
+
)
|
|
167
|
+
|
|
168
|
+
kg_enable_reranking: bool = Field(
|
|
169
|
+
default=True,
|
|
170
|
+
alias="KG_ENABLE_RERANKING",
|
|
171
|
+
description="Enable result reranking for improved search relevance",
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
kg_enable_logical_queries: bool = Field(
|
|
175
|
+
default=True,
|
|
176
|
+
alias="KG_ENABLE_LOGICAL_QUERIES",
|
|
177
|
+
description="Enable logical query parsing for structured queries",
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
kg_enable_structured_import: bool = Field(
|
|
181
|
+
default=True,
|
|
182
|
+
alias="KG_ENABLE_STRUCTURED_IMPORT",
|
|
183
|
+
description="Enable structured data import (CSV/JSON)",
|
|
184
|
+
)
|
|
185
|
+
|
|
186
|
+
# Knowledge Fusion configuration
|
|
187
|
+
kg_fusion_similarity_threshold: float = Field(
|
|
188
|
+
default=0.85,
|
|
189
|
+
alias="KG_FUSION_SIMILARITY_THRESHOLD",
|
|
190
|
+
description="Similarity threshold for entity fusion (0.0-1.0)",
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
kg_fusion_conflict_resolution: str = Field(
|
|
194
|
+
default="most_complete",
|
|
195
|
+
alias="KG_FUSION_CONFLICT_RESOLUTION",
|
|
196
|
+
description="Conflict resolution strategy: most_complete, most_recent, most_confident, longest, keep_all",
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# Reranking configuration
|
|
200
|
+
kg_reranking_default_strategy: str = Field(
|
|
201
|
+
default="hybrid",
|
|
202
|
+
alias="KG_RERANKING_DEFAULT_STRATEGY",
|
|
203
|
+
description="Default reranking strategy: text, semantic, structural, hybrid",
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
kg_reranking_top_k: int = Field(
|
|
207
|
+
default=100,
|
|
208
|
+
alias="KG_RERANKING_TOP_K",
|
|
209
|
+
description="Top-K results to fetch before reranking",
|
|
210
|
+
)
|
|
211
|
+
|
|
212
|
+
# Schema cache configuration
|
|
213
|
+
kg_enable_schema_cache: bool = Field(
|
|
214
|
+
default=True,
|
|
215
|
+
alias="KG_ENABLE_SCHEMA_CACHE",
|
|
216
|
+
description="Enable schema caching for improved performance",
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
kg_schema_cache_ttl_seconds: int = Field(
|
|
220
|
+
default=3600,
|
|
221
|
+
alias="KG_SCHEMA_CACHE_TTL_SECONDS",
|
|
222
|
+
description="Time-to-live for cached schemas (seconds)",
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
# Query optimization configuration
|
|
226
|
+
kg_enable_query_optimization: bool = Field(
|
|
227
|
+
default=True,
|
|
228
|
+
alias="KG_ENABLE_QUERY_OPTIMIZATION",
|
|
229
|
+
description="Enable query optimization for better performance",
|
|
230
|
+
)
|
|
231
|
+
|
|
232
|
+
kg_query_optimization_strategy: str = Field(
|
|
233
|
+
default="balanced",
|
|
234
|
+
alias="KG_QUERY_OPTIMIZATION_STRATEGY",
|
|
235
|
+
description="Query optimization strategy: cost, latency, balanced",
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
model_config = ConfigDict(env_file=".env", env_file_encoding="utf-8", extra="allow")
|
|
239
|
+
|
|
240
|
+
@property
def database_config(self) -> dict:
    """
    Get database configuration for asyncpg.

    Supports both a connection string (POSTGRES_URL) and individual
    parameters. The choice is driven by DB_CONNECTION_MODE, which is
    compared case-insensitively and with surrounding whitespace ignored:
    - "cloud": use the POSTGRES_URL connection string
    - any other value: use the individual DB_* parameters

    If DB_CONNECTION_MODE is "cloud" but POSTGRES_URL is empty, a warning
    is logged and the individual parameters are returned instead.

    Returns:
        dict: ``{"dsn": <url>}`` in cloud mode with a URL configured,
        otherwise a dict with the keys ``host``, ``user``, ``password``,
        ``database`` and ``port``.
    """
    # Normalise once so values such as "Cloud" or " cloud " still select
    # cloud mode instead of silently falling through to local parameters.
    cloud_mode = self.db_connection_mode.strip().lower() == "cloud"

    if cloud_mode and self.postgres_url:
        return {"dsn": self.postgres_url}

    if cloud_mode:
        # Cloud mode was requested but there is no URL to connect with.
        logger.warning(
            "DB_CONNECTION_MODE is set to 'cloud' but POSTGRES_URL is not provided. "
            "Falling back to individual parameters (local mode)."
        )

    # Single definition of the individual-parameter form, shared by local
    # mode and by the cloud-without-URL fallback.
    return {
        "host": self.db_host,
        "user": self.db_user,
        "password": self.db_password,
        "database": self.db_name,
        "port": self.db_port,
    }
|
|
280
|
+
|
|
281
|
+
@property
def file_storage_config(self) -> dict:
    """Build the file storage settings mapping for Google Cloud Storage.

    Returns:
        dict: The GCS project id, bucket name and credentials path read
        from this settings object, followed by the fixed local-fallback
        entries (fallback enabled, stored under "./storage").
    """
    # Values that come from the environment-backed settings fields.
    gcs_settings = {
        "gcs_project_id": self.google_cloud_project_id,
        "gcs_bucket_name": self.google_cloud_storage_bucket,
        "gcs_credentials_path": self.google_application_credentials,
    }
    # Values that are hard-coded here rather than configurable.
    local_fallback = {
        "enable_local_fallback": True,
        "local_storage_path": "./storage",
    }
    return {**gcs_settings, **local_fallback}
|
|
291
|
+
|
|
292
|
+
@property
def kg_database_config(self) -> dict:
    """
    Get knowledge graph database configuration.

    The shape of the result depends on ``kg_storage_backend``:
    - "postgresql": connection parameters plus ``min_pool_size``,
      ``max_pool_size`` and ``enable_pgvector``. The connection source is
      chosen in this order: KG_POSTGRES_URL (returned as ``dsn``), then
      KG_DB_HOST together with the other KG_DB_* values, then the main
      ``database_config``.
    - "sqlite": ``{"db_path": <kg_sqlite_db_path>}``
    - any other value (in-memory): ``{"max_nodes": <kg_inmemory_max_nodes>}``

    Returns:
        dict: Backend-specific configuration as described above.
    """
    if self.kg_storage_backend == "sqlite":
        return {"db_path": self.kg_sqlite_db_path}
    if self.kg_storage_backend != "postgresql":  # inmemory
        return {"max_nodes": self.kg_inmemory_max_nodes}

    # PostgreSQL: pick the connection source first, then attach the pool
    # options once instead of repeating them in every branch.
    if self.kg_postgres_url:
        connection = {"dsn": self.kg_postgres_url}
    elif self.kg_db_host:
        # NOTE(review): when KG_DB_HOST is set, user and password are taken
        # only from KG_DB_USER / KG_DB_PASSWORD (empty by default) and are
        # not inherited from the main DB_* settings - confirm this is intended.
        connection = {
            "host": self.kg_db_host,
            "port": self.kg_db_port,
            "user": self.kg_db_user,
            "password": self.kg_db_password,
            "database": self.kg_db_name or "aiecs_knowledge_graph",
        }
    else:
        # Fall back to main database config
        connection = self.database_config

    # Unpacking builds a new dict, so the mapping returned by
    # database_config is never mutated.
    return {
        **connection,
        "min_pool_size": self.kg_min_pool_size,
        "max_pool_size": self.kg_max_pool_size,
        "enable_pgvector": self.kg_enable_pgvector,
    }
|
|
334
|
+
|
|
335
|
+
@property
def kg_query_config(self) -> dict:
    """Collect the knowledge graph query settings into one mapping.

    Returns:
        dict: ``default_search_limit``, ``max_traversal_depth`` and
        ``vector_dimension``, each read from the matching ``kg_*`` field.
    """
    return dict(
        default_search_limit=self.kg_default_search_limit,
        max_traversal_depth=self.kg_max_traversal_depth,
        vector_dimension=self.kg_vector_dimension,
    )
|
|
343
|
+
|
|
344
|
+
@property
def kg_cache_config(self) -> dict:
    """Collect the knowledge graph query-cache settings into one mapping.

    Returns:
        dict: ``enable_query_cache`` and ``cache_ttl_seconds``, read from
        ``kg_enable_query_cache`` and ``kg_cache_ttl_seconds``.
    """
    return dict(
        enable_query_cache=self.kg_enable_query_cache,
        cache_ttl_seconds=self.kg_cache_ttl_seconds,
    )
|
|
351
|
+
|
|
352
|
+
@field_validator("kg_storage_backend")
@classmethod
def validate_kg_storage_backend(cls, v: str) -> str:
    """Reject any knowledge graph storage backend that is not supported.

    Args:
        v: Candidate value for ``kg_storage_backend``.

    Returns:
        ``v`` unchanged when it names a supported backend.

    Raises:
        ValueError: If ``v`` is not one of ``inmemory``, ``sqlite`` or
            ``postgresql``.
    """
    supported = ["inmemory", "sqlite", "postgresql"]
    if v in supported:
        return v

    options = ", ".join(supported)
    raise ValueError(f"Invalid KG_STORAGE_BACKEND: {v}. Must be one of: {options}")
|
|
362
|
+
|
|
363
|
+
@field_validator("kg_sqlite_db_path")
@classmethod
def validate_kg_sqlite_path(cls, v: str) -> str:
    """Make sure the directory that will hold the SQLite database exists.

    Args:
        v: Candidate value for ``kg_sqlite_db_path``.

    Returns:
        ``v`` unchanged.

    Note:
        This validator has a filesystem side effect: for any non-empty value
        other than ``":memory:"`` the parent directory is created if missing.
    """
    # Empty values and the ":memory:" marker have no directory to prepare.
    if not v or v == ":memory:":
        return v

    Path(v).parent.mkdir(parents=True, exist_ok=True)
    return v
|
|
372
|
+
|
|
373
|
+
@field_validator("kg_max_traversal_depth")
@classmethod
def validate_kg_max_traversal_depth(cls, v: int) -> int:
    """Check the lower bound of the traversal depth and warn on large values.

    Args:
        v: Candidate value for ``kg_max_traversal_depth``.

    Returns:
        ``v`` unchanged.

    Raises:
        ValueError: If ``v`` is smaller than 1.
    """
    if v < 1:
        raise ValueError("KG_MAX_TRAVERSAL_DEPTH must be at least 1")

    # Depths above 10 are accepted; they only trigger a warning.
    exceeds_recommended = v > 10
    if exceeds_recommended:
        message = (
            f"KG_MAX_TRAVERSAL_DEPTH is set to {v}, which may cause performance issues. "
            "Consider using a value <= 10 for production use."
        )
        logger.warning(message)
    return v
|
|
385
|
+
|
|
386
|
+
@field_validator("kg_vector_dimension")
@classmethod
def validate_kg_vector_dimension(cls, v: int) -> int:
    """Check that the vector dimension is positive and warn on unusual sizes.

    Args:
        v: Candidate value for ``kg_vector_dimension``.

    Returns:
        ``v`` unchanged.

    Raises:
        ValueError: If ``v`` is smaller than 1.
    """
    if v < 1:
        raise ValueError("KG_VECTOR_DIMENSION must be at least 1")

    # Sizes outside this list are still accepted; they only trigger a warning.
    common_dims = [128, 256, 384, 512, 768, 1024, 1536, 3072]
    is_common = v in common_dims
    if not is_common:
        message = (
            f"KG_VECTOR_DIMENSION is set to {v}, which is not a common embedding dimension. "
            f"Common dimensions are: {common_dims}"
        )
        logger.warning(message)
    return v
|
|
400
|
+
|
|
401
|
+
def validate_llm_models_config(self) -> bool:
    """
    Validate the explicitly configured LLM models configuration file.

    Only an explicitly set ``llm_models_config_path`` is checked. When no
    path is configured the method succeeds unconditionally, because locating
    a default file is left to the config loader. The previous implementation
    also probed for ``llm_models.yaml`` next to this module, but the result
    of that probe never influenced the return value, so it was removed.

    Returns:
        True if no path is configured or the configured file exists

    Raises:
        FileNotFoundError: If a path is configured and the file doesn't exist
    """
    if not self.llm_models_config_path:
        # Nothing configured: the config loader will try to find a file itself.
        return True

    config_path = Path(self.llm_models_config_path)
    if not config_path.exists():
        raise FileNotFoundError(f"LLM models config file not found: {config_path}")
    return True
|
|
426
|
+
|
|
427
|
+
|
|
428
|
+
@lru_cache()
def get_settings():
    """Return the shared ``Settings`` instance.

    The function takes no arguments and is wrapped in ``lru_cache``, so
    ``Settings()`` is constructed on the first call and the same object is
    returned on later calls.
    """
    shared_settings = Settings()
    return shared_settings
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def validate_required_settings(operation_type: str = "full") -> bool:
    """
    Check that the settings needed for a given kind of operation are present.

    Args:
        operation_type: Which group of settings to check
            - "llm": at least one LLM provider must be configured
            - "database": DB_PASSWORD must be set
            - "storage": a storage bucket is required when a Google Cloud
              project id is set
            - "knowledge_graph": backend-specific knowledge graph settings
            - "full": all of the groups above
            Any other value (including "basic") triggers no checks.

    Returns:
        True when nothing required is missing. The function never returns
        False; failures are reported by raising.

    Raises:
        ValueError: If required settings are missing for the operation type
    """
    settings = get_settings()
    check_all = operation_type == "full"
    problems = []

    if check_all or operation_type == "llm":
        # One truthy entry is enough; Vertex AI needs both of its values.
        provider_ready = (
            settings.openai_api_key,
            settings.vertex_project_id and settings.google_application_credentials,
            settings.xai_api_key,
        )
        if not any(provider_ready):
            problems.append("At least one LLM provider (OpenAI, Vertex AI, or xAI)")

    if (check_all or operation_type == "database") and not settings.db_password:
        problems.append("DB_PASSWORD")

    if check_all or operation_type == "storage":
        bucket_missing = not settings.google_cloud_storage_bucket
        if settings.google_cloud_project_id and bucket_missing:
            problems.append(
                "GOOGLE_CLOUD_STORAGE_BUCKET (required when GOOGLE_CLOUD_PROJECT_ID is set)"
            )

    if check_all or operation_type == "knowledge_graph":
        backend = settings.kg_storage_backend
        if backend == "postgresql":
            # Any one of: KG-specific URL, KG-specific host, or the main DB password.
            has_pg_config = (
                settings.kg_postgres_url or settings.kg_db_host or settings.db_password
            )
            if not has_pg_config:
                problems.append(
                    "Knowledge graph PostgreSQL configuration: "
                    "Either set KG_POSTGRES_URL, KG_DB_* parameters, or main DB_PASSWORD"
                )
        elif backend == "sqlite" and not settings.kg_sqlite_db_path:
            problems.append("KG_SQLITE_DB_PATH (required for SQLite backend)")

    if not problems:
        return True

    listed = ", ".join(problems)
    raise ValueError(
        f"Missing required settings for {operation_type} operation: {listed}\n"
        "Please check your .env file or environment variables."
    )
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Knowledge Graph Configuration
|
|
3
|
+
|
|
4
|
+
Configuration settings for knowledge graph storage and operations.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from enum import Enum
from functools import lru_cache

from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class GraphStorageBackend(str, Enum):
    """Names of the graph storage backends that can be selected.

    Members are also ``str`` instances, so each one compares equal to its
    plain string value.
    """

    # Member order is part of the enum's iteration order; keep it stable.
    INMEMORY = "inmemory"
    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class KnowledgeGraphConfig(BaseModel):
    """
    Knowledge Graph Configuration

    Declarative settings for knowledge graph storage, querying, caching and
    optional features. Every field has a default, so ``KnowledgeGraphConfig()``
    yields a fully populated configuration.

    The class derives from pydantic's ``BaseModel`` so that the ``Field(...)``
    declarations below become real, validated attributes. As a plain class,
    reading e.g. ``config.vector_dimension`` returned the ``FieldInfo``
    declaration object instead of ``1536``.

    NOTE(review): the earlier docstring said this class integrates with AIECS
    Settings through environment variables. Nothing in this class reads the
    environment; confirm where that wiring is expected to happen.
    """

    # Storage backend selection
    backend: GraphStorageBackend = Field(
        default=GraphStorageBackend.INMEMORY,
        description="Graph storage backend to use",
    )

    # SQLite configuration (for file-based persistence)
    sqlite_db_path: str = Field(
        default="./storage/knowledge_graph.db",
        description="Path to SQLite database file",
    )

    # In-memory configuration
    inmemory_max_nodes: int = Field(
        default=100000,
        description="Maximum number of nodes for in-memory storage",
    )

    # Vector search configuration
    vector_dimension: int = Field(
        default=1536,
        description="Dimension of embedding vectors (default for OpenAI ada-002)",
    )

    # Query configuration
    default_search_limit: int = Field(
        default=10,
        description="Default number of results to return in searches",
    )

    max_traversal_depth: int = Field(
        default=5, description="Maximum depth for graph traversal queries"
    )

    # Cache configuration
    enable_query_cache: bool = Field(default=True, description="Enable caching of query results")

    cache_ttl_seconds: int = Field(
        default=300,
        description="Time-to-live for cached query results (seconds)",
    )

    # Feature flags for new capabilities
    enable_runnable_pattern: bool = Field(
        default=True,
        description="Enable Runnable pattern for composable graph operations",
    )

    enable_knowledge_fusion: bool = Field(
        default=True,
        description="Enable knowledge fusion for cross-document entity merging",
    )

    enable_reranking: bool = Field(
        default=True,
        description="Enable result reranking for improved search relevance",
    )

    enable_logical_queries: bool = Field(
        default=True,
        description="Enable logical query parsing for structured queries",
    )

    enable_structured_import: bool = Field(
        default=True, description="Enable structured data import (CSV/JSON)"
    )

    # Knowledge Fusion configuration
    fusion_similarity_threshold: float = Field(
        default=0.85,
        description="Similarity threshold for entity fusion (0.0-1.0)",
    )

    fusion_conflict_resolution: str = Field(
        default="most_complete",
        description="Conflict resolution strategy: most_complete, most_recent, most_confident, longest, keep_all",
    )

    # Reranking configuration
    reranking_default_strategy: str = Field(
        default="hybrid",
        description="Default reranking strategy: text, semantic, structural, hybrid",
    )

    reranking_top_k: int = Field(default=100, description="Top-K results to fetch before reranking")

    # Schema cache configuration
    enable_schema_cache: bool = Field(
        default=True,
        description="Enable schema caching for improved performance",
    )

    schema_cache_ttl_seconds: int = Field(
        default=3600, description="Time-to-live for cached schemas (seconds)"
    )

    # Query optimization configuration
    enable_query_optimization: bool = Field(
        default=True,
        description="Enable query optimization for better performance",
    )

    query_optimization_strategy: str = Field(
        default="balanced",
        description="Query optimization strategy: cost, latency, balanced",
    )
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@lru_cache()
def get_graph_config() -> KnowledgeGraphConfig:
    """Get knowledge graph configuration singleton.

    The function is cached with ``lru_cache`` and takes no arguments, so the
    configuration object is built on the first call and that same instance
    is returned afterwards. Previously every call constructed a new
    ``KnowledgeGraphConfig``, which contradicted the documented singleton
    behavior.

    Returns:
        The shared ``KnowledgeGraphConfig`` instance.
    """
    return KnowledgeGraphConfig()
|
aiecs/config/registry.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# Maps a (mode, service) pair to the class registered under that pair.
AI_SERVICE_REGISTRY = {}


def register_ai_service(mode: str, service: str):
    """
    Build a class decorator that records the class in AI_SERVICE_REGISTRY.

    The decorated class is stored under the key (mode, service) and returned
    unchanged. Registering a second class under the same key replaces the
    first one.
    """
    registry_key = (mode, service)

    def decorator(cls):
        AI_SERVICE_REGISTRY[registry_key] = cls
        return cls

    return decorator


def get_ai_service(mode: str, service: str):
    """
    Return the class registered for the given mode and service name.

    Raises:
        ValueError: If nothing has been registered under (mode, service).
    """
    lookup = (mode, service)
    if lookup in AI_SERVICE_REGISTRY:
        return AI_SERVICE_REGISTRY[lookup]
    raise ValueError(f"No registered service for mode '{mode}', service '{service}'")
|