aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Optimized Property Storage for Knowledge Graph Entities
|
|
3
|
+
|
|
4
|
+
Provides optimized storage for entities with large property sets (200+ properties):
|
|
5
|
+
- Sparse property storage for optional properties
|
|
6
|
+
- Property compression for large property sets
|
|
7
|
+
- Property indexing for frequently queried properties
|
|
8
|
+
|
|
9
|
+
This module helps reduce memory footprint and improve query performance
|
|
10
|
+
when dealing with entities that have many properties.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import zlib
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from typing import Any, Dict, List, Optional, Set
|
|
17
|
+
from collections import defaultdict
|
|
18
|
+
import logging
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
# Number of properties at which a property set becomes eligible for
# compression (used as the default compression threshold).
COMPRESSION_THRESHOLD = 200

# Number of properties at which a property set is considered large.
LARGE_PROPERTY_SET_THRESHOLD = 50
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass
class PropertyStorageConfig:
    """Tunable settings that control how entity properties are optimized"""

    # When True, None-valued properties are dropped before storage
    enable_sparse_storage: bool = True

    # When True, sufficiently large property sets may be compressed
    enable_compression: bool = True

    # Property count at which compression starts to apply
    compression_threshold: int = COMPRESSION_THRESHOLD

    # zlib compression level (1-9; higher compresses better but runs slower)
    compression_level: int = 6

    # Names of properties that are indexed from the start for fast lookup
    indexed_properties: Set[str] = field(default_factory=set)

    # Number of queries against a property before it qualifies for auto-indexing
    auto_index_threshold: int = 100
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class CompressedProperties:
    """A zlib-compressed, JSON-serialized property dictionary plus size metadata"""

    # zlib-compressed bytes of the UTF-8 encoded JSON document
    data: bytes

    # How many properties the serialized dictionary contained
    property_count: int

    # Byte length of the JSON payload before compression
    original_size: int

    # Byte length of the payload after compression
    compressed_size: int

    @property
    def compression_ratio(self) -> float:
        """Return compressed size divided by original size (lower is better).

        An empty original payload (size 0) yields 1.0 so that callers never
        hit a division by zero.
        """
        if self.original_size:
            return self.compressed_size / self.original_size
        return 1.0

    def decompress(self) -> Dict[str, Any]:
        """Inflate ``data`` and parse the resulting JSON back into a dict"""
        raw_json = zlib.decompress(self.data).decode('utf-8')
        return json.loads(raw_json)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class PropertyIndex:
    """
    Index for fast property-based lookups

    Maintains a reverse index from property values to entity IDs. Only
    properties registered via ``add_indexed_property`` are indexed; add and
    remove calls for any other property are ignored.

    Values are used as dictionary keys, so values that Python treats as equal
    (for example ``True``, ``1`` and ``1.0``) share a single bucket.
    """

    def __init__(self) -> None:
        # property_name -> value -> set of entity_ids
        self._index: Dict[str, Dict[Any, Set[str]]] = defaultdict(lambda: defaultdict(set))

        # Track indexed properties
        self._indexed_properties: Set[str] = set()

        # Query frequency tracking for auto-indexing
        self._query_counts: Dict[str, int] = defaultdict(int)

    def add_to_index(self, entity_id: str, property_name: str, value: Any) -> None:
        """
        Add a property value to the index

        Args:
            entity_id: ID of the entity that owns the value
            property_name: Property name; ignored unless it is indexed
            value: Property value; ignored if it cannot be made hashable
        """
        if property_name not in self._indexed_properties:
            return

        # Convert value to hashable type if needed
        hashable_value = self._make_hashable(value)
        if hashable_value is not None:
            self._index[property_name][hashable_value].add(entity_id)

    def remove_from_index(self, entity_id: str, property_name: str, value: Any) -> None:
        """
        Remove a property value from the index

        Buckets that become empty are deleted so that the index does not
        accumulate empty sets as entities come and go.

        Args:
            entity_id: ID of the entity to remove
            property_name: Property name; ignored unless it is indexed
            value: Property value the entity was indexed under
        """
        if property_name not in self._indexed_properties:
            return

        hashable_value = self._make_hashable(value)
        if hashable_value is None:
            return

        # .get() is used instead of [] so a miss does not create an empty
        # entry in the defaultdict.
        value_map = self._index.get(property_name)
        if value_map is None:
            return

        entity_ids = value_map.get(hashable_value)
        if entity_ids is None:
            return

        entity_ids.discard(entity_id)

        # Prune empty containers left behind by the removal
        if not entity_ids:
            del value_map[hashable_value]
            if not value_map:
                del self._index[property_name]

    def lookup(self, property_name: str, value: Any) -> Set[str]:
        """
        Look up entity IDs by property value

        Every call increments the query counter for ``property_name``, even
        when the property is not indexed.

        Args:
            property_name: Property name to search
            value: Property value to match

        Returns:
            A new set of matching entity IDs (empty if the property is not
            indexed, the value is not hashable, or nothing matches)
        """
        # Track query frequency
        self._query_counts[property_name] += 1

        if property_name not in self._indexed_properties:
            return set()

        hashable_value = self._make_hashable(value)
        if hashable_value is None:
            return set()

        # Read without triggering the defaultdict factory
        value_map = self._index.get(property_name)
        if value_map is None:
            return set()

        # Return a copy so callers cannot mutate the index
        return set(value_map.get(hashable_value, ()))

    def add_indexed_property(self, property_name: str) -> None:
        """Mark a property as indexed (affects subsequent add_to_index calls)"""
        self._indexed_properties.add(property_name)

    def remove_indexed_property(self, property_name: str) -> None:
        """Remove a property from indexing and drop its index entries"""
        self._indexed_properties.discard(property_name)
        if property_name in self._index:
            del self._index[property_name]

    def get_query_counts(self) -> Dict[str, int]:
        """Get a copy of the query counts for all queried properties"""
        return dict(self._query_counts)

    def _make_hashable(self, value: Any) -> Optional[Any]:
        """
        Convert value to hashable type, or None if not possible

        Scalars (str, int, float, bool) are returned unchanged. Lists and
        tuples whose items are all scalars or None become tuples. Anything
        else, including None itself, yields None.
        """
        if value is None:
            return None
        if isinstance(value, (str, int, float, bool)):
            return value
        if isinstance(value, (list, tuple)):
            return tuple(value) if all(isinstance(v, (str, int, float, bool, type(None))) for v in value) else None
        return None

    def clear(self) -> None:
        """Clear all index entries and query counts"""
        self._index.clear()
        self._query_counts.clear()
        # Keep indexed properties
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
class PropertyOptimizer:
|
|
164
|
+
"""
|
|
165
|
+
Optimizes property storage for entities
|
|
166
|
+
|
|
167
|
+
Provides:
|
|
168
|
+
- Sparse storage (only non-null values)
|
|
169
|
+
- Compression for large property sets
|
|
170
|
+
- Indexing for fast property lookups
|
|
171
|
+
|
|
172
|
+
Example:
|
|
173
|
+
```python
|
|
174
|
+
optimizer = PropertyOptimizer()
|
|
175
|
+
|
|
176
|
+
# Compress large property set
|
|
177
|
+
properties = {"col1": 1, "col2": 2, ..., "col250": 250}
|
|
178
|
+
compressed = optimizer.compress_properties(properties)
|
|
179
|
+
|
|
180
|
+
# Decompress when needed
|
|
181
|
+
original = optimizer.decompress_properties(compressed)
|
|
182
|
+
```
|
|
183
|
+
"""
|
|
184
|
+
|
|
185
|
+
def __init__(self, config: Optional[PropertyStorageConfig] = None):
|
|
186
|
+
"""
|
|
187
|
+
Initialize property optimizer
|
|
188
|
+
|
|
189
|
+
Args:
|
|
190
|
+
config: Configuration for optimization behavior
|
|
191
|
+
"""
|
|
192
|
+
self.config = config or PropertyStorageConfig()
|
|
193
|
+
self._property_index = PropertyIndex()
|
|
194
|
+
|
|
195
|
+
# Add configured indexed properties
|
|
196
|
+
for prop in self.config.indexed_properties:
|
|
197
|
+
self._property_index.add_indexed_property(prop)
|
|
198
|
+
|
|
199
|
+
def compress_properties(self, properties: Dict[str, Any]) -> CompressedProperties:
|
|
200
|
+
"""
|
|
201
|
+
Compress a property dictionary
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
properties: Property dictionary to compress
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
CompressedProperties object
|
|
208
|
+
"""
|
|
209
|
+
# Apply sparse storage first (filter out None values)
|
|
210
|
+
if self.config.enable_sparse_storage:
|
|
211
|
+
properties = {k: v for k, v in properties.items() if v is not None}
|
|
212
|
+
|
|
213
|
+
# Serialize to JSON
|
|
214
|
+
json_str = json.dumps(properties, separators=(',', ':'))
|
|
215
|
+
json_bytes = json_str.encode('utf-8')
|
|
216
|
+
original_size = len(json_bytes)
|
|
217
|
+
|
|
218
|
+
# Compress
|
|
219
|
+
compressed = zlib.compress(json_bytes, level=self.config.compression_level)
|
|
220
|
+
|
|
221
|
+
return CompressedProperties(
|
|
222
|
+
data=compressed,
|
|
223
|
+
property_count=len(properties),
|
|
224
|
+
original_size=original_size,
|
|
225
|
+
compressed_size=len(compressed)
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
def decompress_properties(self, compressed: CompressedProperties) -> Dict[str, Any]:
|
|
229
|
+
"""
|
|
230
|
+
Decompress a CompressedProperties object
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
compressed: CompressedProperties to decompress
|
|
234
|
+
|
|
235
|
+
Returns:
|
|
236
|
+
Original property dictionary
|
|
237
|
+
"""
|
|
238
|
+
return compressed.decompress()
|
|
239
|
+
|
|
240
|
+
def should_compress(self, properties: Dict[str, Any]) -> bool:
|
|
241
|
+
"""
|
|
242
|
+
Determine if properties should be compressed
|
|
243
|
+
|
|
244
|
+
Args:
|
|
245
|
+
properties: Property dictionary
|
|
246
|
+
|
|
247
|
+
Returns:
|
|
248
|
+
True if compression would be beneficial
|
|
249
|
+
"""
|
|
250
|
+
if not self.config.enable_compression:
|
|
251
|
+
return False
|
|
252
|
+
|
|
253
|
+
return len(properties) >= self.config.compression_threshold
|
|
254
|
+
|
|
255
|
+
def optimize_properties(self, properties: Dict[str, Any]) -> Dict[str, Any]:
|
|
256
|
+
"""
|
|
257
|
+
Apply sparse storage optimization
|
|
258
|
+
|
|
259
|
+
Removes None values from properties dict.
|
|
260
|
+
|
|
261
|
+
Args:
|
|
262
|
+
properties: Property dictionary
|
|
263
|
+
|
|
264
|
+
Returns:
|
|
265
|
+
Optimized property dictionary
|
|
266
|
+
"""
|
|
267
|
+
if not self.config.enable_sparse_storage:
|
|
268
|
+
return properties
|
|
269
|
+
|
|
270
|
+
return {k: v for k, v in properties.items() if v is not None}
|
|
271
|
+
|
|
272
|
+
def estimate_memory_savings(self, properties: Dict[str, Any]) -> Dict[str, Any]:
|
|
273
|
+
"""
|
|
274
|
+
Estimate memory savings from optimization
|
|
275
|
+
|
|
276
|
+
Args:
|
|
277
|
+
properties: Property dictionary
|
|
278
|
+
|
|
279
|
+
Returns:
|
|
280
|
+
Dictionary with memory statistics
|
|
281
|
+
"""
|
|
282
|
+
import sys
|
|
283
|
+
|
|
284
|
+
# Original size
|
|
285
|
+
original_json = json.dumps(properties)
|
|
286
|
+
original_size = sys.getsizeof(properties) + sys.getsizeof(original_json)
|
|
287
|
+
|
|
288
|
+
# Sparse storage
|
|
289
|
+
sparse_props = self.optimize_properties(properties)
|
|
290
|
+
sparse_json = json.dumps(sparse_props)
|
|
291
|
+
sparse_size = sys.getsizeof(sparse_props) + sys.getsizeof(sparse_json)
|
|
292
|
+
|
|
293
|
+
# Compressed
|
|
294
|
+
compressed = self.compress_properties(properties)
|
|
295
|
+
compressed_size = sys.getsizeof(compressed.data)
|
|
296
|
+
|
|
297
|
+
return {
|
|
298
|
+
"original_size_bytes": original_size,
|
|
299
|
+
"sparse_size_bytes": sparse_size,
|
|
300
|
+
"compressed_size_bytes": compressed_size,
|
|
301
|
+
"property_count": len(properties),
|
|
302
|
+
"non_null_count": len(sparse_props),
|
|
303
|
+
"null_count": len(properties) - len(sparse_props),
|
|
304
|
+
"sparse_reduction_pct": (1 - sparse_size / original_size) * 100 if original_size > 0 else 0,
|
|
305
|
+
"compression_ratio": compressed.compression_ratio,
|
|
306
|
+
"compression_reduction_pct": (1 - compressed.compression_ratio) * 100
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
# Index operations
|
|
310
|
+
@property
|
|
311
|
+
def property_index(self) -> PropertyIndex:
|
|
312
|
+
"""Get the property index"""
|
|
313
|
+
return self._property_index
|
|
314
|
+
|
|
315
|
+
def index_entity(self, entity_id: str, properties: Dict[str, Any]) -> None:
|
|
316
|
+
"""
|
|
317
|
+
Index all properties for an entity
|
|
318
|
+
|
|
319
|
+
Args:
|
|
320
|
+
entity_id: Entity ID
|
|
321
|
+
properties: Entity properties
|
|
322
|
+
"""
|
|
323
|
+
for prop_name, value in properties.items():
|
|
324
|
+
self._property_index.add_to_index(entity_id, prop_name, value)
|
|
325
|
+
|
|
326
|
+
def unindex_entity(self, entity_id: str, properties: Dict[str, Any]) -> None:
    """
    Withdraw an entity from the property index, one property at a time

    Each name/value pair is forwarded, in the mapping's iteration order,
    to ``self._property_index.remove_from_index``.

    Args:
        entity_id: Entity ID handed to the index for each property
        properties: Mapping of property name to property value
    """
    for pair in properties.items():
        # pair is (property name, property value)
        self._property_index.remove_from_index(entity_id, *pair)
|
|
336
|
+
|
|
337
|
+
def lookup_by_property(self, property_name: str, value: Any) -> Set[str]:
    """
    Find entity IDs through the property index

    Delegates to ``self._property_index.lookup`` and returns its result
    unchanged.

    Args:
        property_name: Property name to search
        value: Property value to match

    Returns:
        Set of matching entity IDs
    """
    matches = self._property_index.lookup(property_name, value)
    return matches
|
|
349
|
+
|
|
350
|
+
def add_indexed_property(self, property_name: str) -> None:
    """Pass ``property_name`` to the property index's ``add_indexed_property``"""
    index = self._property_index
    index.add_indexed_property(property_name)
|
|
353
|
+
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Protocol definitions for graph storage mixins
|
|
3
|
+
|
|
4
|
+
These protocols define the interfaces that mixin classes expect from
|
|
5
|
+
the classes they're mixed into, allowing proper type checking.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Protocol, Optional, List, TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
12
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
13
|
+
import asyncpg # type: ignore[import-untyped]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class PaginationMixinProtocol(Protocol):
    """Structural interface that PaginationMixin requires of its host class"""

    async def get_all_entities(
        self,
        entity_type: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List["Entity"]:
        """
        Get all entities

        Args:
            entity_type: Optional entity type to filter by
            limit: Optional cap on the number of entities returned

        Returns:
            List of entities
        """
        ...
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class LazyLoadingMixinProtocol(Protocol):
    """Structural interface that LazyLoadingMixin requires of its host class"""

    async def get_entity(self, entity_id: str) -> Optional["Entity"]:
        """
        Get entity by ID

        Args:
            entity_id: ID of the entity to fetch

        Returns:
            The entity, or None
        """
        ...

    async def get_all_entities(
        self,
        entity_type: Optional[str] = None,
        limit: Optional[int] = None,
    ) -> List["Entity"]:
        """
        Get all entities

        Args:
            entity_type: Optional entity type to filter by
            limit: Optional cap on the number of entities returned

        Returns:
            List of entities
        """
        ...

    async def get_neighbors(
        self,
        entity_id: str,
        relation_type: Optional[str] = None,
        direction: str = "outgoing",
    ) -> List["Entity"]:
        """
        Get neighboring entities

        Args:
            entity_id: ID of the entity whose neighbors are requested
            relation_type: Optional relation type (defaults to None)
            direction: Direction selector (defaults to "outgoing")

        Returns:
            List of neighboring entities
        """
        ...
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class BatchOperationsMixinProtocol(Protocol):
    """Structural interface that BatchOperationsMixin requires of its host class"""

    if TYPE_CHECKING:
        # Declared under the guard, so this attribute is visible to type
        # checkers only.
        pool: Optional["asyncpg.Pool"]

    def _serialize_embedding(self, embedding: List[float]) -> Optional[bytes]:
        """
        Serialize embedding to bytes for storage

        Args:
            embedding: Embedding vector as a list of floats

        Returns:
            Serialized bytes, or None
        """
        ...
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class GraphMemoryMixinProtocol(Protocol):
|
|
61
|
+
"""Protocol for the host class GraphMemoryMixin expects: declares an optional ``graph_store`` attribute"""
|
|
62
|
+
|
|
63
|
+
if TYPE_CHECKING:
|
|
64
|
+
# GraphStore is imported under the TYPE_CHECKING guard; the annotation below refers to it by string.
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
65
|
+
|
|
66
|
+
graph_store: Optional["GraphStore"]
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class GraphAwareAgentMixinProtocol(Protocol):
|
|
70
|
+
"""Protocol for the host class GraphAwareAgentMixin expects: declares an optional ``graph_store`` attribute"""
|
|
71
|
+
|
|
72
|
+
if TYPE_CHECKING:
|
|
73
|
+
# GraphStore is imported under the TYPE_CHECKING guard; the annotation below refers to it by string.
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
74
|
+
|
|
75
|
+
graph_store: Optional["GraphStore"]
|
|
76
|
+
|