aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic. Click here for more details.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Batch Operations for Graph Storage
|
|
3
|
+
|
|
4
|
+
Provides efficient batch operations for bulk inserts, updates, and deletes.
|
|
5
|
+
Uses PostgreSQL COPY and multi-row INSERT for optimal performance.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import asyncpg # type: ignore[import-untyped]
|
|
9
|
+
import logging
|
|
10
|
+
import io
|
|
11
|
+
from typing import List, TYPE_CHECKING, Optional
|
|
12
|
+
import json
|
|
13
|
+
|
|
14
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
15
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
from aiecs.infrastructure.graph_storage.protocols import BatchOperationsMixinProtocol
|
|
19
|
+
|
|
20
|
+
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class BatchOperationsMixin:
|
|
24
|
+
"""
|
|
25
|
+
Mixin providing batch operations for graph stores
|
|
26
|
+
|
|
27
|
+
This mixin adds efficient batch insert/update/delete methods
|
|
28
|
+
using PostgreSQL-specific optimizations like COPY and multi-row INSERT.
|
|
29
|
+
|
|
30
|
+
This mixin expects the class it's mixed into to implement `BatchOperationsMixinProtocol`,
|
|
31
|
+
specifically the `pool` attribute and `_serialize_embedding()` method.
|
|
32
|
+
|
|
33
|
+
Example:
|
|
34
|
+
```python
|
|
35
|
+
class MyGraphStore(GraphStore, BatchOperationsMixin):
|
|
36
|
+
pass
|
|
37
|
+
|
|
38
|
+
store = MyGraphStore()
|
|
39
|
+
await store.batch_add_entities([entity1, entity2, ...], batch_size=1000)
|
|
40
|
+
```
|
|
41
|
+
"""
|
|
42
|
+
|
|
43
|
+
if TYPE_CHECKING:
    # Type hints for mypy: this mixin expects BatchOperationsMixinProtocol.
    # Everything in this branch is declaration-only: TYPE_CHECKING is False at
    # runtime, so neither the attribute nor the stub below exists on the mixin
    # itself. The class this mixin is combined with must provide both.

    # Connection pool used by every batch method in this mixin; the methods
    # raise RuntimeError when it is missing or None.
    pool: Optional[asyncpg.Pool]

    def _serialize_embedding(self, embedding: List[float]) -> Optional[bytes]:
        """
        Expected method from BatchOperationsMixinProtocol

        Args:
            embedding: Embedding vector of an entity

        Returns:
            The embedding encoded as bytes, or None. The batch methods pass
            this value on as the content of the ``embedding`` column.
        """
        ...
|
|
50
|
+
|
|
51
|
+
async def batch_add_entities(
    self,
    entities: List[Entity],
    batch_size: int = 1000,
    use_copy: bool = True,
) -> int:
    """
    Add multiple entities efficiently

    With ``use_copy=True`` the whole list is handed to the COPY-based helper in
    one call and ``batch_size`` is not used. With ``use_copy=False`` the list is
    sliced into chunks of ``batch_size`` and every chunk is written with one
    multi-row INSERT.

    Args:
        entities: List of entities to add
        batch_size: Number of entities per batch (multi-row INSERT path only)
        use_copy: Use PostgreSQL COPY for better performance

    Returns:
        Number of entities added (sum of the counts reported by the helpers)

    Raises:
        RuntimeError: If the store has no initialized connection pool
        ValueError: If ``use_copy`` is False and ``batch_size`` is smaller than 1

    Example:
        ```python
        entities = [
            Entity(id="e1", entity_type="Person", properties={"name": "Alice"}),
            Entity(id="e2", entity_type="Person", properties={"name": "Bob"}),
            # ... thousands more
        ]
        count = await store.batch_add_entities(entities, batch_size=1000)
        ```
    """
    if not entities:
        return 0

    if not getattr(self, "pool", None):
        raise RuntimeError("GraphStore not initialized")

    if use_copy:
        # Use COPY for maximum performance
        total_added = await self._batch_add_entities_copy(entities)
    else:
        # A negative step would make range() empty and silently write nothing,
        # a zero step fails with an unrelated range() error: reject both early.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        # Use multi-row INSERT
        total_added = 0
        for start in range(0, len(entities), batch_size):
            total_added += await self._batch_add_entities_insert(entities[start : start + batch_size])

    logger.info("Batch added %s entities", total_added)
    return total_added
|
|
98
|
+
|
|
99
|
+
async def _batch_add_entities_copy(self, entities: List[Entity]) -> int:
|
|
100
|
+
"""
|
|
101
|
+
Add entities using PostgreSQL COPY (fastest method)
|
|
102
|
+
|
|
103
|
+
Args:
|
|
104
|
+
entities: List of entities to add
|
|
105
|
+
|
|
106
|
+
Returns:
|
|
107
|
+
Number of entities added
|
|
108
|
+
"""
|
|
109
|
+
if not entities:
|
|
110
|
+
return 0
|
|
111
|
+
|
|
112
|
+
# Prepare data for COPY
|
|
113
|
+
copy_data = io.StringIO()
|
|
114
|
+
for entity in entities:
|
|
115
|
+
# Serialize data
|
|
116
|
+
properties_json = json.dumps(entity.properties)
|
|
117
|
+
embedding_bytes = self._serialize_embedding(entity.embedding) if hasattr(entity, "embedding") and entity.embedding else None
|
|
118
|
+
|
|
119
|
+
# Write tab-separated values
|
|
120
|
+
# Format: id \t entity_type \t properties \t embedding
|
|
121
|
+
embedding_hex = embedding_bytes.hex() if embedding_bytes else "\\N"
|
|
122
|
+
copy_data.write(f"{entity.id}\t{entity.entity_type}\t{properties_json}\t{embedding_hex}\n")
|
|
123
|
+
|
|
124
|
+
copy_data.seek(0)
|
|
125
|
+
|
|
126
|
+
# Execute COPY
|
|
127
|
+
if self.pool is None:
|
|
128
|
+
raise RuntimeError("Connection pool not initialized")
|
|
129
|
+
async with self.pool.acquire() as conn:
|
|
130
|
+
try:
|
|
131
|
+
result = await conn.copy_to_table(
|
|
132
|
+
"graph_entities",
|
|
133
|
+
source=copy_data,
|
|
134
|
+
columns=["id", "entity_type", "properties", "embedding"],
|
|
135
|
+
format="text",
|
|
136
|
+
)
|
|
137
|
+
# Parse result to get row count
|
|
138
|
+
# Result format: "COPY n" where n is number of rows
|
|
139
|
+
if result and result.startswith("COPY"):
|
|
140
|
+
return int(result.split()[1])
|
|
141
|
+
return len(entities)
|
|
142
|
+
except asyncpg.UniqueViolationError as e:
|
|
143
|
+
logger.warning(f"Duplicate entities in batch: {e}")
|
|
144
|
+
# Fall back to individual inserts with ON CONFLICT
|
|
145
|
+
return await self._batch_add_entities_insert(entities)
|
|
146
|
+
except Exception as e:
|
|
147
|
+
logger.error(f"COPY failed: {e}")
|
|
148
|
+
# Fall back to INSERT
|
|
149
|
+
return await self._batch_add_entities_insert(entities)
|
|
150
|
+
|
|
151
|
+
async def _batch_add_entities_insert(self, entities: List[Entity]) -> int:
|
|
152
|
+
"""
|
|
153
|
+
Add entities using multi-row INSERT with ON CONFLICT
|
|
154
|
+
|
|
155
|
+
Args:
|
|
156
|
+
entities: List of entities to add
|
|
157
|
+
|
|
158
|
+
Returns:
|
|
159
|
+
Number of entities added/updated
|
|
160
|
+
"""
|
|
161
|
+
if not entities:
|
|
162
|
+
return 0
|
|
163
|
+
|
|
164
|
+
# Build multi-row INSERT
|
|
165
|
+
values_placeholders = []
|
|
166
|
+
values = []
|
|
167
|
+
|
|
168
|
+
for i, entity in enumerate(entities):
|
|
169
|
+
base_idx = i * 4
|
|
170
|
+
values_placeholders.append(f"(${base_idx+1}, ${base_idx+2}, ${base_idx+3}::jsonb, ${base_idx+4})")
|
|
171
|
+
|
|
172
|
+
properties_json = json.dumps(entity.properties)
|
|
173
|
+
embedding_blob = self._serialize_embedding(entity.embedding) if hasattr(entity, "embedding") and entity.embedding else None
|
|
174
|
+
|
|
175
|
+
values.extend(
|
|
176
|
+
[
|
|
177
|
+
entity.id,
|
|
178
|
+
entity.entity_type,
|
|
179
|
+
properties_json,
|
|
180
|
+
embedding_blob,
|
|
181
|
+
]
|
|
182
|
+
)
|
|
183
|
+
|
|
184
|
+
query = f"""
|
|
185
|
+
INSERT INTO graph_entities (id, entity_type, properties, embedding)
|
|
186
|
+
VALUES {', '.join(values_placeholders)}
|
|
187
|
+
ON CONFLICT (id) DO UPDATE SET
|
|
188
|
+
entity_type = EXCLUDED.entity_type,
|
|
189
|
+
properties = EXCLUDED.properties,
|
|
190
|
+
embedding = EXCLUDED.embedding,
|
|
191
|
+
updated_at = CURRENT_TIMESTAMP
|
|
192
|
+
"""
|
|
193
|
+
|
|
194
|
+
if self.pool is None:
|
|
195
|
+
raise RuntimeError("Connection pool not initialized")
|
|
196
|
+
async with self.pool.acquire() as conn:
|
|
197
|
+
try:
|
|
198
|
+
await conn.execute(query, *values)
|
|
199
|
+
return len(entities)
|
|
200
|
+
except Exception as e:
|
|
201
|
+
logger.error(f"Batch insert failed: {e}")
|
|
202
|
+
raise
|
|
203
|
+
|
|
204
|
+
async def batch_add_relations(
    self,
    relations: List[Relation],
    batch_size: int = 1000,
    use_copy: bool = True,
) -> int:
    """
    Add multiple relations efficiently

    With ``use_copy=True`` the whole list is handed to the COPY-based helper in
    one call and ``batch_size`` is not used. With ``use_copy=False`` the list is
    sliced into chunks of ``batch_size`` and every chunk is written with one
    multi-row INSERT.

    Args:
        relations: List of relations to add
        batch_size: Number of relations per batch (multi-row INSERT path only)
        use_copy: Use PostgreSQL COPY for better performance

    Returns:
        Number of relations added (sum of the counts reported by the helpers)

    Raises:
        RuntimeError: If the store has no initialized connection pool
        ValueError: If ``use_copy`` is False and ``batch_size`` is smaller than 1

    Example:
        ```python
        relations = [
            Relation(id="r1", source_id="e1", target_id="e2", relation_type="KNOWS", properties={}),
            Relation(id="r2", source_id="e2", target_id="e3", relation_type="KNOWS", properties={}),
            # ... thousands more
        ]
        count = await store.batch_add_relations(relations, batch_size=1000)
        ```
    """
    if not relations:
        return 0

    if not getattr(self, "pool", None):
        raise RuntimeError("GraphStore not initialized")

    if use_copy:
        # Use COPY for maximum performance
        total_added = await self._batch_add_relations_copy(relations)
    else:
        # A negative step would make range() empty and silently write nothing,
        # a zero step fails with an unrelated range() error: reject both early.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        # Use multi-row INSERT
        total_added = 0
        for start in range(0, len(relations), batch_size):
            total_added += await self._batch_add_relations_insert(relations[start : start + batch_size])

    logger.info("Batch added %s relations", total_added)
    return total_added
|
|
251
|
+
|
|
252
|
+
async def _batch_add_relations_copy(self, relations: List[Relation]) -> int:
|
|
253
|
+
"""
|
|
254
|
+
Add relations using PostgreSQL COPY
|
|
255
|
+
|
|
256
|
+
Args:
|
|
257
|
+
relations: List of relations to add
|
|
258
|
+
|
|
259
|
+
Returns:
|
|
260
|
+
Number of relations added
|
|
261
|
+
"""
|
|
262
|
+
if not relations:
|
|
263
|
+
return 0
|
|
264
|
+
|
|
265
|
+
# Prepare data for COPY
|
|
266
|
+
copy_data = io.StringIO()
|
|
267
|
+
for relation in relations:
|
|
268
|
+
properties_json = json.dumps(relation.properties)
|
|
269
|
+
|
|
270
|
+
# Write tab-separated values
|
|
271
|
+
# Format: id \t relation_type \t source_id \t target_id \t
|
|
272
|
+
# properties \t weight
|
|
273
|
+
copy_data.write(f"{relation.id}\t{relation.relation_type}\t{relation.source_id}\t" f"{relation.target_id}\t{properties_json}\t{relation.weight}\n")
|
|
274
|
+
|
|
275
|
+
copy_data.seek(0)
|
|
276
|
+
|
|
277
|
+
# Execute COPY
|
|
278
|
+
if self.pool is None:
|
|
279
|
+
raise RuntimeError("Connection pool not initialized")
|
|
280
|
+
async with self.pool.acquire() as conn:
|
|
281
|
+
try:
|
|
282
|
+
result = await conn.copy_to_table(
|
|
283
|
+
"graph_relations",
|
|
284
|
+
source=copy_data,
|
|
285
|
+
columns=[
|
|
286
|
+
"id",
|
|
287
|
+
"relation_type",
|
|
288
|
+
"source_id",
|
|
289
|
+
"target_id",
|
|
290
|
+
"properties",
|
|
291
|
+
"weight",
|
|
292
|
+
],
|
|
293
|
+
format="text",
|
|
294
|
+
)
|
|
295
|
+
if result and result.startswith("COPY"):
|
|
296
|
+
return int(result.split()[1])
|
|
297
|
+
return len(relations)
|
|
298
|
+
except asyncpg.UniqueViolationError as e:
|
|
299
|
+
logger.warning(f"Duplicate relations in batch: {e}")
|
|
300
|
+
return await self._batch_add_relations_insert(relations)
|
|
301
|
+
except asyncpg.ForeignKeyViolationError as e:
|
|
302
|
+
logger.error(f"Foreign key violation in batch: {e}")
|
|
303
|
+
# Some entities don't exist, fall back to individual inserts
|
|
304
|
+
return await self._batch_add_relations_insert(relations)
|
|
305
|
+
except Exception as e:
|
|
306
|
+
logger.error(f"COPY failed: {e}")
|
|
307
|
+
return await self._batch_add_relations_insert(relations)
|
|
308
|
+
|
|
309
|
+
async def _batch_add_relations_insert(self, relations: List[Relation]) -> int:
    """
    Add relations using multi-row INSERT

    Rows are upserted on ``id``. The input is split into statements small
    enough for the driver's bind-argument limit, and all statements run
    inside one transaction so the call either writes every row or none.

    Args:
        relations: List of relations to add. When several entries share
            the same id, only the last one is written.

    Returns:
        Number of relations added/updated (distinct ids written)

    Raises:
        RuntimeError: If the connection pool is not initialized
        Exception: Any database error is logged and re-raised
    """
    if not relations:
        return 0

    if self.pool is None:
        raise RuntimeError("Connection pool not initialized")

    # PostgreSQL refuses an INSERT ... ON CONFLICT DO UPDATE that touches
    # the same row twice, so collapse duplicate ids up front (last wins,
    # matching what sequential upserts would produce).
    # Assumes relation.id is hashable (string ids) -- TODO confirm.
    pending = list({relation.id: relation for relation in relations}.values())

    columns_per_row = 6
    # asyncpg rejects statements with more than 32767 bind arguments.
    max_rows_per_statement = 32767 // columns_per_row

    insert_head = "INSERT INTO graph_relations (id, relation_type, source_id, target_id, properties, weight)"
    upsert_tail = """
        ON CONFLICT (id) DO UPDATE SET
            relation_type = EXCLUDED.relation_type,
            source_id = EXCLUDED.source_id,
            target_id = EXCLUDED.target_id,
            properties = EXCLUDED.properties,
            weight = EXCLUDED.weight,
            updated_at = CURRENT_TIMESTAMP
    """

    async with self.pool.acquire() as conn:
        try:
            # One transaction keeps the multi-statement write atomic, as
            # the original single statement was.
            async with conn.transaction():
                for start in range(0, len(pending), max_rows_per_statement):
                    chunk = pending[start : start + max_rows_per_statement]

                    values_placeholders = []
                    values = []
                    for row_idx, relation in enumerate(chunk):
                        base_idx = row_idx * columns_per_row
                        values_placeholders.append(
                            f"(${base_idx+1}, ${base_idx+2}, ${base_idx+3}, "
                            f"${base_idx+4}, ${base_idx+5}::jsonb, ${base_idx+6})"
                        )
                        values.extend(
                            [
                                relation.id,
                                relation.relation_type,
                                relation.source_id,
                                relation.target_id,
                                json.dumps(relation.properties),
                                relation.weight,
                            ]
                        )

                    query = f"{insert_head} VALUES {', '.join(values_placeholders)} {upsert_tail}"
                    await conn.execute(query, *values)
            return len(pending)
        except Exception as e:
            logger.error(f"Batch insert failed: {e}")
            raise
|
|
364
|
+
|
|
365
|
+
async def batch_delete_entities(self, entity_ids: List[str], batch_size: int = 1000) -> int:
    """
    Delete multiple entities efficiently

    The ids are deleted in slices of ``batch_size``; each slice is one
    ``DELETE ... WHERE id = ANY($1)`` statement on its own pooled connection.

    Args:
        entity_ids: List of entity IDs to delete
        batch_size: Number of entities per batch (must be positive)

    Returns:
        Number of entities deleted, summed from the "DELETE n" status
        returned for each batch

    Raises:
        RuntimeError: If the connection pool is missing or not initialized
        ValueError: If batch_size is not positive
    """
    if not entity_ids:
        return 0

    pool = getattr(self, "pool", None)
    if not pool:
        raise RuntimeError("GraphStore not initialized")

    # A negative step would make range() empty and silently delete nothing,
    # and a zero step fails with an unrelated-looking range() error.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Use ANY() for efficient batch delete
    query = "DELETE FROM graph_entities WHERE id = ANY($1)"
    total_deleted = 0

    for start in range(0, len(entity_ids), batch_size):
        batch = entity_ids[start : start + batch_size]

        async with pool.acquire() as conn:
            result = await conn.execute(query, batch)

        # Parse result: "DELETE n"
        if result and result.startswith("DELETE"):
            total_deleted += int(result.split()[1])

    logger.info(f"Batch deleted {total_deleted} entities")
    return total_deleted
|
|
400
|
+
|
|
401
|
+
async def batch_delete_relations(self, relation_ids: List[str], batch_size: int = 1000) -> int:
    """
    Delete multiple relations efficiently

    The ids are deleted in slices of ``batch_size``; each slice is one
    ``DELETE ... WHERE id = ANY($1)`` statement on its own pooled connection.

    Args:
        relation_ids: List of relation IDs to delete
        batch_size: Number of relations per batch (must be positive)

    Returns:
        Number of relations deleted, summed from the "DELETE n" status
        returned for each batch

    Raises:
        RuntimeError: If the connection pool is missing or not initialized
        ValueError: If batch_size is not positive
    """
    if not relation_ids:
        return 0

    pool = getattr(self, "pool", None)
    if not pool:
        raise RuntimeError("GraphStore not initialized")

    # A negative step would make range() empty and silently delete nothing,
    # and a zero step fails with an unrelated-looking range() error.
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    # Use ANY() for efficient batch delete
    query = "DELETE FROM graph_relations WHERE id = ANY($1)"
    total_deleted = 0

    for start in range(0, len(relation_ids), batch_size):
        batch = relation_ids[start : start + batch_size]

        async with pool.acquire() as conn:
            result = await conn.execute(query, batch)

        # Parse result: "DELETE n"
        if result and result.startswith("DELETE"):
            total_deleted += int(result.split()[1])

    logger.info(f"Batch deleted {total_deleted} relations")
    return total_deleted
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def estimate_batch_size(avg_item_size_bytes: int, target_batch_size_mb: int = 10) -> int:
    """
    Estimate optimal batch size based on item size

    The result is the number of average-sized items that fit in the target
    batch size, never less than 100.

    Args:
        avg_item_size_bytes: Average size of each item in bytes. A
            non-positive value (e.g. an estimate taken from an empty
            sample) yields the minimum batch size instead of an error.
        target_batch_size_mb: Target batch size in MB

    Returns:
        Recommended batch size (number of items), always an int >= 100

    Example:
        ```python
        # For entities averaging 1KB each
        batch_size = estimate_batch_size(1024, target_batch_size_mb=10)
        # Returns 10240
        ```
    """
    min_batch_size = 100

    # Guard the division: a zero size would raise ZeroDivisionError, and a
    # negative size already collapsed to the minimum via max().
    if avg_item_size_bytes <= 0:
        return min_batch_size

    target_bytes = target_batch_size_mb * 1024 * 1024
    # int() keeps the declared return type when a float size or MB is passed.
    return max(min_batch_size, int(target_bytes // avg_item_size_bytes))
|