aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of aiecs might be problematic.
- aiecs/__init__.py +13 -16
- aiecs/__main__.py +7 -7
- aiecs/aiecs_client.py +269 -75
- aiecs/application/executors/operation_executor.py +79 -54
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
- aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
- aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
- aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +98 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
- aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
- aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
- aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
- aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
- aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
- aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
- aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
- aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
- aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
- aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
- aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
- aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
- aiecs/application/knowledge_graph/search/reranker.py +293 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +471 -0
- aiecs/config/__init__.py +20 -5
- aiecs/config/config.py +762 -31
- aiecs/config/graph_config.py +131 -0
- aiecs/config/tool_config.py +399 -0
- aiecs/core/__init__.py +29 -13
- aiecs/core/interface/__init__.py +2 -2
- aiecs/core/interface/execution_interface.py +22 -22
- aiecs/core/interface/storage_interface.py +37 -88
- aiecs/core/registry/__init__.py +31 -0
- aiecs/core/registry/service_registry.py +92 -0
- aiecs/domain/__init__.py +270 -1
- aiecs/domain/agent/__init__.py +191 -0
- aiecs/domain/agent/base_agent.py +3870 -0
- aiecs/domain/agent/exceptions.py +99 -0
- aiecs/domain/agent/graph_aware_mixin.py +569 -0
- aiecs/domain/agent/hybrid_agent.py +1435 -0
- aiecs/domain/agent/integration/__init__.py +29 -0
- aiecs/domain/agent/integration/context_compressor.py +216 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
- aiecs/domain/agent/integration/protocols.py +281 -0
- aiecs/domain/agent/integration/retry_policy.py +218 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
- aiecs/domain/agent/lifecycle.py +291 -0
- aiecs/domain/agent/llm_agent.py +692 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +1124 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +163 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
- aiecs/domain/agent/models.py +884 -0
- aiecs/domain/agent/observability.py +479 -0
- aiecs/domain/agent/persistence.py +449 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +159 -0
- aiecs/domain/agent/prompts/formatters.py +187 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +253 -0
- aiecs/domain/agent/tool_agent.py +444 -0
- aiecs/domain/agent/tools/__init__.py +15 -0
- aiecs/domain/agent/tools/schema_generator.py +364 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +469 -0
- aiecs/domain/community/analytics.py +432 -0
- aiecs/domain/community/collaborative_workflow.py +648 -0
- aiecs/domain/community/communication_hub.py +634 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +796 -0
- aiecs/domain/community/community_manager.py +803 -0
- aiecs/domain/community/decision_engine.py +849 -0
- aiecs/domain/community/exceptions.py +231 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +234 -0
- aiecs/domain/community/resource_manager.py +461 -0
- aiecs/domain/community/shared_context_manager.py +589 -0
- aiecs/domain/context/__init__.py +40 -10
- aiecs/domain/context/context_engine.py +1910 -0
- aiecs/domain/context/conversation_models.py +87 -53
- aiecs/domain/context/graph_memory.py +582 -0
- aiecs/domain/execution/model.py +12 -4
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +148 -0
- aiecs/domain/knowledge_graph/models/evidence.py +178 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
- aiecs/domain/knowledge_graph/models/path.py +171 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
- aiecs/domain/knowledge_graph/models/query.py +261 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
- aiecs/domain/knowledge_graph/models/relation.py +202 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
- aiecs/domain/task/dsl_processor.py +172 -56
- aiecs/domain/task/model.py +20 -8
- aiecs/domain/task/task_context.py +27 -24
- aiecs/infrastructure/__init__.py +0 -2
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +837 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
- aiecs/infrastructure/graph_storage/cache.py +424 -0
- aiecs/infrastructure/graph_storage/distributed.py +223 -0
- aiecs/infrastructure/graph_storage/error_handling.py +380 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
- aiecs/infrastructure/graph_storage/metrics.py +344 -0
- aiecs/infrastructure/graph_storage/migration.py +400 -0
- aiecs/infrastructure/graph_storage/pagination.py +483 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
- aiecs/infrastructure/graph_storage/postgres.py +1563 -0
- aiecs/infrastructure/graph_storage/property_storage.py +353 -0
- aiecs/infrastructure/graph_storage/protocols.py +76 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
- aiecs/infrastructure/graph_storage/streaming.py +487 -0
- aiecs/infrastructure/graph_storage/tenant.py +412 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
- aiecs/infrastructure/messaging/websocket_manager.py +51 -35
- aiecs/infrastructure/monitoring/__init__.py +22 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
- aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
- aiecs/infrastructure/monitoring/structured_logger.py +3 -7
- aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
- aiecs/infrastructure/persistence/__init__.py +14 -1
- aiecs/infrastructure/persistence/context_engine_client.py +184 -0
- aiecs/infrastructure/persistence/database_manager.py +67 -43
- aiecs/infrastructure/persistence/file_storage.py +180 -103
- aiecs/infrastructure/persistence/redis_client.py +74 -21
- aiecs/llm/__init__.py +73 -25
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
- aiecs/llm/client_factory.py +224 -36
- aiecs/llm/client_resolver.py +155 -0
- aiecs/llm/clients/__init__.py +38 -0
- aiecs/llm/clients/base_client.py +324 -0
- aiecs/llm/clients/google_function_calling_mixin.py +457 -0
- aiecs/llm/clients/googleai_client.py +241 -0
- aiecs/llm/clients/openai_client.py +158 -0
- aiecs/llm/clients/openai_compatible_mixin.py +367 -0
- aiecs/llm/clients/vertex_client.py +897 -0
- aiecs/llm/clients/xai_client.py +201 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +272 -0
- aiecs/llm/config/config_validator.py +206 -0
- aiecs/llm/config/model_config.py +143 -0
- aiecs/llm/protocols.py +149 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +89 -0
- aiecs/main.py +140 -121
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/module_checker.py +499 -0
- aiecs/scripts/aid/version_manager.py +235 -0
- aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
- aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
- aiecs/scripts/dependance_check/__init__.py +15 -0
- aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
- aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
- aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
- aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
- aiecs/scripts/knowledge_graph/__init__.py +3 -0
- aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
- aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
- aiecs/scripts/tools_develop/README.md +671 -0
- aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
- aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
- aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
- aiecs/scripts/tools_develop/schema_coverage.py +511 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
- aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
- aiecs/scripts/tools_develop/verify_tools.py +352 -0
- aiecs/tasks/__init__.py +0 -1
- aiecs/tasks/worker.py +115 -47
- aiecs/tools/__init__.py +194 -72
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +330 -0
- aiecs/tools/apisource/providers/__init__.py +112 -0
- aiecs/tools/apisource/providers/base.py +671 -0
- aiecs/tools/apisource/providers/census.py +397 -0
- aiecs/tools/apisource/providers/fred.py +535 -0
- aiecs/tools/apisource/providers/newsapi.py +409 -0
- aiecs/tools/apisource/providers/worldbank.py +352 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +363 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
- aiecs/tools/apisource/tool.py +832 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +334 -0
- aiecs/tools/base_tool.py +415 -21
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +607 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
- aiecs/tools/docs/content_insertion_tool.py +1320 -0
- aiecs/tools/docs/document_creator_tool.py +1323 -0
- aiecs/tools/docs/document_layout_tool.py +1160 -0
- aiecs/tools/docs/document_parser_tool.py +1011 -0
- aiecs/tools/docs/document_writer_tool.py +1829 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
- aiecs/tools/langchain_adapter.py +300 -138
- aiecs/tools/schema_generator.py +455 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +581 -0
- aiecs/tools/search_tool/cache.py +264 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +224 -0
- aiecs/tools/search_tool/core.py +778 -0
- aiecs/tools/search_tool/deduplicator.py +119 -0
- aiecs/tools/search_tool/error_handler.py +242 -0
- aiecs/tools/search_tool/metrics.py +343 -0
- aiecs/tools/search_tool/rate_limiter.py +172 -0
- aiecs/tools/search_tool/schemas.py +275 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
- aiecs/tools/statistics/data_loader_tool.py +555 -0
- aiecs/tools/statistics/data_profiler_tool.py +638 -0
- aiecs/tools/statistics/data_transformer_tool.py +580 -0
- aiecs/tools/statistics/data_visualizer_tool.py +498 -0
- aiecs/tools/statistics/model_trainer_tool.py +507 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
- aiecs/tools/task_tools/__init__.py +49 -36
- aiecs/tools/task_tools/chart_tool.py +200 -184
- aiecs/tools/task_tools/classfire_tool.py +268 -267
- aiecs/tools/task_tools/image_tool.py +175 -131
- aiecs/tools/task_tools/office_tool.py +226 -146
- aiecs/tools/task_tools/pandas_tool.py +477 -121
- aiecs/tools/task_tools/report_tool.py +390 -142
- aiecs/tools/task_tools/research_tool.py +149 -79
- aiecs/tools/task_tools/scraper_tool.py +339 -145
- aiecs/tools/task_tools/stats_tool.py +448 -209
- aiecs/tools/temp_file_manager.py +26 -24
- aiecs/tools/tool_executor/__init__.py +18 -16
- aiecs/tools/tool_executor/tool_executor.py +364 -52
- aiecs/utils/LLM_output_structor.py +74 -48
- aiecs/utils/__init__.py +14 -3
- aiecs/utils/base_callback.py +0 -3
- aiecs/utils/cache_provider.py +696 -0
- aiecs/utils/execution_utils.py +50 -31
- aiecs/utils/prompt_loader.py +1 -0
- aiecs/utils/token_usage_repository.py +37 -11
- aiecs/ws/socket_server.py +14 -4
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
- aiecs-1.7.6.dist-info/RECORD +337 -0
- aiecs-1.7.6.dist-info/entry_points.txt +13 -0
- aiecs/config/registry.py +0 -19
- aiecs/domain/context/content_engine.py +0 -982
- aiecs/llm/base_client.py +0 -99
- aiecs/llm/openai_client.py +0 -125
- aiecs/llm/vertex_client.py +0 -186
- aiecs/llm/xai_client.py +0 -184
- aiecs/scripts/dependency_checker.py +0 -857
- aiecs/scripts/quick_dependency_check.py +0 -269
- aiecs/tools/task_tools/search_api.py +0 -7
- aiecs-1.0.1.dist-info/RECORD +0 -90
- aiecs-1.0.1.dist-info/entry_points.txt +0 -7
- /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
- /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
- /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
- /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
- {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
aiecs/application/executors/operation_executor.py

@@ -1,6 +1,6 @@
 import asyncio
 import logging
-from typing import Dict, List, Any
+from typing import Dict, List, Any
 from aiecs.tools import get_tool
 from aiecs.tools.tool_executor import ToolExecutor
 from aiecs.utils.execution_utils import ExecutionUtils
@@ -14,27 +14,33 @@ class OperationExecutor:
     Core logic for handling operation execution
     """

-    def __init__(
+    def __init__(
+        self,
+        tool_executor: ToolExecutor,
+        execution_utils: ExecutionUtils,
+        config: Dict[str, Any],
+    ):
         self.tool_executor = tool_executor
         self.execution_utils = execution_utils
         self.config = config
-        self._tool_instances = {}
-        self.semaphore = asyncio.Semaphore(config.get(
+        self._tool_instances: Dict[str, Any] = {}
+        self.semaphore = asyncio.Semaphore(config.get("rate_limit_requests_per_second", 5))

     def _filter_tool_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
         """
         Filter out system-related parameters, keeping only parameters needed by tool methods
         """
         # System-related parameters that should not be passed to tool methods
-        system_params = {
+        system_params = {"user_id", "task_id", "op"}
         return {k: v for k, v in params.items() if k not in system_params}

     def _filter_tool_call_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
         """
         Filter out system-related parameters in tool calls, but keep 'op' parameter (needed by BaseTool.run())
         """
-        # Only filter user and task IDs, keep 'op' parameter for BaseTool.run()
-
+        # Only filter user and task IDs, keep 'op' parameter for BaseTool.run()
+        # to use
+        system_params = {"user_id", "task_id"}
         return {k: v for k, v in params.items() if k not in system_params}

     async def execute_operation(self, operation_spec: str, params: Dict[str, Any]) -> Any:
@@ -44,7 +50,9 @@ class OperationExecutor:
         if "." not in operation_spec:
             raise ValueError(f"Invalid operation spec: {operation_spec}, expected 'tool_name.operation_name'")

-
+        parts = operation_spec.split(".", 1)
+        tool_name: str = parts[0]
+        operation_name: str = parts[1]

         # Get or create tool instance
         if tool_name not in self._tool_instances:
@@ -69,29 +77,37 @@ class OperationExecutor:
         Batch execute operations with rate limiting
         """
         results = []
-        batch_size = self.config.get(
-        rate_limit = self.config.get(
+        batch_size = self.config.get("batch_size", 10)
+        rate_limit = self.config.get("rate_limit_requests_per_second", 5)

         for i in range(0, len(operations), batch_size):
-            batch = operations[i:i + batch_size]
+            batch = operations[i : i + batch_size]
             batch_results = await asyncio.gather(
                 *[self.execute_operation(op["operation"], op.get("params", {})) for op in batch],
-                return_exceptions=True
+                return_exceptions=True,
             )
             results.extend(batch_results)
             await asyncio.sleep(1.0 / rate_limit)

         return results

-    async def execute_operations_sequence(
-
+    async def execute_operations_sequence(
+        self,
+        operations: List[Dict[str, Any]],
+        user_id: str,
+        task_id: str,
+        stop_on_failure: bool = False,
+        save_callback=None,
+    ) -> List[TaskStepResult]:
         """
         Execute operations sequence sequentially, with option to stop on failure
         """
-        results = []
+        results: List[TaskStepResult] = []

         for step, op_info in enumerate(operations):
             operation_spec = op_info.get("operation")
+            if not isinstance(operation_spec, str):
+                raise ValueError(f"Invalid operation spec: {operation_spec}, expected string")
             params = op_info.get("params", {})

             # Process parameter references
@@ -104,7 +120,7 @@ class OperationExecutor:
                     result=result,
                     completed=True,
                     message=f"Completed operation {operation_spec}",
-                    status=TaskStatus.COMPLETED.value
+                    status=TaskStatus.COMPLETED.value,
                 )
             except Exception as e:
                 step_result = TaskStepResult(
@@ -114,7 +130,7 @@ class OperationExecutor:
                     message=f"Failed to execute {operation_spec}",
                     status=TaskStatus.FAILED.value,
                     error_code=ErrorCode.EXECUTION_ERROR.value,
-                    error_message=str(e)
+                    error_message=str(e),
                 )

                 if stop_on_failure:
@@ -138,9 +154,9 @@ class OperationExecutor:
         processed = {}

         for name, value in params.items():
-            if isinstance(value, str) and value.startswith(
+            if isinstance(value, str) and value.startswith("$result["):
                 try:
-                    ref_parts = value[8:].split(
+                    ref_parts = value[8:].split("]", 1)
                     idx = int(ref_parts[0])

                     if idx >= len(results):
@@ -148,9 +164,10 @@ class OperationExecutor:

                     ref_value = results[idx].result

-                    # Handle nested attribute access, such as
-
-
+                    # Handle nested attribute access, such as
+                    # $result[0].data.field
+                    if len(ref_parts) > 1 and ref_parts[1].startswith("."):
+                        for attr in ref_parts[1][1:].split("."):
                             if attr:
                                 if isinstance(ref_value, dict):
                                     ref_value = ref_value.get(attr)
@@ -171,14 +188,14 @@ class OperationExecutor:
         Execute batch tool calls with rate limiting
         """
         results = []
-        batch_size = self.config.get(
-        rate_limit = self.config.get(
+        batch_size = self.config.get("batch_size", 10)
+        rate_limit = self.config.get("rate_limit_requests_per_second", 5)

         for i in range(0, len(tool_calls), batch_size):
-            batch = tool_calls[i:i + batch_size]
+            batch = tool_calls[i : i + batch_size]
             batch_results = await asyncio.gather(
                 *[self._execute_tool_call(call, tool_executor_func) for call in batch],
-                return_exceptions=True
+                return_exceptions=True,
             )
             results.extend(batch_results)
             await asyncio.sleep(1.0 / rate_limit)
@@ -190,11 +207,14 @@ class OperationExecutor:
         Execute a single tool call with rate limiting
         """
         async with self.semaphore:
-
+            tool_name_raw = call.get("tool")
+            if not isinstance(tool_name_raw, str):
+                raise ValueError(f"Invalid tool name: {tool_name_raw}, expected string")
+            tool_name: str = tool_name_raw
             params = call.get("params", {})

             # Use context-aware caching
-            if self.config.get(
+            if self.config.get("enable_cache", True):
                 user_id = params.get("user_id", "anonymous")
                 task_id = params.get("task_id", "none")
                 cache_key = self.execution_utils.generate_cache_key("tool_call", user_id, task_id, (), params)
@@ -211,14 +231,16 @@ class OperationExecutor:
             if tool_name not in self._tool_instances:
                 self._tool_instances[tool_name] = get_tool(tool_name)
             tool = self._tool_instances[tool_name]
-
-            # Filter parameters, remove system-related parameters (but keep
+
+            # Filter parameters, remove system-related parameters (but keep
+            # 'op' parameter)
             tool_params = self._filter_tool_call_params(params)
-            # Execute through BaseTool.run method, passing filtered
+            # Execute through BaseTool.run method, passing filtered
+            # parameters
             result = await self.tool_executor.execute_async(tool, "run", **tool_params)

             # Cache result
-            if self.config.get(
+            if self.config.get("enable_cache", True):
                 self.execution_utils.add_to_cache(cache_key, result)

             return result
@@ -230,7 +252,7 @@ class OperationExecutor:
         import re

         tool_calls = []
-        tool_pattern = r
+        tool_pattern = r"\{\{(\w+)\((.*?)\)\}\}"
         matches = re.finditer(tool_pattern, description)

         for match in matches:
@@ -256,10 +278,7 @@ class OperationExecutor:

                 params[param_name] = param_value

-            tool_calls.append({
-                "tool": tool_name,
-                "params": params
-            })
+            tool_calls.append({"tool": tool_name, "params": params})

         return tool_calls

@@ -271,9 +290,11 @@ class OperationExecutor:

         for i, op_info in enumerate(operations):
             operation_spec = op_info.get("operation")
+            if not isinstance(operation_spec, str):
+                raise ValueError(f"Invalid operation spec: {operation_spec}, expected string")
             params = op_info.get("params", {})

-            async def execute_single_op(spec, p, index):
+            async def execute_single_op(spec: str, p: Dict[str, Any], index: int) -> TaskStepResult:
                 try:
                     result = await self.execute_operation(spec, p)
                     return TaskStepResult(
@@ -281,7 +302,7 @@ class OperationExecutor:
                         result=result,
                         completed=True,
                         message=f"Completed parallel operation {spec}",
-                        status=TaskStatus.COMPLETED.value
+                        status=TaskStatus.COMPLETED.value,
                     )
                 except Exception as e:
                     return TaskStepResult(
@@ -291,7 +312,7 @@ class OperationExecutor:
                         message=f"Failed parallel operation {spec}",
                         status=TaskStatus.FAILED.value,
                         error_code=ErrorCode.EXECUTION_ERROR.value,
-                        error_message=str(e)
+                        error_message=str(e),
                     )

             tasks.append(execute_single_op(operation_spec, params, i))
@@ -299,19 +320,23 @@ class OperationExecutor:
         results = await asyncio.gather(*tasks, return_exceptions=True)

         # Handle exception results
-        processed_results = []
+        processed_results: List[TaskStepResult] = []
         for i, result in enumerate(results):
             if isinstance(result, Exception):
-                processed_results.append(
-
-
-
-
-
-
-
-
+                processed_results.append(
+                    TaskStepResult(
+                        step=f"parallel_{i}_error",
+                        result=None,
+                        completed=False,
+                        message="Parallel operation failed with exception",
+                        status=TaskStatus.FAILED.value,
+                        error_code=ErrorCode.EXECUTION_ERROR.value,
+                        error_message=str(result),
+                    )
+                )
             else:
+                # result is TaskStepResult here because execute_single_op always returns TaskStepResult
+                assert isinstance(result, TaskStepResult), f"Expected TaskStepResult, got {type(result)}"
                 processed_results.append(result)

         return processed_results
@@ -334,8 +359,8 @@ class OperationExecutor:
             "tool_names": list(self._tool_instances.keys()),
             "semaphore_value": self.semaphore._value,
             "config": {
-                "batch_size": self.config.get(
-                "rate_limit": self.config.get(
-                "enable_cache": self.config.get(
-            }
+                "batch_size": self.config.get("batch_size", 10),
+                "rate_limit": self.config.get("rate_limit_requests_per_second", 5),
+                "enable_cache": self.config.get("enable_cache", True),
+            },
         }
aiecs/application/knowledge_graph/builder/__init__.py

@@ -0,0 +1,37 @@
+"""
+Knowledge Graph Builder Pipeline
+
+Orchestrates document-to-graph conversion workflow.
+"""
+
+from aiecs.application.knowledge_graph.builder.graph_builder import (
+    GraphBuilder,
+)
+from aiecs.application.knowledge_graph.builder.document_builder import (
+    DocumentGraphBuilder,
+)
+from aiecs.application.knowledge_graph.builder.text_chunker import TextChunker
+from aiecs.application.knowledge_graph.builder.schema_mapping import (
+    SchemaMapping,
+    EntityMapping,
+    RelationMapping,
+    PropertyTransformation,
+    TransformationType,
+)
+from aiecs.application.knowledge_graph.builder.structured_pipeline import (
+    StructuredDataPipeline,
+    ImportResult,
+)
+
+__all__ = [
+    "GraphBuilder",
+    "DocumentGraphBuilder",
+    "TextChunker",
+    "SchemaMapping",
+    "EntityMapping",
+    "RelationMapping",
+    "PropertyTransformation",
+    "TransformationType",
+    "StructuredDataPipeline",
+    "ImportResult",
+]
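
Since this __init__.py re-exports the builder components, callers can import them from the package root rather than the individual modules, for example:

from aiecs.application.knowledge_graph.builder import (
    GraphBuilder,
    StructuredDataPipeline,
    TextChunker,
)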
aiecs/application/knowledge_graph/builder/data_quality.py

@@ -0,0 +1,302 @@
+"""
+Data Quality Validation for Knowledge Graph Import
+
+Provides validation capabilities to ensure data quality during import,
+including range validation, outlier detection, completeness checks, and
+type consistency validation.
+"""
+
+from typing import Dict, List, Optional, Any, Set, Union
+from dataclasses import dataclass, field
+from enum import Enum
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Check for pandas and numpy availability
+try:
+    import pandas as pd
+    import numpy as np
+    PANDAS_AVAILABLE = True
+except ImportError:
+    PANDAS_AVAILABLE = False
+
+
+class ViolationType(Enum):
+    """Types of data quality violations"""
+    RANGE_VIOLATION = "range_violation"
+    OUTLIER = "outlier"
+    MISSING_VALUE = "missing_value"
+    TYPE_MISMATCH = "type_mismatch"
+
+
+@dataclass
+class ValidationViolation:
+    """
+    Represents a single data quality violation
+
+    Attributes:
+        violation_type: Type of violation
+        property_name: Property that violated the rule
+        row_id: Identifier of the row with violation
+        value: The violating value
+        expected: Expected value or constraint
+        message: Human-readable description
+    """
+    violation_type: ViolationType
+    property_name: str
+    row_id: Any
+    value: Any
+    expected: Any
+    message: str
+
+
+@dataclass
+class QualityReport:
+    """
+    Data quality validation report
+
+    Attributes:
+        total_rows: Total number of rows validated
+        violations: List of all violations found
+        completeness: Completeness percentage per property
+        outlier_count: Number of outliers detected per property
+        range_violations: Number of range violations per property
+        type_violations: Number of type violations per property
+        passed: Whether validation passed (no critical violations)
+    """
+    total_rows: int
+    violations: List[ValidationViolation] = field(default_factory=list)
+    completeness: Dict[str, float] = field(default_factory=dict)
+    outlier_count: Dict[str, int] = field(default_factory=dict)
+    range_violations: Dict[str, int] = field(default_factory=dict)
+    type_violations: Dict[str, int] = field(default_factory=dict)
+    passed: bool = True
+
+    def add_violation(self, violation: ValidationViolation):
+        """Add a violation to the report"""
+        self.violations.append(violation)
+
+        # Update counts
+        if violation.violation_type == ViolationType.RANGE_VIOLATION:
+            self.range_violations[violation.property_name] = \
+                self.range_violations.get(violation.property_name, 0) + 1
+        elif violation.violation_type == ViolationType.OUTLIER:
+            self.outlier_count[violation.property_name] = \
+                self.outlier_count.get(violation.property_name, 0) + 1
+        elif violation.violation_type == ViolationType.TYPE_MISMATCH:
+            self.type_violations[violation.property_name] = \
+                self.type_violations.get(violation.property_name, 0) + 1
+
+    def get_summary(self) -> Dict[str, Any]:
+        """Get a summary of the quality report"""
+        return {
+            "total_rows": self.total_rows,
+            "total_violations": len(self.violations),
+            "range_violations": sum(self.range_violations.values()),
+            "outliers": sum(self.outlier_count.values()),
+            "type_violations": sum(self.type_violations.values()),
+            "completeness": self.completeness,
+            "passed": self.passed
+        }
+
+
+@dataclass
+class RangeRule:
+    """Range validation rule for numeric properties"""
+    min_value: Optional[float] = None
+    max_value: Optional[float] = None
+
+
+@dataclass
+class ValidationConfig:
+    """
+    Configuration for data quality validation
+
+    Attributes:
+        range_rules: Range validation rules per property
+        required_properties: Set of required properties
+        detect_outliers: Whether to detect outliers (3 std devs)
+        fail_on_violations: Whether to fail import on violations
+        max_violation_rate: Maximum allowed violation rate (0.0-1.0)
+    """
+    range_rules: Dict[str, RangeRule] = field(default_factory=dict)
+    required_properties: Set[str] = field(default_factory=set)
+    detect_outliers: bool = False
+    fail_on_violations: bool = False
+    max_violation_rate: float = 0.1  # 10% by default
+
+
+class DataQualityValidator:
+    """
+    Validates data quality during knowledge graph import
+
+    Provides range validation, outlier detection, completeness checks,
+    and type consistency validation.
+    """
+
+    def __init__(self, config: Optional[ValidationConfig] = None):
+        """
+        Initialize validator with configuration
+
+        Args:
+            config: Validation configuration
+        """
+        self.config = config or ValidationConfig()
+        self._property_stats: Dict[str, Dict[str, float]] = {}
+
+    def validate_dataframe(self, df: 'pd.DataFrame', id_column: Optional[str] = None) -> QualityReport:
+        """
+        Validate a pandas DataFrame
+
+        Args:
+            df: DataFrame to validate
+            id_column: Column to use as row identifier
+
+        Returns:
+            QualityReport with validation results
+        """
+        if not PANDAS_AVAILABLE:
+            raise ImportError("pandas and numpy are required for data quality validation")
+
+        report = QualityReport(total_rows=len(df))
+
+        # Use index as row ID if no id_column specified
+        row_ids = df[id_column] if id_column and id_column in df.columns else df.index
+
+        # Check completeness
+        self._check_completeness(df, report)
+
+        # Check required properties
+        self._check_required_properties(df, row_ids, report)
+
+        # Validate ranges
+        self._validate_ranges(df, row_ids, report)
+
+        # Detect outliers
+        if self.config.detect_outliers:
+            self._detect_outliers(df, row_ids, report)
+
+        # Check if validation passed
+        violation_rate = len(report.violations) / max(report.total_rows, 1)
+        if self.config.fail_on_violations and violation_rate > self.config.max_violation_rate:
+            report.passed = False
+
+        return report
+
+    def _check_completeness(self, df: 'pd.DataFrame', report: QualityReport):
+        """Check completeness of properties"""
+        for col in df.columns:
+            non_null_count = df[col].notna().sum()
+            completeness = non_null_count / len(df) if len(df) > 0 else 0.0
+            report.completeness[col] = completeness
+
+    def _check_required_properties(self, df: 'pd.DataFrame', row_ids: Any, report: QualityReport):
+        """Check that required properties are present and non-null"""
+        for prop in self.config.required_properties:
+            if prop not in df.columns:
+                # Property missing entirely
+                violation = ValidationViolation(
+                    violation_type=ViolationType.MISSING_VALUE,
+                    property_name=prop,
+                    row_id="ALL",
+                    value=None,
+                    expected="required property",
+                    message=f"Required property '{prop}' is missing from dataset"
+                )
+                report.add_violation(violation)
+            else:
+                # Check for null values in required property
+                null_mask = df[prop].isna()
+                for idx in df[null_mask].index:
+                    row_id = row_ids.iloc[idx] if hasattr(row_ids, 'iloc') else row_ids[idx]
+                    violation = ValidationViolation(
+                        violation_type=ViolationType.MISSING_VALUE,
+                        property_name=prop,
+                        row_id=row_id,
+                        value=None,
+                        expected="non-null value",
+                        message=f"Required property '{prop}' is null in row {row_id}"
+                    )
+                    report.add_violation(violation)
+
+    def _validate_ranges(self, df: 'pd.DataFrame', row_ids: Any, report: QualityReport):
+        """Validate numeric properties are within specified ranges"""
+        for prop, rule in self.config.range_rules.items():
+            if prop not in df.columns:
+                continue
+
+            # Only validate numeric columns
+            if not pd.api.types.is_numeric_dtype(df[prop]):
+                continue
+
+            # Check min value
+            if rule.min_value is not None:
+                violations_mask = df[prop] < rule.min_value
+                for idx in df[violations_mask].index:
+                    row_id = row_ids.iloc[idx] if hasattr(row_ids, 'iloc') else row_ids[idx]
+                    value = df[prop].iloc[idx]
+                    violation = ValidationViolation(
+                        violation_type=ViolationType.RANGE_VIOLATION,
+                        property_name=prop,
+                        row_id=row_id,
+                        value=value,
+                        expected=f">= {rule.min_value}",
+                        message=f"Value {value} is below minimum {rule.min_value} for property '{prop}' in row {row_id}"
+                    )
+                    report.add_violation(violation)
+
+            # Check max value
+            if rule.max_value is not None:
+                violations_mask = df[prop] > rule.max_value
+                for idx in df[violations_mask].index:
+                    row_id = row_ids.iloc[idx] if hasattr(row_ids, 'iloc') else row_ids[idx]
+                    value = df[prop].iloc[idx]
+                    violation = ValidationViolation(
+                        violation_type=ViolationType.RANGE_VIOLATION,
+                        property_name=prop,
+                        row_id=row_id,
+                        value=value,
+                        expected=f"<= {rule.max_value}",
+                        message=f"Value {value} is above maximum {rule.max_value} for property '{prop}' in row {row_id}"
+                    )
+                    report.add_violation(violation)
+
+    def _detect_outliers(self, df: 'pd.DataFrame', row_ids: Any, report: QualityReport):
+        """Detect outliers using 3 standard deviations rule"""
+        numeric_cols = df.select_dtypes(include=[np.number]).columns
+
+        for col in numeric_cols:
+            # Skip if all values are null
+            if df[col].isna().all():
+                continue
+
+            # Calculate mean and std
+            mean = df[col].mean()
+            std = df[col].std()
+
+            # Skip if std is 0 or NaN
+            if pd.isna(std) or std == 0:
+                continue
+
+            # Store stats for later use
+            self._property_stats[col] = {"mean": mean, "std": std}
+
+            # Detect outliers (beyond 3 standard deviations)
+            lower_bound = mean - 3 * std
+            upper_bound = mean + 3 * std
+            outliers_mask = (df[col] < lower_bound) | (df[col] > upper_bound)
+
+            for idx in df[outliers_mask].index:
+                row_id = row_ids.iloc[idx] if hasattr(row_ids, 'iloc') else row_ids[idx]
+                value = df[col].iloc[idx]
+                violation = ValidationViolation(
+                    violation_type=ViolationType.OUTLIER,
+                    property_name=col,
+                    row_id=row_id,
+                    value=value,
+                    expected=f"within [{lower_bound:.2f}, {upper_bound:.2f}]",
+                    message=f"Value {value} is an outlier (>3 std devs) for property '{col}' in row {row_id}"
+                )
+                report.add_violation(violation)
+