PyPI - aiecs - Versions diffs - 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl - Mend

aiecs 1.0.1-py3-none-any.whl → 1.7.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of aiecs might be problematic. Click here for more details.

Files changed (340)

aiecs/__init__.py +13 -16
aiecs/__main__.py +7 -7
aiecs/aiecs_client.py +269 -75
aiecs/application/executors/operation_executor.py +79 -54
aiecs/application/knowledge_graph/__init__.py +7 -0
aiecs/application/knowledge_graph/builder/__init__.py +37 -0
aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
aiecs/application/knowledge_graph/extractors/base.py +98 -0
aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
aiecs/application/knowledge_graph/search/__init__.py +59 -0
aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
aiecs/application/knowledge_graph/search/reranker.py +293 -0
aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
aiecs/application/knowledge_graph/validators/__init__.py +13 -0
aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
aiecs/common/__init__.py +9 -0
aiecs/common/knowledge_graph/__init__.py +17 -0
aiecs/common/knowledge_graph/runnable.py +471 -0
aiecs/config/__init__.py +20 -5
aiecs/config/config.py +762 -31
aiecs/config/graph_config.py +131 -0
aiecs/config/tool_config.py +399 -0
aiecs/core/__init__.py +29 -13
aiecs/core/interface/__init__.py +2 -2
aiecs/core/interface/execution_interface.py +22 -22
aiecs/core/interface/storage_interface.py +37 -88
aiecs/core/registry/__init__.py +31 -0
aiecs/core/registry/service_registry.py +92 -0
aiecs/domain/__init__.py +270 -1
aiecs/domain/agent/__init__.py +191 -0
aiecs/domain/agent/base_agent.py +3870 -0
aiecs/domain/agent/exceptions.py +99 -0
aiecs/domain/agent/graph_aware_mixin.py +569 -0
aiecs/domain/agent/hybrid_agent.py +1435 -0
aiecs/domain/agent/integration/__init__.py +29 -0
aiecs/domain/agent/integration/context_compressor.py +216 -0
aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
aiecs/domain/agent/integration/protocols.py +281 -0
aiecs/domain/agent/integration/retry_policy.py +218 -0
aiecs/domain/agent/integration/role_config.py +213 -0
aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
aiecs/domain/agent/lifecycle.py +291 -0
aiecs/domain/agent/llm_agent.py +692 -0
aiecs/domain/agent/memory/__init__.py +12 -0
aiecs/domain/agent/memory/conversation.py +1124 -0
aiecs/domain/agent/migration/__init__.py +14 -0
aiecs/domain/agent/migration/conversion.py +163 -0
aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
aiecs/domain/agent/models.py +884 -0
aiecs/domain/agent/observability.py +479 -0
aiecs/domain/agent/persistence.py +449 -0
aiecs/domain/agent/prompts/__init__.py +29 -0
aiecs/domain/agent/prompts/builder.py +159 -0
aiecs/domain/agent/prompts/formatters.py +187 -0
aiecs/domain/agent/prompts/template.py +255 -0
aiecs/domain/agent/registry.py +253 -0
aiecs/domain/agent/tool_agent.py +444 -0
aiecs/domain/agent/tools/__init__.py +15 -0
aiecs/domain/agent/tools/schema_generator.py +364 -0
aiecs/domain/community/__init__.py +155 -0
aiecs/domain/community/agent_adapter.py +469 -0
aiecs/domain/community/analytics.py +432 -0
aiecs/domain/community/collaborative_workflow.py +648 -0
aiecs/domain/community/communication_hub.py +634 -0
aiecs/domain/community/community_builder.py +320 -0
aiecs/domain/community/community_integration.py +796 -0
aiecs/domain/community/community_manager.py +803 -0
aiecs/domain/community/decision_engine.py +849 -0
aiecs/domain/community/exceptions.py +231 -0
aiecs/domain/community/models/__init__.py +33 -0
aiecs/domain/community/models/community_models.py +234 -0
aiecs/domain/community/resource_manager.py +461 -0
aiecs/domain/community/shared_context_manager.py +589 -0
aiecs/domain/context/__init__.py +40 -10
aiecs/domain/context/context_engine.py +1910 -0
aiecs/domain/context/conversation_models.py +87 -53
aiecs/domain/context/graph_memory.py +582 -0
aiecs/domain/execution/model.py +12 -4
aiecs/domain/knowledge_graph/__init__.py +19 -0
aiecs/domain/knowledge_graph/models/__init__.py +52 -0
aiecs/domain/knowledge_graph/models/entity.py +148 -0
aiecs/domain/knowledge_graph/models/evidence.py +178 -0
aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
aiecs/domain/knowledge_graph/models/path.py +171 -0
aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
aiecs/domain/knowledge_graph/models/query.py +261 -0
aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
aiecs/domain/knowledge_graph/models/relation.py +202 -0
aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
aiecs/domain/task/dsl_processor.py +172 -56
aiecs/domain/task/model.py +20 -8
aiecs/domain/task/task_context.py +27 -24
aiecs/infrastructure/__init__.py +0 -2
aiecs/infrastructure/graph_storage/__init__.py +11 -0
aiecs/infrastructure/graph_storage/base.py +837 -0
aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
aiecs/infrastructure/graph_storage/cache.py +424 -0
aiecs/infrastructure/graph_storage/distributed.py +223 -0
aiecs/infrastructure/graph_storage/error_handling.py +380 -0
aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
aiecs/infrastructure/graph_storage/health_checks.py +378 -0
aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
aiecs/infrastructure/graph_storage/metrics.py +344 -0
aiecs/infrastructure/graph_storage/migration.py +400 -0
aiecs/infrastructure/graph_storage/pagination.py +483 -0
aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
aiecs/infrastructure/graph_storage/postgres.py +1563 -0
aiecs/infrastructure/graph_storage/property_storage.py +353 -0
aiecs/infrastructure/graph_storage/protocols.py +76 -0
aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
aiecs/infrastructure/graph_storage/streaming.py +487 -0
aiecs/infrastructure/graph_storage/tenant.py +412 -0
aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
aiecs/infrastructure/messaging/websocket_manager.py +51 -35
aiecs/infrastructure/monitoring/__init__.py +22 -0
aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
aiecs/infrastructure/monitoring/structured_logger.py +3 -7
aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
aiecs/infrastructure/persistence/__init__.py +14 -1
aiecs/infrastructure/persistence/context_engine_client.py +184 -0
aiecs/infrastructure/persistence/database_manager.py +67 -43
aiecs/infrastructure/persistence/file_storage.py +180 -103
aiecs/infrastructure/persistence/redis_client.py +74 -21
aiecs/llm/__init__.py +73 -25
aiecs/llm/callbacks/__init__.py +11 -0
aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
aiecs/llm/client_factory.py +224 -36
aiecs/llm/client_resolver.py +155 -0
aiecs/llm/clients/__init__.py +38 -0
aiecs/llm/clients/base_client.py +324 -0
aiecs/llm/clients/google_function_calling_mixin.py +457 -0
aiecs/llm/clients/googleai_client.py +241 -0
aiecs/llm/clients/openai_client.py +158 -0
aiecs/llm/clients/openai_compatible_mixin.py +367 -0
aiecs/llm/clients/vertex_client.py +897 -0
aiecs/llm/clients/xai_client.py +201 -0
aiecs/llm/config/__init__.py +51 -0
aiecs/llm/config/config_loader.py +272 -0
aiecs/llm/config/config_validator.py +206 -0
aiecs/llm/config/model_config.py +143 -0
aiecs/llm/protocols.py +149 -0
aiecs/llm/utils/__init__.py +10 -0
aiecs/llm/utils/validate_config.py +89 -0
aiecs/main.py +140 -121
aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
aiecs/scripts/aid/__init__.py +19 -0
aiecs/scripts/aid/module_checker.py +499 -0
aiecs/scripts/aid/version_manager.py +235 -0
aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
aiecs/scripts/dependance_check/__init__.py +15 -0
aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
aiecs/scripts/dependance_patch/__init__.py +7 -0
aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
aiecs/scripts/knowledge_graph/__init__.py +3 -0
aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
aiecs/scripts/tools_develop/README.md +671 -0
aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
aiecs/scripts/tools_develop/__init__.py +21 -0
aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
aiecs/scripts/tools_develop/schema_coverage.py +511 -0
aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
aiecs/scripts/tools_develop/verify_tools.py +352 -0
aiecs/tasks/__init__.py +0 -1
aiecs/tasks/worker.py +115 -47
aiecs/tools/__init__.py +194 -72
aiecs/tools/apisource/__init__.py +99 -0
aiecs/tools/apisource/intelligence/__init__.py +19 -0
aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
aiecs/tools/apisource/monitoring/__init__.py +9 -0
aiecs/tools/apisource/monitoring/metrics.py +330 -0
aiecs/tools/apisource/providers/__init__.py +112 -0
aiecs/tools/apisource/providers/base.py +671 -0
aiecs/tools/apisource/providers/census.py +397 -0
aiecs/tools/apisource/providers/fred.py +535 -0
aiecs/tools/apisource/providers/newsapi.py +409 -0
aiecs/tools/apisource/providers/worldbank.py +352 -0
aiecs/tools/apisource/reliability/__init__.py +12 -0
aiecs/tools/apisource/reliability/error_handler.py +363 -0
aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
aiecs/tools/apisource/tool.py +832 -0
aiecs/tools/apisource/utils/__init__.py +9 -0
aiecs/tools/apisource/utils/validators.py +334 -0
aiecs/tools/base_tool.py +415 -21
aiecs/tools/docs/__init__.py +121 -0
aiecs/tools/docs/ai_document_orchestrator.py +607 -0
aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
aiecs/tools/docs/content_insertion_tool.py +1320 -0
aiecs/tools/docs/document_creator_tool.py +1323 -0
aiecs/tools/docs/document_layout_tool.py +1160 -0
aiecs/tools/docs/document_parser_tool.py +1011 -0
aiecs/tools/docs/document_writer_tool.py +1829 -0
aiecs/tools/knowledge_graph/__init__.py +17 -0
aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
aiecs/tools/langchain_adapter.py +300 -138
aiecs/tools/schema_generator.py +455 -0
aiecs/tools/search_tool/__init__.py +100 -0
aiecs/tools/search_tool/analyzers.py +581 -0
aiecs/tools/search_tool/cache.py +264 -0
aiecs/tools/search_tool/constants.py +128 -0
aiecs/tools/search_tool/context.py +224 -0
aiecs/tools/search_tool/core.py +778 -0
aiecs/tools/search_tool/deduplicator.py +119 -0
aiecs/tools/search_tool/error_handler.py +242 -0
aiecs/tools/search_tool/metrics.py +343 -0
aiecs/tools/search_tool/rate_limiter.py +172 -0
aiecs/tools/search_tool/schemas.py +275 -0
aiecs/tools/statistics/__init__.py +80 -0
aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
aiecs/tools/statistics/data_loader_tool.py +555 -0
aiecs/tools/statistics/data_profiler_tool.py +638 -0
aiecs/tools/statistics/data_transformer_tool.py +580 -0
aiecs/tools/statistics/data_visualizer_tool.py +498 -0
aiecs/tools/statistics/model_trainer_tool.py +507 -0
aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
aiecs/tools/task_tools/__init__.py +49 -36
aiecs/tools/task_tools/chart_tool.py +200 -184
aiecs/tools/task_tools/classfire_tool.py +268 -267
aiecs/tools/task_tools/image_tool.py +175 -131
aiecs/tools/task_tools/office_tool.py +226 -146
aiecs/tools/task_tools/pandas_tool.py +477 -121
aiecs/tools/task_tools/report_tool.py +390 -142
aiecs/tools/task_tools/research_tool.py +149 -79
aiecs/tools/task_tools/scraper_tool.py +339 -145
aiecs/tools/task_tools/stats_tool.py +448 -209
aiecs/tools/temp_file_manager.py +26 -24
aiecs/tools/tool_executor/__init__.py +18 -16
aiecs/tools/tool_executor/tool_executor.py +364 -52
aiecs/utils/LLM_output_structor.py +74 -48
aiecs/utils/__init__.py +14 -3
aiecs/utils/base_callback.py +0 -3
aiecs/utils/cache_provider.py +696 -0
aiecs/utils/execution_utils.py +50 -31
aiecs/utils/prompt_loader.py +1 -0
aiecs/utils/token_usage_repository.py +37 -11
aiecs/ws/socket_server.py +14 -4
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
aiecs-1.7.6.dist-info/RECORD +337 -0
aiecs-1.7.6.dist-info/entry_points.txt +13 -0
aiecs/config/registry.py +0 -19
aiecs/domain/context/content_engine.py +0 -982
aiecs/llm/base_client.py +0 -99
aiecs/llm/openai_client.py +0 -125
aiecs/llm/vertex_client.py +0 -186
aiecs/llm/xai_client.py +0 -184
aiecs/scripts/dependency_checker.py +0 -857
aiecs/scripts/quick_dependency_check.py +0 -269
aiecs/tools/task_tools/search_api.py +0 -7
aiecs-1.0.1.dist-info/RECORD +0 -90
aiecs-1.0.1.dist-info/entry_points.txt +0 -7
/aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
/aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
/aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
/aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
{aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0

aiecs/tools/task_tools/office_tool.py CHANGED Viewed

@@ -1,138 +1,109 @@
+from aiecs.tools import register_tool
+from aiecs.tools.base_tool import BaseTool
+from pydantic import BaseModel, field_validator, Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+from pptx.util import Inches
+from pptx import Presentation
+from docx.shared import Pt
+from docx import Document as DocxDocument
+from tika import parser  # type: ignore[import-untyped]
 import os
 import logging
+import warnings
 from typing import List, Dict, Optional, Any
-import pandas as pd
+import pandas as pd  # type: ignore[import-untyped]
 import pdfplumber
-import pytesseract
+import pytesseract  # type: ignore[import-untyped]
 from PIL import Image
-from tika import parser
-from docx import Document as DocxDocument
-from docx.shared import Pt
-from pptx import Presentation
-from pptx.util import Inches
-from pydantic import BaseModel, field_validator, ValidationError, ConfigDict
-from pydantic_settings import BaseSettings
-from aiecs.tools.base_tool import BaseTool
-from aiecs.tools import register_tool
+# Tika log path will be configured via Config class
+# Suppress pkg_resources deprecation warning from tika
+warnings.filterwarnings("ignore", category=UserWarning, module="tika")
-# Configuration for OfficeTool
-class OfficeSettings(BaseSettings):
-    """
-    Configuration for OfficeTool.
-    Attributes:
-        max_file_size_mb (int): Maximum file size in megabytes.
-        default_font (str): Default font for documents.
-        default_font_size (int): Default font size in points.
-        allowed_extensions (List[str]): Allowed document file extensions.
-        env_prefix (str): Environment variable prefix for settings.
-    """
-    max_file_size_mb: int = 100
-    default_font: str = "Arial"
-    default_font_size: int = 12
-    allowed_extensions: List[str] = ['.docx', '.pptx', '.xlsx', '.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif']
-    env_prefix: str = 'OFFICE_TOOL_'
-    model_config = ConfigDict(env_prefix='OFFICE_TOOL_')
+# Module-level default configuration for validators
+_DEFAULT_MAX_FILE_SIZE_MB = 100
+_DEFAULT_ALLOWED_EXTENSIONS = [
+    ".docx",
+    ".pptx",
+    ".xlsx",
+    ".pdf",
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".tiff",
+    ".bmp",
+    ".gif",
+]
 # Exceptions
 class OfficeToolError(Exception):
     """Base exception for OfficeTool errors."""
-    pass
 class InputValidationError(OfficeToolError):
     """Raised when input validation fails."""
-    pass
 class FileOperationError(OfficeToolError):
     """Raised when file operations fail."""
-    pass
 class SecurityError(OfficeToolError):
     """Raised for security-related issues."""
-    pass
 class ContentValidationError(OfficeToolError):
     """Raised when document content validation fails."""
-    pass
 # Base schema for common fields
 class BaseFileSchema(BaseModel):
     file_path: Optional[str] = None
     output_path: Optional[str] = None
     image_path: Optional[str] = None
-    @field_validator('file_path', 'output_path', 'image_path')
+    @field_validator("file_path", "output_path", "image_path")
     def validate_path(cls, v: Optional[str], field) -> Optional[str]:
         """Validate file paths for existence, size, extension, and path traversal."""
         if not v:
             return v
-        settings = OfficeSettings()
         abs_path = os.path.abspath(os.path.normpath(v))
         # Check for path traversal
-        if '..' in v or '~' in v or '%' in v:
+        if ".." in v or "~" in v or "%" in v:
             raise SecurityError(f"Path traversal attempt detected: {v}")
         # Ensure path is in allowed directories
         base_dir = os.path.abspath(os.getcwd())
-        allowed_dirs = [os.path.abspath(os.path.normpath(d)) for d in ['/tmp', './data', './uploads']]
+        allowed_dirs = [os.path.abspath(os.path.normpath(d)) for d in ["/tmp", "./data", "./uploads"]]
         if not abs_path.startswith(base_dir) and not any(abs_path.startswith(d) for d in allowed_dirs):
             raise SecurityError(f"Path not in allowed directories: {abs_path}")
         # Check extension
         ext = os.path.splitext(abs_path)[1].lower()
-        if ext not in settings.allowed_extensions:
-            raise SecurityError(f"Extension '{ext}' not allowed for '{field.field_name}', expected {settings.allowed_extensions}")
+        if ext not in _DEFAULT_ALLOWED_EXTENSIONS:
+            raise SecurityError(f"Extension '{ext}' not allowed for '{field.field_name}', expected {_DEFAULT_ALLOWED_EXTENSIONS}")
         # Check file existence and size for input paths
-        if field.field_name == 'file_path':
+        if field.field_name == "file_path":
             if not os.path.isfile(abs_path):
                 raise FileOperationError(f"{field.field_name}: File not found: {abs_path}")
             size_mb = os.path.getsize(abs_path) / (1024 * 1024)
-            if size_mb > settings.max_file_size_mb:
-                raise FileOperationError(f"{field.field_name}: File too large: {size_mb:.1f}MB, max {settings.max_file_size_mb}MB")
+            if size_mb > _DEFAULT_MAX_FILE_SIZE_MB:
+                raise FileOperationError(f"{field.field_name}: File too large: {size_mb:.1f}MB, max {_DEFAULT_MAX_FILE_SIZE_MB}MB")
         # Check for existing output paths
-        elif field.field_name == 'output_path' and os.path.exists(abs_path):
+        elif field.field_name == "output_path" and os.path.exists(abs_path):
             raise FileOperationError(f"{field.field_name}: File already exists: {abs_path}")
         return abs_path
-# Schemas for operations
-class ReadDocxSchema(BaseFileSchema):
-    """Schema for reading DOCX files."""
-    file_path: str
-    include_tables: bool = False
-class WriteDocxSchema(BaseFileSchema):
-    """Schema for writing DOCX files."""
-    text: str
-    output_path: str
-    table_data: Optional[List[List[str]]] = None
-class ReadPptxSchema(BaseFileSchema):
-    """Schema for reading PPTX files."""
-    file_path: str
-class WritePptxSchema(BaseFileSchema):
-    """Schema for writing PPTX files."""
-    slides: List[str]
-    output_path: str
-    image_path: Optional[str] = None
-class ReadXlsxSchema(BaseFileSchema):
-    """Schema for reading XLSX files."""
-    file_path: str
-    sheet_name: Optional[str] = None
-class WriteXlsxSchema(BaseFileSchema):
-    """Schema for writing XLSX files."""
-    data: List[Dict]
-    output_path: str
-    sheet_name: str = 'Sheet1'
+# Schemas for operations - moved to OfficeTool class as inner classes
-class ExtractTextSchema(BaseFileSchema):
-    """Schema for extracting text from files."""
-    file_path: str
-@register_tool('office')
+@register_tool("office")
 class OfficeTool(BaseTool):
     """
     Office document processing tool supporting:
@@ -146,27 +117,115 @@ class OfficeTool(BaseTool):
     Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
     """
-    def __init__(self, config: Optional[Dict[str, Any]] = None):
+    # Configuration schema
+    class Config(BaseSettings):
+        """Configuration for the office tool
+        Automatically reads from environment variables with OFFICE_TOOL_ prefix.
+        Example: OFFICE_TOOL_MAX_FILE_SIZE_MB -> max_file_size_mb
         """
-        Initialize OfficeTool with settings.
+        model_config = SettingsConfigDict(env_prefix="OFFICE_TOOL_")
+        max_file_size_mb: int = Field(default=100, description="Maximum file size in megabytes")
+        default_font: str = Field(default="Arial", description="Default font for documents")
+        default_font_size: int = Field(default=12, description="Default font size in points")
+        tika_log_path: str = Field(
+            default=os.path.expanduser("~/.cache/tika"),
+            description="Tika log directory path",
+        )
+        allowed_extensions: List[str] = Field(
+            default=[
+                ".docx",
+                ".pptx",
+                ".xlsx",
+                ".pdf",
+                ".png",
+                ".jpg",
+                ".jpeg",
+                ".tiff",
+                ".bmp",
+                ".gif",
+            ],
+            description="Allowed document file extensions",
+        )
+    # Schema definitions
+    class Read_docxSchema(BaseFileSchema):
+        """Schema for read_docx operation"""
+        file_path: str = Field(description="Path to the DOCX file to read")
+        include_tables: bool = Field(default=False, description="Whether to include table data in the output. If True, tables are included as nested lists")
+    class Write_docxSchema(BaseFileSchema):
+        """Schema for write_docx operation"""
+        text: str = Field(description="Text content to write to the DOCX file")
+        output_path: str = Field(description="Path where the DOCX file will be saved")
+        table_data: Optional[List[List[str]]] = Field(default=None, description="Optional table data to include in the document. Each inner list represents a row, each string represents a cell")
+    class Read_pptxSchema(BaseFileSchema):
+        """Schema for read_pptx operation"""
+        file_path: str = Field(description="Path to the PPTX file to read")
+    class Write_pptxSchema(BaseFileSchema):
+        """Schema for write_pptx operation"""
+        slides: List[str] = Field(description="List of slide content strings. Each string becomes a slide")
+        output_path: str = Field(description="Path where the PPTX file will be saved")
+        image_path: Optional[str] = Field(default=None, description="Optional path to an image file to include on the first slide")
+    class Read_xlsxSchema(BaseFileSchema):
+        """Schema for read_xlsx operation"""
+        file_path: str = Field(description="Path to the XLSX file to read")
+        sheet_name: Optional[str] = Field(default=None, description="Optional name of the sheet to read. If None, reads the first sheet")
+    class Write_xlsxSchema(BaseFileSchema):
+        """Schema for write_xlsx operation"""
+        data: List[Dict[str, Any]] = Field(description="List of dictionaries representing Excel rows. Each dictionary key becomes a column header, values become cell data")
+        output_path: str = Field(description="Path where the XLSX file will be saved")
+        sheet_name: str = Field(default="Sheet1", description="Name of the Excel sheet to create")
+    class Extract_textSchema(BaseFileSchema):
+        """Schema for extract_text operation"""
+        file_path: str = Field(description="Path to the file to extract text from. Supports DOCX, PPTX, XLSX, PDF, and image formats")
+    def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
+        """
+        Initialize OfficeTool with configuration.
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/office_tool.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
         Args:
-            config (Dict, optional): Configuration overrides for OfficeSettings.
+            config (Dict, optional): Configuration overrides for OfficeTool.
+            **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
         Raises:
             ValueError: If config contains invalid settings.
         """
-        super().__init__(config)
-        self.settings = OfficeSettings()
-        if config:
-            try:
-                self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
-            except ValidationError as e:
-                raise ValueError(f"Invalid configuration: {e}")
+        super().__init__(config, **kwargs)
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()
+        # Configure Tika log path from config
+        os.environ["TIKA_LOG_PATH"] = self.config.tika_log_path
+        os.makedirs(self.config.tika_log_path, exist_ok=True)
         self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
             handler = logging.StreamHandler()
-            handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
             self.logger.addHandler(handler)
         self.logger.setLevel(logging.INFO)
@@ -182,28 +241,29 @@ class OfficeTool(BaseTool):
             ContentValidationError: If document structure is invalid.
         """
         try:
-            if file_type == 'docx':
+            if file_type == "docx":
                 doc = DocxDocument(file_path)
-                if not hasattr(doc, 'paragraphs'):
+                if not hasattr(doc, "paragraphs"):
                     raise ContentValidationError("Invalid DOCX structure")
-            elif file_type == 'pptx':
+            elif file_type == "pptx":
                 prs = Presentation(file_path)
-                if not hasattr(prs, 'slides'):
+                if not hasattr(prs, "slides"):
                     raise ContentValidationError("Invalid PPTX structure")
-            elif file_type == 'xlsx':
-                # Just validate that file can be read - don't care about return type
+            elif file_type == "xlsx":
+                # Just validate that file can be read - don't care about return
+                # type
                 pd.read_excel(file_path, nrows=5)
-            elif file_type == 'pdf':
+            elif file_type == "pdf":
                 with pdfplumber.open(file_path) as pdf:
                     if len(pdf.pages) == 0:
                         raise ContentValidationError("PDF has no pages")
-            elif file_type == 'image':
+            elif file_type == "image":
                 img = Image.open(file_path)
                 img.verify()  # Verify it's a valid image
             else:
                 # Use tika as fallback for other formats
                 parsed = parser.from_file(file_path)
-                if not parsed or not parsed.get('content'):
+                if not parsed or not parsed.get("content"):
                     raise ContentValidationError("Unable to parse file content")
         except Exception as e:
             raise ContentValidationError(f"Invalid {file_type.upper()} file: {str(e)}")
@@ -220,7 +280,7 @@ class OfficeTool(BaseTool):
         """
         if not text:
             return ""
-        return ''.join(char for char in text if ord(char) >= 32 or char in '\n\r\t')
+        return "".join(char for char in text if ord(char) >= 32 or char in "\n\r\t")
     def _sanitize_table_data(self, table_data: Optional[List[List[str]]]) -> Optional[List[List[str]]]:
         """
@@ -252,7 +312,8 @@ class OfficeTool(BaseTool):
         for item in data_list:
             clean_item = {}
             for k, v in item.items():
-                clean_key = self._sanitize_text(str(k))[:255]  # Excel key limit with sanitization
+                # Excel key limit with sanitization
+                clean_key = self._sanitize_text(str(k))[:255]
                 if isinstance(v, str):
                     clean_value = self._sanitize_text(v)[:32767]  # Excel cell limit
                 else:
@@ -281,7 +342,7 @@ class OfficeTool(BaseTool):
                     page_text = page.extract_text()
                     if page_text:
                         text_content.append(page_text)
-            return '\n'.join(text_content)
+            return "\n".join(text_content)
         except Exception as e:
             raise FileOperationError(f"Failed to extract PDF text: {str(e)}")
@@ -299,11 +360,11 @@ class OfficeTool(BaseTool):
             FileOperationError: If image text extraction fails.
         """
         try:
-            image = Image.open(file_path)
+            image: Image.Image = Image.open(file_path)
             # Convert to RGB if necessary
-            if image.mode != 'RGB':
-                image = image.convert('RGB')
-            text = pytesseract.image_to_string(image, lang='eng+chi_sim')
+            if image.mode != "RGB":
+                image = image.convert("RGB")
+            text = pytesseract.image_to_string(image, lang="eng+chi_sim")
             return text.strip()
         except Exception as e:
             raise FileOperationError(f"Failed to extract image text: {str(e)}")
@@ -323,7 +384,7 @@ class OfficeTool(BaseTool):
         """
         try:
             parsed = parser.from_file(file_path)
-            content = parsed.get('content', '')
+            content = parsed.get("content", "")
             return content.strip() if content else ""
         except Exception as e:
             raise FileOperationError(f"Failed to extract text with Tika: {str(e)}")
@@ -344,19 +405,24 @@ class OfficeTool(BaseTool):
             ContentValidationError: If document structure is invalid.
         """
         try:
-            self._validate_document(file_path, 'docx')
+            self._validate_document(file_path, "docx")
             doc = DocxDocument(file_path)
             paras = [p.text for p in doc.paragraphs if p.text.strip()]
             tables = None
             if include_tables:
                 tables = [[[cell.text for cell in row.cells] for row in table.rows] for table in doc.tables]
-            return {'paragraphs': paras, 'tables': tables}
+            return {"paragraphs": paras, "tables": tables}
         except ContentValidationError:
             raise
         except Exception as e:
             raise FileOperationError(f"Failed to read DOCX: {str(e)}")
-    def write_docx(self, text: str, output_path: str, table_data: Optional[List[List[str]]] = None) -> Dict[str, Any]:
+    def write_docx(
+        self,
+        text: str,
+        output_path: str,
+        table_data: Optional[List[List[str]]] = None,
+    ) -> Dict[str, Any]:
         """
         Write content to a DOCX file.
@@ -375,9 +441,9 @@ class OfficeTool(BaseTool):
             sanitized_text = self._sanitize_text(text)
             sanitized_table_data = self._sanitize_table_data(table_data)
             doc = DocxDocument()
-            style = doc.styles['Normal']
-            style.font.name = self.settings.default_font
-            style.font.size = Pt(self.settings.default_font_size)
+            style = doc.styles["Normal"]
+            style.font.name = self.config.default_font
+            style.font.size = Pt(self.config.default_font_size)
             for line in sanitized_text.splitlines():
                 doc.add_paragraph(line)
             if sanitized_table_data and sanitized_table_data[0]:
@@ -389,9 +455,10 @@ class OfficeTool(BaseTool):
                         if j < len(row):
                             table.rows[i].cells[j].text = str(row[j])
                         else:
-                            table.rows[i].cells[j].text = ""  # Empty cell for missing data
+                            # Empty cell for missing data
+                            table.rows[i].cells[j].text = ""
             doc.save(output_path)
-            return {'success': True, 'file_path': output_path}
+            return {"success": True, "file_path": output_path}
         except Exception as e:
             raise FileOperationError(f"Failed to write DOCX: {str(e)}")
@@ -410,12 +477,12 @@ class OfficeTool(BaseTool):
             ContentValidationError: If document structure is invalid.
         """
         try:
-            self._validate_document(file_path, 'pptx')
+            self._validate_document(file_path, "pptx")
             prs = Presentation(file_path)
             texts = []
             for slide in prs.slides:
                 for shape in slide.shapes:
-                    if hasattr(shape, 'text'):
+                    if hasattr(shape, "text"):
                         txt = shape.text.strip()
                         if txt:
                             texts.append(txt)
@@ -425,7 +492,12 @@ class OfficeTool(BaseTool):
         except Exception as e:
             raise FileOperationError(f"Failed to read PPTX: {str(e)}")
-    def write_pptx(self, slides: List[str], output_path: str, image_path: Optional[str] = None) -> Dict[str, Any]:
+    def write_pptx(
+        self,
+        slides: List[str],
+        output_path: str,
+        image_path: Optional[str] = None,
+    ) -> Dict[str, Any]:
         """
         Write content to a PPTX file.
@@ -462,7 +534,7 @@ class OfficeTool(BaseTool):
                     except Exception as img_err:
                         self.logger.warning(f"Could not add image to slide: {img_err}")
             prs.save(output_path)
-            return {'success': True, 'file_path': output_path}
+            return {"success": True, "file_path": output_path}
         except Exception as e:
             raise FileOperationError(f"Failed to write PPTX: {str(e)}")
@@ -482,27 +554,27 @@ class OfficeTool(BaseTool):
             ContentValidationError: If document structure is invalid.
         """
         try:
-            self._validate_document(file_path, 'xlsx')
+            self._validate_document(file_path, "xlsx")
             data = pd.read_excel(file_path, sheet_name=sheet_name)
             # Handle different return types from pd.read_excel()
             if isinstance(data, pd.DataFrame):
                 # Single sheet or specific sheet requested
-                return data.to_dict(orient='records')
+                return data.to_dict(orient="records")
             elif isinstance(data, dict):
                 # Multiple sheets returned as dict - use the first sheet
                 first_sheet_name = list(data.keys())[0]
                 first_df = data[first_sheet_name]
-                return first_df.to_dict(orient='records')
+                return first_df.to_dict(orient="records")
             else:
                 raise FileOperationError("Unexpected data type returned from Excel file")
         except ContentValidationError:
             raise
         except Exception as e:
             raise FileOperationError(f"Failed to read XLSX: {str(e)}")
-    def write_xlsx(self, data: List[Dict], output_path: str, sheet_name: str = 'Sheet1') -> Dict[str, Any]:
+    def write_xlsx(self, data: List[Dict], output_path: str, sheet_name: str = "Sheet1") -> Dict[str, Any]:
         """
         Write content to an XLSX file.
@@ -523,7 +595,7 @@ class OfficeTool(BaseTool):
                 pd.DataFrame().to_excel(output_path, index=False, sheet_name=sheet_name)
             else:
                 pd.DataFrame(sanitized_data).to_excel(output_path, index=False, sheet_name=sheet_name)
-            return {'success': True, 'file_path': output_path}
+            return {"success": True, "file_path": output_path}
         except Exception as e:
             raise FileOperationError(f"Failed to write XLSX: {str(e)}")
@@ -545,38 +617,45 @@ class OfficeTool(BaseTool):
             file_ext = os.path.splitext(file_path)[1].lower()
             # Determine file type and validate
-            if file_ext == '.pdf':
-                file_type = 'pdf'
-            elif file_ext == '.docx':
-                file_type = 'docx'
-            elif file_ext == '.pptx':
-                file_type = 'pptx'
-            elif file_ext == '.xlsx':
-                file_type = 'xlsx'
-            elif file_ext in ['.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif']:
-                file_type = 'image'
+            if file_ext == ".pdf":
+                file_type = "pdf"
+            elif file_ext == ".docx":
+                file_type = "docx"
+            elif file_ext == ".pptx":
+                file_type = "pptx"
+            elif file_ext == ".xlsx":
+                file_type = "xlsx"
+            elif file_ext in [
+                ".png",
+                ".jpg",
+                ".jpeg",
+                ".tiff",
+                ".bmp",
+                ".gif",
+            ]:
+                file_type = "image"
             else:
-                file_type = 'other'
+                file_type = "other"
             # Validate document structure
             self._validate_document(file_path, file_type)
             # Extract text based on file type
-            if file_type == 'pdf':
+            if file_type == "pdf":
                 return self._sanitize_text(self._extract_pdf_text(file_path))
-            elif file_type == 'docx':
+            elif file_type == "docx":
                 doc = DocxDocument(file_path)
                 paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
-                return self._sanitize_text('\n'.join(paragraphs))
-            elif file_type == 'pptx':
+                return self._sanitize_text("\n".join(paragraphs))
+            elif file_type == "pptx":
                 prs = Presentation(file_path)
                 texts = []
                 for slide in prs.slides:
                     for shape in slide.shapes:
-                        if hasattr(shape, 'text') and shape.text.strip():
+                        if hasattr(shape, "text") and shape.text.strip():
                             texts.append(shape.text)
-                return self._sanitize_text('\n'.join(texts))
-            elif file_type == 'xlsx':
+                return self._sanitize_text("\n".join(texts))
+            elif file_type == "xlsx":
                 data = pd.read_excel(file_path)
                 # Handle different return types from pd.read_excel()
                 if isinstance(data, pd.DataFrame):
@@ -587,8 +666,9 @@ class OfficeTool(BaseTool):
                     first_df = data[first_sheet_name]
                     return self._sanitize_text(first_df.to_string(index=False))
                 else:
-                    return self._sanitize_text("")  # Fallback for unexpected data types
-            elif file_type == 'image':
+                    # Fallback for unexpected data types
+                    return self._sanitize_text("")
+            elif file_type == "image":
                 return self._sanitize_text(self._extract_image_text(file_path))
             else:
                 # Use Tika as fallback for other formats

aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

Potentially problematic release.

aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl