aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -1,138 +1,109 @@
1
+ from aiecs.tools import register_tool
2
+ from aiecs.tools.base_tool import BaseTool
3
+ from pydantic import BaseModel, field_validator, Field
4
+ from pydantic_settings import BaseSettings, SettingsConfigDict
5
+ from pptx.util import Inches
6
+ from pptx import Presentation
7
+ from docx.shared import Pt
8
+ from docx import Document as DocxDocument
9
+ from tika import parser # type: ignore[import-untyped]
1
10
  import os
2
11
  import logging
12
+ import warnings
3
13
  from typing import List, Dict, Optional, Any
4
14
 
5
- import pandas as pd
15
+ import pandas as pd # type: ignore[import-untyped]
6
16
  import pdfplumber
7
- import pytesseract
17
+ import pytesseract # type: ignore[import-untyped]
8
18
  from PIL import Image
9
- from tika import parser
10
- from docx import Document as DocxDocument
11
- from docx.shared import Pt
12
- from pptx import Presentation
13
- from pptx.util import Inches
14
- from pydantic import BaseModel, field_validator, ValidationError, ConfigDict
15
- from pydantic_settings import BaseSettings
16
19
 
17
- from aiecs.tools.base_tool import BaseTool
18
- from aiecs.tools import register_tool
20
+ # Tika log path will be configured via Config class
21
+
22
+ # Suppress pkg_resources deprecation warning from tika
23
+ warnings.filterwarnings("ignore", category=UserWarning, module="tika")
19
24
 
20
- # Configuration for OfficeTool
21
- class OfficeSettings(BaseSettings):
22
- """
23
- Configuration for OfficeTool.
24
-
25
- Attributes:
26
- max_file_size_mb (int): Maximum file size in megabytes.
27
- default_font (str): Default font for documents.
28
- default_font_size (int): Default font size in points.
29
- allowed_extensions (List[str]): Allowed document file extensions.
30
- env_prefix (str): Environment variable prefix for settings.
31
- """
32
- max_file_size_mb: int = 100
33
- default_font: str = "Arial"
34
- default_font_size: int = 12
35
- allowed_extensions: List[str] = ['.docx', '.pptx', '.xlsx', '.pdf', '.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif']
36
- env_prefix: str = 'OFFICE_TOOL_'
37
25
 
38
- model_config = ConfigDict(env_prefix='OFFICE_TOOL_')
26
+ # Module-level default configuration for validators
27
+ _DEFAULT_MAX_FILE_SIZE_MB = 100
28
+ _DEFAULT_ALLOWED_EXTENSIONS = [
29
+ ".docx",
30
+ ".pptx",
31
+ ".xlsx",
32
+ ".pdf",
33
+ ".png",
34
+ ".jpg",
35
+ ".jpeg",
36
+ ".tiff",
37
+ ".bmp",
38
+ ".gif",
39
+ ]
39
40
 
40
41
  # Exceptions
42
+
43
+
41
44
  class OfficeToolError(Exception):
42
45
  """Base exception for OfficeTool errors."""
43
- pass
46
+
44
47
 
45
48
  class InputValidationError(OfficeToolError):
46
49
  """Raised when input validation fails."""
47
- pass
50
+
48
51
 
49
52
  class FileOperationError(OfficeToolError):
50
53
  """Raised when file operations fail."""
51
- pass
54
+
52
55
 
53
56
  class SecurityError(OfficeToolError):
54
57
  """Raised for security-related issues."""
55
- pass
58
+
56
59
 
57
60
  class ContentValidationError(OfficeToolError):
58
61
  """Raised when document content validation fails."""
59
- pass
62
+
60
63
 
61
64
  # Base schema for common fields
65
+
66
+
62
67
  class BaseFileSchema(BaseModel):
63
68
  file_path: Optional[str] = None
64
69
  output_path: Optional[str] = None
65
70
  image_path: Optional[str] = None
66
71
 
67
- @field_validator('file_path', 'output_path', 'image_path')
72
+ @field_validator("file_path", "output_path", "image_path")
68
73
  def validate_path(cls, v: Optional[str], field) -> Optional[str]:
69
74
  """Validate file paths for existence, size, extension, and path traversal."""
70
75
  if not v:
71
76
  return v
72
- settings = OfficeSettings()
73
77
  abs_path = os.path.abspath(os.path.normpath(v))
74
78
  # Check for path traversal
75
- if '..' in v or '~' in v or '%' in v:
79
+ if ".." in v or "~" in v or "%" in v:
76
80
  raise SecurityError(f"Path traversal attempt detected: {v}")
77
81
  # Ensure path is in allowed directories
78
82
  base_dir = os.path.abspath(os.getcwd())
79
- allowed_dirs = [os.path.abspath(os.path.normpath(d)) for d in ['/tmp', './data', './uploads']]
83
+ allowed_dirs = [os.path.abspath(os.path.normpath(d)) for d in ["/tmp", "./data", "./uploads"]]
80
84
  if not abs_path.startswith(base_dir) and not any(abs_path.startswith(d) for d in allowed_dirs):
81
85
  raise SecurityError(f"Path not in allowed directories: {abs_path}")
82
86
  # Check extension
83
87
  ext = os.path.splitext(abs_path)[1].lower()
84
- if ext not in settings.allowed_extensions:
85
- raise SecurityError(f"Extension '{ext}' not allowed for '{field.field_name}', expected {settings.allowed_extensions}")
88
+ if ext not in _DEFAULT_ALLOWED_EXTENSIONS:
89
+ raise SecurityError(f"Extension '{ext}' not allowed for '{field.field_name}', expected {_DEFAULT_ALLOWED_EXTENSIONS}")
86
90
  # Check file existence and size for input paths
87
- if field.field_name == 'file_path':
91
+ if field.field_name == "file_path":
88
92
  if not os.path.isfile(abs_path):
89
93
  raise FileOperationError(f"{field.field_name}: File not found: {abs_path}")
90
94
  size_mb = os.path.getsize(abs_path) / (1024 * 1024)
91
- if size_mb > settings.max_file_size_mb:
92
- raise FileOperationError(f"{field.field_name}: File too large: {size_mb:.1f}MB, max {settings.max_file_size_mb}MB")
95
+ if size_mb > _DEFAULT_MAX_FILE_SIZE_MB:
96
+ raise FileOperationError(f"{field.field_name}: File too large: {size_mb:.1f}MB, max {_DEFAULT_MAX_FILE_SIZE_MB}MB")
93
97
  # Check for existing output paths
94
- elif field.field_name == 'output_path' and os.path.exists(abs_path):
98
+ elif field.field_name == "output_path" and os.path.exists(abs_path):
95
99
  raise FileOperationError(f"{field.field_name}: File already exists: {abs_path}")
96
100
  return abs_path
97
101
 
98
- # Schemas for operations
99
- class ReadDocxSchema(BaseFileSchema):
100
- """Schema for reading DOCX files."""
101
- file_path: str
102
- include_tables: bool = False
103
-
104
- class WriteDocxSchema(BaseFileSchema):
105
- """Schema for writing DOCX files."""
106
- text: str
107
- output_path: str
108
- table_data: Optional[List[List[str]]] = None
109
-
110
- class ReadPptxSchema(BaseFileSchema):
111
- """Schema for reading PPTX files."""
112
- file_path: str
113
-
114
- class WritePptxSchema(BaseFileSchema):
115
- """Schema for writing PPTX files."""
116
- slides: List[str]
117
- output_path: str
118
- image_path: Optional[str] = None
119
-
120
- class ReadXlsxSchema(BaseFileSchema):
121
- """Schema for reading XLSX files."""
122
- file_path: str
123
- sheet_name: Optional[str] = None
124
102
 
125
- class WriteXlsxSchema(BaseFileSchema):
126
- """Schema for writing XLSX files."""
127
- data: List[Dict]
128
- output_path: str
129
- sheet_name: str = 'Sheet1'
103
+ # Schemas for operations - moved to OfficeTool class as inner classes
130
104
 
131
- class ExtractTextSchema(BaseFileSchema):
132
- """Schema for extracting text from files."""
133
- file_path: str
134
105
 
135
- @register_tool('office')
106
+ @register_tool("office")
136
107
  class OfficeTool(BaseTool):
137
108
  """
138
109
  Office document processing tool supporting:
@@ -146,27 +117,115 @@ class OfficeTool(BaseTool):
146
117
 
147
118
  Inherits from BaseTool to leverage ToolExecutor for caching, concurrency, and error handling.
148
119
  """
149
- def __init__(self, config: Optional[Dict[str, Any]] = None):
120
+
121
+ # Configuration schema
122
+ class Config(BaseSettings):
123
+ """Configuration for the office tool
124
+
125
+ Automatically reads from environment variables with OFFICE_TOOL_ prefix.
126
+ Example: OFFICE_TOOL_MAX_FILE_SIZE_MB -> max_file_size_mb
150
127
  """
151
- Initialize OfficeTool with settings.
128
+
129
+ model_config = SettingsConfigDict(env_prefix="OFFICE_TOOL_")
130
+
131
+ max_file_size_mb: int = Field(default=100, description="Maximum file size in megabytes")
132
+ default_font: str = Field(default="Arial", description="Default font for documents")
133
+ default_font_size: int = Field(default=12, description="Default font size in points")
134
+ tika_log_path: str = Field(
135
+ default=os.path.expanduser("~/.cache/tika"),
136
+ description="Tika log directory path",
137
+ )
138
+ allowed_extensions: List[str] = Field(
139
+ default=[
140
+ ".docx",
141
+ ".pptx",
142
+ ".xlsx",
143
+ ".pdf",
144
+ ".png",
145
+ ".jpg",
146
+ ".jpeg",
147
+ ".tiff",
148
+ ".bmp",
149
+ ".gif",
150
+ ],
151
+ description="Allowed document file extensions",
152
+ )
153
+
154
+ # Schema definitions
155
+ class Read_docxSchema(BaseFileSchema):
156
+ """Schema for read_docx operation"""
157
+
158
+ file_path: str = Field(description="Path to the DOCX file to read")
159
+ include_tables: bool = Field(default=False, description="Whether to include table data in the output. If True, tables are included as nested lists")
160
+
161
+ class Write_docxSchema(BaseFileSchema):
162
+ """Schema for write_docx operation"""
163
+
164
+ text: str = Field(description="Text content to write to the DOCX file")
165
+ output_path: str = Field(description="Path where the DOCX file will be saved")
166
+ table_data: Optional[List[List[str]]] = Field(default=None, description="Optional table data to include in the document. Each inner list represents a row, each string represents a cell")
167
+
168
+ class Read_pptxSchema(BaseFileSchema):
169
+ """Schema for read_pptx operation"""
170
+
171
+ file_path: str = Field(description="Path to the PPTX file to read")
172
+
173
+ class Write_pptxSchema(BaseFileSchema):
174
+ """Schema for write_pptx operation"""
175
+
176
+ slides: List[str] = Field(description="List of slide content strings. Each string becomes a slide")
177
+ output_path: str = Field(description="Path where the PPTX file will be saved")
178
+ image_path: Optional[str] = Field(default=None, description="Optional path to an image file to include on the first slide")
179
+
180
+ class Read_xlsxSchema(BaseFileSchema):
181
+ """Schema for read_xlsx operation"""
182
+
183
+ file_path: str = Field(description="Path to the XLSX file to read")
184
+ sheet_name: Optional[str] = Field(default=None, description="Optional name of the sheet to read. If None, reads the first sheet")
185
+
186
+ class Write_xlsxSchema(BaseFileSchema):
187
+ """Schema for write_xlsx operation"""
188
+
189
+ data: List[Dict[str, Any]] = Field(description="List of dictionaries representing Excel rows. Each dictionary key becomes a column header, values become cell data")
190
+ output_path: str = Field(description="Path where the XLSX file will be saved")
191
+ sheet_name: str = Field(default="Sheet1", description="Name of the Excel sheet to create")
192
+
193
+ class Extract_textSchema(BaseFileSchema):
194
+ """Schema for extract_text operation"""
195
+
196
+ file_path: str = Field(description="Path to the file to extract text from. Supports DOCX, PPTX, XLSX, PDF, and image formats")
197
+
198
+ def __init__(self, config: Optional[Dict[str, Any]] = None, **kwargs):
199
+ """
200
+ Initialize OfficeTool with configuration.
201
+
202
+ Configuration is automatically loaded by BaseTool from:
203
+ 1. Explicit config dict (highest priority)
204
+ 2. YAML config files (config/tools/office_tool.yaml)
205
+ 3. Environment variables (via dotenv from .env files)
206
+ 4. Tool defaults (lowest priority)
152
207
 
153
208
  Args:
154
- config (Dict, optional): Configuration overrides for OfficeSettings.
209
+ config (Dict, optional): Configuration overrides for OfficeTool.
210
+ **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
155
211
 
156
212
  Raises:
157
213
  ValueError: If config contains invalid settings.
158
214
  """
159
- super().__init__(config)
160
- self.settings = OfficeSettings()
161
- if config:
162
- try:
163
- self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
164
- except ValidationError as e:
165
- raise ValueError(f"Invalid configuration: {e}")
215
+ super().__init__(config, **kwargs)
216
+
217
+ # Configuration is automatically loaded by BaseTool into self._config_obj
218
+ # Access config via self._config_obj (BaseSettings instance)
219
+ self.config = self._config_obj if self._config_obj else self.Config()
220
+
221
+ # Configure Tika log path from config
222
+ os.environ["TIKA_LOG_PATH"] = self.config.tika_log_path
223
+ os.makedirs(self.config.tika_log_path, exist_ok=True)
224
+
166
225
  self.logger = logging.getLogger(__name__)
167
226
  if not self.logger.handlers:
168
227
  handler = logging.StreamHandler()
169
- handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
228
+ handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
170
229
  self.logger.addHandler(handler)
171
230
  self.logger.setLevel(logging.INFO)
172
231
 
@@ -182,28 +241,29 @@ class OfficeTool(BaseTool):
182
241
  ContentValidationError: If document structure is invalid.
183
242
  """
184
243
  try:
185
- if file_type == 'docx':
244
+ if file_type == "docx":
186
245
  doc = DocxDocument(file_path)
187
- if not hasattr(doc, 'paragraphs'):
246
+ if not hasattr(doc, "paragraphs"):
188
247
  raise ContentValidationError("Invalid DOCX structure")
189
- elif file_type == 'pptx':
248
+ elif file_type == "pptx":
190
249
  prs = Presentation(file_path)
191
- if not hasattr(prs, 'slides'):
250
+ if not hasattr(prs, "slides"):
192
251
  raise ContentValidationError("Invalid PPTX structure")
193
- elif file_type == 'xlsx':
194
- # Just validate that file can be read - don't care about return type
252
+ elif file_type == "xlsx":
253
+ # Just validate that file can be read - don't care about return
254
+ # type
195
255
  pd.read_excel(file_path, nrows=5)
196
- elif file_type == 'pdf':
256
+ elif file_type == "pdf":
197
257
  with pdfplumber.open(file_path) as pdf:
198
258
  if len(pdf.pages) == 0:
199
259
  raise ContentValidationError("PDF has no pages")
200
- elif file_type == 'image':
260
+ elif file_type == "image":
201
261
  img = Image.open(file_path)
202
262
  img.verify() # Verify it's a valid image
203
263
  else:
204
264
  # Use tika as fallback for other formats
205
265
  parsed = parser.from_file(file_path)
206
- if not parsed or not parsed.get('content'):
266
+ if not parsed or not parsed.get("content"):
207
267
  raise ContentValidationError("Unable to parse file content")
208
268
  except Exception as e:
209
269
  raise ContentValidationError(f"Invalid {file_type.upper()} file: {str(e)}")
@@ -220,7 +280,7 @@ class OfficeTool(BaseTool):
220
280
  """
221
281
  if not text:
222
282
  return ""
223
- return ''.join(char for char in text if ord(char) >= 32 or char in '\n\r\t')
283
+ return "".join(char for char in text if ord(char) >= 32 or char in "\n\r\t")
224
284
 
225
285
  def _sanitize_table_data(self, table_data: Optional[List[List[str]]]) -> Optional[List[List[str]]]:
226
286
  """
@@ -252,7 +312,8 @@ class OfficeTool(BaseTool):
252
312
  for item in data_list:
253
313
  clean_item = {}
254
314
  for k, v in item.items():
255
- clean_key = self._sanitize_text(str(k))[:255] # Excel key limit with sanitization
315
+ # Excel key limit with sanitization
316
+ clean_key = self._sanitize_text(str(k))[:255]
256
317
  if isinstance(v, str):
257
318
  clean_value = self._sanitize_text(v)[:32767] # Excel cell limit
258
319
  else:
@@ -281,7 +342,7 @@ class OfficeTool(BaseTool):
281
342
  page_text = page.extract_text()
282
343
  if page_text:
283
344
  text_content.append(page_text)
284
- return '\n'.join(text_content)
345
+ return "\n".join(text_content)
285
346
  except Exception as e:
286
347
  raise FileOperationError(f"Failed to extract PDF text: {str(e)}")
287
348
 
@@ -299,11 +360,11 @@ class OfficeTool(BaseTool):
299
360
  FileOperationError: If image text extraction fails.
300
361
  """
301
362
  try:
302
- image = Image.open(file_path)
363
+ image: Image.Image = Image.open(file_path)
303
364
  # Convert to RGB if necessary
304
- if image.mode != 'RGB':
305
- image = image.convert('RGB')
306
- text = pytesseract.image_to_string(image, lang='eng+chi_sim')
365
+ if image.mode != "RGB":
366
+ image = image.convert("RGB")
367
+ text = pytesseract.image_to_string(image, lang="eng+chi_sim")
307
368
  return text.strip()
308
369
  except Exception as e:
309
370
  raise FileOperationError(f"Failed to extract image text: {str(e)}")
@@ -323,7 +384,7 @@ class OfficeTool(BaseTool):
323
384
  """
324
385
  try:
325
386
  parsed = parser.from_file(file_path)
326
- content = parsed.get('content', '')
387
+ content = parsed.get("content", "")
327
388
  return content.strip() if content else ""
328
389
  except Exception as e:
329
390
  raise FileOperationError(f"Failed to extract text with Tika: {str(e)}")
@@ -344,19 +405,24 @@ class OfficeTool(BaseTool):
344
405
  ContentValidationError: If document structure is invalid.
345
406
  """
346
407
  try:
347
- self._validate_document(file_path, 'docx')
408
+ self._validate_document(file_path, "docx")
348
409
  doc = DocxDocument(file_path)
349
410
  paras = [p.text for p in doc.paragraphs if p.text.strip()]
350
411
  tables = None
351
412
  if include_tables:
352
413
  tables = [[[cell.text for cell in row.cells] for row in table.rows] for table in doc.tables]
353
- return {'paragraphs': paras, 'tables': tables}
414
+ return {"paragraphs": paras, "tables": tables}
354
415
  except ContentValidationError:
355
416
  raise
356
417
  except Exception as e:
357
418
  raise FileOperationError(f"Failed to read DOCX: {str(e)}")
358
419
 
359
- def write_docx(self, text: str, output_path: str, table_data: Optional[List[List[str]]] = None) -> Dict[str, Any]:
420
+ def write_docx(
421
+ self,
422
+ text: str,
423
+ output_path: str,
424
+ table_data: Optional[List[List[str]]] = None,
425
+ ) -> Dict[str, Any]:
360
426
  """
361
427
  Write content to a DOCX file.
362
428
 
@@ -375,9 +441,9 @@ class OfficeTool(BaseTool):
375
441
  sanitized_text = self._sanitize_text(text)
376
442
  sanitized_table_data = self._sanitize_table_data(table_data)
377
443
  doc = DocxDocument()
378
- style = doc.styles['Normal']
379
- style.font.name = self.settings.default_font
380
- style.font.size = Pt(self.settings.default_font_size)
444
+ style = doc.styles["Normal"]
445
+ style.font.name = self.config.default_font
446
+ style.font.size = Pt(self.config.default_font_size)
381
447
  for line in sanitized_text.splitlines():
382
448
  doc.add_paragraph(line)
383
449
  if sanitized_table_data and sanitized_table_data[0]:
@@ -389,9 +455,10 @@ class OfficeTool(BaseTool):
389
455
  if j < len(row):
390
456
  table.rows[i].cells[j].text = str(row[j])
391
457
  else:
392
- table.rows[i].cells[j].text = "" # Empty cell for missing data
458
+ # Empty cell for missing data
459
+ table.rows[i].cells[j].text = ""
393
460
  doc.save(output_path)
394
- return {'success': True, 'file_path': output_path}
461
+ return {"success": True, "file_path": output_path}
395
462
  except Exception as e:
396
463
  raise FileOperationError(f"Failed to write DOCX: {str(e)}")
397
464
 
@@ -410,12 +477,12 @@ class OfficeTool(BaseTool):
410
477
  ContentValidationError: If document structure is invalid.
411
478
  """
412
479
  try:
413
- self._validate_document(file_path, 'pptx')
480
+ self._validate_document(file_path, "pptx")
414
481
  prs = Presentation(file_path)
415
482
  texts = []
416
483
  for slide in prs.slides:
417
484
  for shape in slide.shapes:
418
- if hasattr(shape, 'text'):
485
+ if hasattr(shape, "text"):
419
486
  txt = shape.text.strip()
420
487
  if txt:
421
488
  texts.append(txt)
@@ -425,7 +492,12 @@ class OfficeTool(BaseTool):
425
492
  except Exception as e:
426
493
  raise FileOperationError(f"Failed to read PPTX: {str(e)}")
427
494
 
428
- def write_pptx(self, slides: List[str], output_path: str, image_path: Optional[str] = None) -> Dict[str, Any]:
495
+ def write_pptx(
496
+ self,
497
+ slides: List[str],
498
+ output_path: str,
499
+ image_path: Optional[str] = None,
500
+ ) -> Dict[str, Any]:
429
501
  """
430
502
  Write content to a PPTX file.
431
503
 
@@ -462,7 +534,7 @@ class OfficeTool(BaseTool):
462
534
  except Exception as img_err:
463
535
  self.logger.warning(f"Could not add image to slide: {img_err}")
464
536
  prs.save(output_path)
465
- return {'success': True, 'file_path': output_path}
537
+ return {"success": True, "file_path": output_path}
466
538
  except Exception as e:
467
539
  raise FileOperationError(f"Failed to write PPTX: {str(e)}")
468
540
 
@@ -482,27 +554,27 @@ class OfficeTool(BaseTool):
482
554
  ContentValidationError: If document structure is invalid.
483
555
  """
484
556
  try:
485
- self._validate_document(file_path, 'xlsx')
557
+ self._validate_document(file_path, "xlsx")
486
558
  data = pd.read_excel(file_path, sheet_name=sheet_name)
487
-
559
+
488
560
  # Handle different return types from pd.read_excel()
489
561
  if isinstance(data, pd.DataFrame):
490
562
  # Single sheet or specific sheet requested
491
- return data.to_dict(orient='records')
563
+ return data.to_dict(orient="records")
492
564
  elif isinstance(data, dict):
493
565
  # Multiple sheets returned as dict - use the first sheet
494
566
  first_sheet_name = list(data.keys())[0]
495
567
  first_df = data[first_sheet_name]
496
- return first_df.to_dict(orient='records')
568
+ return first_df.to_dict(orient="records")
497
569
  else:
498
570
  raise FileOperationError("Unexpected data type returned from Excel file")
499
-
571
+
500
572
  except ContentValidationError:
501
573
  raise
502
574
  except Exception as e:
503
575
  raise FileOperationError(f"Failed to read XLSX: {str(e)}")
504
576
 
505
- def write_xlsx(self, data: List[Dict], output_path: str, sheet_name: str = 'Sheet1') -> Dict[str, Any]:
577
+ def write_xlsx(self, data: List[Dict], output_path: str, sheet_name: str = "Sheet1") -> Dict[str, Any]:
506
578
  """
507
579
  Write content to an XLSX file.
508
580
 
@@ -523,7 +595,7 @@ class OfficeTool(BaseTool):
523
595
  pd.DataFrame().to_excel(output_path, index=False, sheet_name=sheet_name)
524
596
  else:
525
597
  pd.DataFrame(sanitized_data).to_excel(output_path, index=False, sheet_name=sheet_name)
526
- return {'success': True, 'file_path': output_path}
598
+ return {"success": True, "file_path": output_path}
527
599
  except Exception as e:
528
600
  raise FileOperationError(f"Failed to write XLSX: {str(e)}")
529
601
 
@@ -545,38 +617,45 @@ class OfficeTool(BaseTool):
545
617
  file_ext = os.path.splitext(file_path)[1].lower()
546
618
 
547
619
  # Determine file type and validate
548
- if file_ext == '.pdf':
549
- file_type = 'pdf'
550
- elif file_ext == '.docx':
551
- file_type = 'docx'
552
- elif file_ext == '.pptx':
553
- file_type = 'pptx'
554
- elif file_ext == '.xlsx':
555
- file_type = 'xlsx'
556
- elif file_ext in ['.png', '.jpg', '.jpeg', '.tiff', '.bmp', '.gif']:
557
- file_type = 'image'
620
+ if file_ext == ".pdf":
621
+ file_type = "pdf"
622
+ elif file_ext == ".docx":
623
+ file_type = "docx"
624
+ elif file_ext == ".pptx":
625
+ file_type = "pptx"
626
+ elif file_ext == ".xlsx":
627
+ file_type = "xlsx"
628
+ elif file_ext in [
629
+ ".png",
630
+ ".jpg",
631
+ ".jpeg",
632
+ ".tiff",
633
+ ".bmp",
634
+ ".gif",
635
+ ]:
636
+ file_type = "image"
558
637
  else:
559
- file_type = 'other'
638
+ file_type = "other"
560
639
 
561
640
  # Validate document structure
562
641
  self._validate_document(file_path, file_type)
563
642
 
564
643
  # Extract text based on file type
565
- if file_type == 'pdf':
644
+ if file_type == "pdf":
566
645
  return self._sanitize_text(self._extract_pdf_text(file_path))
567
- elif file_type == 'docx':
646
+ elif file_type == "docx":
568
647
  doc = DocxDocument(file_path)
569
648
  paragraphs = [p.text for p in doc.paragraphs if p.text.strip()]
570
- return self._sanitize_text('\n'.join(paragraphs))
571
- elif file_type == 'pptx':
649
+ return self._sanitize_text("\n".join(paragraphs))
650
+ elif file_type == "pptx":
572
651
  prs = Presentation(file_path)
573
652
  texts = []
574
653
  for slide in prs.slides:
575
654
  for shape in slide.shapes:
576
- if hasattr(shape, 'text') and shape.text.strip():
655
+ if hasattr(shape, "text") and shape.text.strip():
577
656
  texts.append(shape.text)
578
- return self._sanitize_text('\n'.join(texts))
579
- elif file_type == 'xlsx':
657
+ return self._sanitize_text("\n".join(texts))
658
+ elif file_type == "xlsx":
580
659
  data = pd.read_excel(file_path)
581
660
  # Handle different return types from pd.read_excel()
582
661
  if isinstance(data, pd.DataFrame):
@@ -587,8 +666,9 @@ class OfficeTool(BaseTool):
587
666
  first_df = data[first_sheet_name]
588
667
  return self._sanitize_text(first_df.to_string(index=False))
589
668
  else:
590
- return self._sanitize_text("") # Fallback for unexpected data types
591
- elif file_type == 'image':
669
+ # Fallback for unexpected data types
670
+ return self._sanitize_text("")
671
+ elif file_type == "image":
592
672
  return self._sanitize_text(self._extract_image_text(file_path))
593
673
  else:
594
674
  # Use Tika as fallback for other formats