aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of aiecs might be problematic.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
aiecs/tools/task_tools/classfire_tool.py

@@ -1,53 +1,63 @@
+from aiecs.tools import register_tool
+from aiecs.tools.tool_executor import (
+    validate_input,
+)
+from aiecs.tools.base_tool import BaseTool
 import os
 import re
 import logging
 import asyncio
 import time
-from typing import Dict, Any, List, Optional, Union, Tuple
+from typing import Dict, Any, List, Optional, Tuple
 from enum import Enum

-from pydantic import BaseModel, Field, field_validator, ValidationError, ConfigDict
+from pydantic import BaseModel, Field, field_validator
+from pydantic_settings import BaseSettings, SettingsConfigDict

 # Lazy imports for heavy dependencies
 rake_nltk = None
 spacy = None

+
 def _init_heavy_dependencies():
     """Initialize heavy dependencies when actually needed"""
     global rake_nltk, spacy
-
+
     if rake_nltk is None:
         try:
-            import rake_nltk as _rake_nltk
+            import rake_nltk as _rake_nltk  # type: ignore[import-untyped]
+
             rake_nltk = _rake_nltk
         except ImportError:
             import logging
+
             logging.getLogger(__name__).error("rake_nltk not available")
-
+
     if spacy is None:
         try:
             import spacy as _spacy
+
             spacy = _spacy
         except ImportError:
             import logging
+
             logging.getLogger(__name__).warning("spacy not available (optional)")

-from aiecs.tools import register_tool
-from aiecs.tools.base_tool import BaseTool
-from aiecs.tools.tool_executor import (
-    validate_input,
-)

 # Enums for configuration options
+
+
 class Language(str, Enum):
     ENGLISH = "en"
     CHINESE = "zh"
     AUTO = "auto"

+
 class ModelType(str, Enum):
     SPACY_ENGLISH = "en_core_web_sm"
     SPACY_CHINESE = "zh_core_web_sm"

+
 @register_tool("classifier")
 class ClassifierTool(BaseTool):
     """
@@ -69,182 +79,126 @@ class ClassifierTool(BaseTool):
     """

     # Configuration schema
-    class Config(BaseModel):
-        """Configuration for the classifier tool"""
+    class Config(BaseSettings):
+        """Configuration for the classifier tool
+
+        Automatically reads from environment variables with CLASSIFIER_TOOL_ prefix.
+        Example: CLASSIFIER_TOOL_MAX_WORKERS -> max_workers
+        """
+
+        model_config = SettingsConfigDict(env_prefix="CLASSIFIER_TOOL_")
+
         max_workers: int = Field(
             default=min(32, (os.cpu_count() or 4) * 2),
-            description="Maximum number of worker threads"
+            description="Maximum number of worker threads",
         )
         pipeline_cache_ttl: int = Field(
             default=3600,
-            description="Time-to-live for pipeline cache in seconds"
-        )
-        pipeline_cache_size: int = Field(
-            default=10,
-            description="Maximum number of pipeline cache entries"
-        )
-        max_text_length: int = Field(
-            default=10_000,
-            description="Maximum text length in characters"
-        )
-        spacy_model_en: str = Field(
-            default="en_core_web_sm",
-            description="spaCy model for English"
-        )
-        spacy_model_zh: str = Field(
-            default="zh_core_web_sm",
-            description="spaCy model for Chinese"
+            description="Time-to-live for pipeline cache in seconds",
         )
+        pipeline_cache_size: int = Field(default=10, description="Maximum number of pipeline cache entries")
+        max_text_length: int = Field(default=10_000, description="Maximum text length in characters")
+        spacy_model_en: str = Field(default="en_core_web_sm", description="spaCy model for English")
+        spacy_model_zh: str = Field(default="zh_core_web_sm", description="spaCy model for Chinese")
         allowed_models: List[str] = Field(
-            default=[
-                "en_core_web_sm",
-                "zh_core_web_sm"
-            ],
-            description="List of allowed spaCy models"
-        )
-        rate_limit_enabled: bool = Field(
-            default=True,
-            description="Enable rate limiting"
+            default=["en_core_web_sm", "zh_core_web_sm"],
+            description="List of allowed spaCy models",
         )
-        rate_limit_requests: int = Field(
-            default=100,
-            description="Maximum requests per window"
-        )
-        rate_limit_window: int = Field(
-            default=60,
-            description="Rate limit window in seconds"
-        )
-        use_rake_for_english: bool = Field(
-            default=True,
-            description="Use RAKE for English phrase extraction"
-        )
-
-        model_config = ConfigDict(env_prefix="CLASSIFIER_TOOL_")
+        rate_limit_enabled: bool = Field(default=True, description="Enable rate limiting")
+        rate_limit_requests: int = Field(default=100, description="Maximum requests per window")
+        rate_limit_window: int = Field(default=60, description="Rate limit window in seconds")
+        use_rake_for_english: bool = Field(default=True, description="Use RAKE for English phrase extraction")

     # Base schema for text operations
     class BaseTextSchema(BaseModel):
         """Base schema for text operations"""
-        text: str = Field(
-            description="Text to process"
-        )
+
+        text: str = Field(description="Text to process")

         @field_validator("text")
         @classmethod
         def check_length_and_content(cls, v: str) -> str:
             if len(v) > 10_000:  # Using a constant here for validation
-                raise ValueError(f"Text length exceeds 10,000 characters")
+                raise ValueError("Text length exceeds 10,000 characters")
             # Check for malicious patterns (e.g., SQL injection)
-            if re.search(r'(\bSELECT\b|\bINSERT\b|\bDELETE\b|--|;|/\*)', v, re.IGNORECASE):
+            if re.search(
+                r"(\bSELECT\b|\bINSERT\b|\bDELETE\b|--|;|/\*)",
+                v,
+                re.IGNORECASE,
+            ):
                 raise ValueError("Text contains potentially malicious content")
             return v

     # Input schemas for operations
     class ClassifySchema(BaseTextSchema):
-        """Schema for text classification"""
-        model: Optional[str] = Field(
-            default=None,
-            description="Model to use for classification"
-        )
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        """Schema for classify operation"""
+
+        model: Optional[str] = Field(default=None, description="Model to use for classification")
+        language: Optional[Language] = Field(default=None, description="Language of the text")

         @field_validator("model")
         @classmethod
         def check_model(cls, v: Optional[str]) -> Optional[str]:
-            allowed_models = [
-                "en_core_web_sm",
-                "zh_core_web_sm"
-            ]
+            allowed_models = ["en_core_web_sm", "zh_core_web_sm"]
             if v and v not in allowed_models:
                 raise ValueError(f"Model '{v}' not in allowed spaCy models: {allowed_models}")
             return v

     class TokenizeSchema(BaseTextSchema):
-        """Schema for text tokenization"""
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        """Schema for tokenize operation"""

-    class PosTagSchema(BaseTextSchema):
-        """Schema for part-of-speech tagging"""
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        language: Optional[Language] = Field(default=None, description="Language of the text")
+
+    class Pos_tagSchema(BaseTextSchema):
+        """Schema for pos_tag operation"""
+
+        language: Optional[Language] = Field(default=None, description="Language of the text")

     class NERSchema(BaseTextSchema):
-        """Schema for named entity recognition"""
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        """Schema for ner operation"""
+
+        language: Optional[Language] = Field(default=None, description="Language of the text")

     class LemmatizeSchema(BaseTextSchema):
-        """Schema for lemmatization"""
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        """Schema for lemmatize operation"""

-    class DependencyParseSchema(BaseTextSchema):
-        """Schema for dependency parsing"""
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        language: Optional[Language] = Field(default=None, description="Language of the text")

-    class KeywordExtractSchema(BaseTextSchema):
-        """Schema for keyword extraction"""
-        top_k: int = Field(
-            default=10,
-            description="Number of keywords to extract"
-        )
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+    class Dependency_parseSchema(BaseTextSchema):
+        """Schema for dependency_parse operation"""
+
+        language: Optional[Language] = Field(default=None, description="Language of the text")
+
+    class Keyword_extractSchema(BaseTextSchema):
+        """Schema for keyword_extract operation"""
+
+        top_k: int = Field(default=10, description="Number of keywords to extract")
+        language: Optional[Language] = Field(default=None, description="Language of the text")
         extract_phrases: bool = Field(
             default=True,
-            description="Whether to extract phrases or just keywords"
+            description="Whether to extract phrases or just keywords",
         )

     class SummarizeSchema(BaseTextSchema):
-        """Schema for text summarization"""
-        max_length: int = Field(
-            default=150,
-            description="Maximum length of the summary"
-        )
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the text"
-        )
+        """Schema for summarize operation"""
+
+        max_length: int = Field(default=150, description="Maximum length of the summary")
+        language: Optional[Language] = Field(default=None, description="Language of the text")

-    class BatchProcessSchema(BaseModel):
+    class Batch_processSchema(BaseModel):
         """Schema for batch processing"""
-        texts: List[str] = Field(
-            description="List of texts to process"
-        )
-        operation: str = Field(
-            description="Operation to perform on each text"
-        )
-        language: Optional[Language] = Field(
-            default=None,
-            description="Language of the texts"
-        )
-        model: Optional[str] = Field(
-            default=None,
-            description="Model to use for processing"
-        )
+
+        texts: List[str] = Field(description="List of texts to process")
+        operation: str = Field(description="Operation to perform on each text")
+        language: Optional[Language] = Field(default=None, description="Language of the texts")
+        model: Optional[str] = Field(default=None, description="Model to use for processing")
         top_k: Optional[int] = Field(
             default=None,
-            description="Number of keywords to extract (for keyword_extract)"
+            description="Number of keywords to extract (for keyword_extract)",
         )
         max_length: Optional[int] = Field(
             default=None,
-            description="Maximum length of the summary (for summarize)"
+            description="Maximum length of the summary (for summarize)",
         )

         @field_validator("texts")
@@ -252,8 +206,12 @@ class ClassifierTool(BaseTool):
         def check_texts(cls, v: List[str]) -> List[str]:
             for text in v:
                 if len(text) > 10_000:  # Using a constant here for validation
-                    raise ValueError(f"Text length exceeds 10,000 characters")
-                if re.search(r'(\bSELECT\b|\bINSERT\b|\bDELETE\b|--|;|/\*)', text, re.IGNORECASE):
+                    raise ValueError("Text length exceeds 10,000 characters")
+                if re.search(
+                    r"(\bSELECT\b|\bINSERT\b|\bDELETE\b|--|;|/\*)",
+                    text,
+                    re.IGNORECASE,
+                ):
                     raise ValueError("Text contains potentially malicious content")
             return v

@@ -266,24 +224,31 @@ class ClassifierTool(BaseTool):

         Raises:
             ValueError: If config contains invalid settings.
+
+        Configuration is automatically loaded by BaseTool from:
+        1. Explicit config dict (highest priority)
+        2. YAML config files (config/tools/classifier.yaml)
+        3. Environment variables (via dotenv from .env files)
+        4. Tool defaults (lowest priority)
         """
         super().__init__(config)

-        # Parse configuration
-        self.config = self.Config(**(config or {}))
+        # Configuration is automatically loaded by BaseTool into self._config_obj
+        # Access config via self._config_obj (BaseSettings instance)
+        self.config = self._config_obj if self._config_obj else self.Config()

         # Set up logger
         self.logger = logging.getLogger(__name__)
         if not self.logger.handlers:
             handler = logging.StreamHandler()
-            handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s'))
+            handler.setFormatter(logging.Formatter("%(asctime)s %(levelname)s %(message)s"))
             self.logger.addHandler(handler)
             self.logger.setLevel(logging.INFO)

         # Initialize resources
-        self._spacy_nlp = {}  # Language -> spaCy pipeline
-        self._metrics = {'requests': 0, 'cache_hits': 0, 'processing_time': []}
-        self._request_timestamps = []
+        self._spacy_nlp: Dict[str, Any] = {}  # Language -> spaCy pipeline
+        self._metrics = {"requests": 0, "cache_hits": 0, "processing_time": []}
+        self._request_timestamps: List[float] = []

     def _get_sentiment_lexicon(self, language: str) -> Dict[str, float]:
         """
@@ -295,22 +260,66 @@ class ClassifierTool(BaseTool):
         Returns:
             Dict[str, float]: Sentiment lexicon with word -> score mapping.
         """
-        if language == 'en':
+        if language == "en":
             # Simple English sentiment lexicon
             return {
-                'good': 1.0, 'great': 1.5, 'excellent': 2.0, 'amazing': 2.0, 'wonderful': 1.5,
-                'fantastic': 2.0, 'awesome': 1.5, 'perfect': 2.0, 'love': 1.5, 'like': 1.0,
-                'happy': 1.5, 'pleased': 1.0, 'satisfied': 1.0, 'positive': 1.0, 'best': 2.0,
-                'bad': -1.0, 'terrible': -2.0, 'awful': -2.0, 'horrible': -2.0, 'hate': -2.0,
-                'dislike': -1.0, 'sad': -1.5, 'angry': -1.5, 'disappointed': -1.5, 'negative': -1.0,
-                'worst': -2.0, 'poor': -1.0, 'fail': -1.5, 'wrong': -1.0, 'problem': -1.0
+                "good": 1.0,
+                "great": 1.5,
+                "excellent": 2.0,
+                "amazing": 2.0,
+                "wonderful": 1.5,
+                "fantastic": 2.0,
+                "awesome": 1.5,
+                "perfect": 2.0,
+                "love": 1.5,
+                "like": 1.0,
+                "happy": 1.5,
+                "pleased": 1.0,
+                "satisfied": 1.0,
+                "positive": 1.0,
+                "best": 2.0,
+                "bad": -1.0,
+                "terrible": -2.0,
+                "awful": -2.0,
+                "horrible": -2.0,
+                "hate": -2.0,
+                "dislike": -1.0,
+                "sad": -1.5,
+                "angry": -1.5,
+                "disappointed": -1.5,
+                "negative": -1.0,
+                "worst": -2.0,
+                "poor": -1.0,
+                "fail": -1.5,
+                "wrong": -1.0,
+                "problem": -1.0,
             }
         else:  # Chinese
             return {
-                '好': 1.0, '很好': 1.5, '非常好': 2.0, '棒': 1.5, '优秀': 2.0, '完美': 2.0,
-                '喜欢': 1.5, '爱': 2.0, '满意': 1.0, '开心': 1.5, '高兴': 1.5, '积极': 1.0,
-                '坏': -1.0, '很坏': -1.5, '糟糕': -2.0, '讨厌': -2.0, '恨': -2.0, '失望': -1.5,
-                '生气': -1.5, '愤怒': -2.0, '消极': -1.0, '问题': -1.0, '错误': -1.0, '失败': -1.5
+                "好": 1.0,
+                "很好": 1.5,
+                "非常好": 2.0,
+                "棒": 1.5,
+                "优秀": 2.0,
+                "完美": 2.0,
+                "喜欢": 1.5,
+                "爱": 2.0,
+                "满意": 1.0,
+                "开心": 1.5,
+                "高兴": 1.5,
+                "积极": 1.0,
+                "坏": -1.0,
+                "很坏": -1.5,
+                "糟糕": -2.0,
+                "讨厌": -2.0,
+                "恨": -2.0,
+                "失望": -1.5,
+                "生气": -1.5,
+                "愤怒": -2.0,
+                "消极": -1.0,
+                "问题": -1.0,
+                "错误": -1.0,
+                "失败": -1.5,
             }

     def _get_spacy(self, language: str) -> Any:
@@ -327,11 +336,12 @@ class ClassifierTool(BaseTool):
         if spacy is None:
             try:
                 import spacy as spacy_module
+
                 spacy = spacy_module
             except ImportError:
                 raise ImportError("spaCy is required but not installed. Please install it with: pip install spacy")

-        model = self.config.spacy_model_zh if language == 'zh' else self.config.spacy_model_en
+        model = self.config.spacy_model_zh if language == "zh" else self.config.spacy_model_en
         return spacy.load(model, disable=["textcat"])

     def _detect_language(self, text: str) -> str:
@@ -346,17 +356,17 @@ class ClassifierTool(BaseTool):
         """
         try:
             # Count Chinese characters (CJK Unified Ideographs)
-            chinese_chars = sum(1 for char in text if '\u4e00' <= char <= '\u9fff')
+            chinese_chars = sum(1 for char in text if "\u4e00" <= char <= "\u9fff")
             total_chars = len([char for char in text if char.isalpha()])

             if total_chars == 0:
-                return 'en'
+                return "en"

             # If more than 30% are Chinese characters, consider it Chinese
             chinese_ratio = chinese_chars / total_chars
-            return 'zh' if chinese_ratio > 0.3 else 'en'
+            return "zh" if chinese_ratio > 0.3 else "en"
         except Exception:
-            return 'en'
+            return "en"

     def _check_rate_limit(self) -> bool:
         """
@@ -373,8 +383,7 @@ class ClassifierTool(BaseTool):
         # Get lock from executor
         with self._executor.get_lock("rate_limit"):
             # Remove timestamps outside the window
-            self._request_timestamps = [ts for ts in self._request_timestamps
-                                        if current_time - ts <= self.config.rate_limit_window]
+            self._request_timestamps = [ts for ts in self._request_timestamps if current_time - ts <= self.config.rate_limit_window]

             # Check if we're at the limit
             if len(self._request_timestamps) >= self.config.rate_limit_requests:
@@ -398,10 +407,10 @@ class ClassifierTool(BaseTool):
         try:
             # Initialize heavy dependencies if needed
             _init_heavy_dependencies()
-
+
             if rake_nltk is None:
                 raise ImportError("rake_nltk not available")
-
+
             rake = rake_nltk.Rake()
             rake.extract_keywords_from_text(text)
             phrases = rake.get_ranked_phrases()[:top_k]
@@ -409,9 +418,9 @@ class ClassifierTool(BaseTool):
         except Exception as e:
             self.logger.error(f"Error extracting English phrases: {e}")
             # Fallback to simple keyword extraction
-            nlp = self._get_spacy('en')
+            nlp = self._get_spacy("en")
             doc = nlp(text)
-            keywords = [token.text for token in doc if token.pos_ in ('NOUN', 'PROPN')][:top_k]
+            keywords = [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")][:top_k]
             return keywords

     def _extract_chinese_phrases(self, text: str, top_k: int) -> List[str]:
@@ -426,7 +435,7 @@ class ClassifierTool(BaseTool):
             List[str]: Extracted phrases.
         """
         try:
-            nlp = self._get_spacy('zh')
+            nlp = self._get_spacy("zh")
             doc = nlp(text)

             # Extract noun phrases and named entities
@@ -444,7 +453,7 @@ class ClassifierTool(BaseTool):

             # Add important nouns and proper nouns
             for token in doc:
-                if token.pos_ in ('NOUN', 'PROPN') and len(token.text.strip()) > 1:
+                if token.pos_ in ("NOUN", "PROPN") and len(token.text.strip()) > 1:
                     phrases.append(token.text.strip())

             # Remove duplicates and return top_k
@@ -455,9 +464,9 @@ class ClassifierTool(BaseTool):
             self.logger.error(f"Error extracting Chinese phrases with spaCy: {e}")
             # Fallback to simple noun extraction
             try:
-                nlp = self._get_spacy('zh')
+                nlp = self._get_spacy("zh")
                 doc = nlp(text)
-                nouns = [token.text for token in doc if token.pos_ in ('NOUN', 'PROPN')]
+                nouns = [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")]
                 return nouns[:top_k]
             except Exception:
                 return []
@@ -478,14 +487,20 @@ class ClassifierTool(BaseTool):
             ValueError: If the pipeline creation fails.
         """
         try:
-            from transformers import pipeline
+            from transformers import pipeline  # type: ignore[import-not-found]
+
             return pipeline(task, model=model)
         except ImportError:
             raise ImportError("transformers library is required for summarization but not installed. Please install it with: pip install transformers")
         except Exception as e:
             raise ValueError(f"Error creating pipeline for task '{task}' with model '{model}': {e}")

-    async def classify(self, text: str, model: Optional[str] = None, language: Optional[str] = None) -> List[Dict[str, Any]]:
+    async def classify(
+        self,
+        text: str,
+        model: Optional[str] = None,
+        language: Optional[str] = None,
+    ) -> List[Dict[str, Any]]:
         """
         Perform sentiment classification on text using spaCy and lexicon-based approach.

@@ -503,16 +518,12 @@ class ClassifierTool(BaseTool):
         language = language or self._detect_language(text)

         # Get spaCy pipeline and sentiment lexicon
-        nlp = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_spacy, language
-        )
+        nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

         sentiment_lexicon = self._get_sentiment_lexicon(language)

         # Process text with spaCy
-        doc = await asyncio.get_event_loop().run_in_executor(
-            None, nlp, text
-        )
+        doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

         # Calculate sentiment score
         sentiment_score = 0.0
@@ -556,13 +567,9 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)

-        nlp = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_spacy, language
-        )
+        nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-        doc = await asyncio.get_event_loop().run_in_executor(
-            None, nlp, text
-        )
+        doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

         return [token.text for token in doc]

@@ -582,18 +589,12 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)

-        nlp = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_spacy, language
-        )
+        nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-        doc = await asyncio.get_event_loop().run_in_executor(
-            None, nlp, text
-        )
+        doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

         return [(token.text, token.pos_) for token in doc]

-    @validate_input(NERSchema)
-
     async def ner(self, text: str, language: Optional[str] = None) -> List[Dict[str, Any]]:
         """
         Perform named entity recognition.
@@ -610,21 +611,20 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)

-        nlp = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_spacy, language
-        )
+        nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-        doc = await asyncio.get_event_loop().run_in_executor(
-            None, nlp, text
-        )
+        doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

         return [
-            {"text": ent.text, "label": ent.label_, "start": ent.start_char, "end": ent.end_char}
+            {
+                "text": ent.text,
+                "label": ent.label_,
+                "start": ent.start_char,
+                "end": ent.end_char,
+            }
             for ent in doc.ents
         ]

-    @validate_input(LemmatizeSchema)
-
     async def lemmatize(self, text: str, language: Optional[str] = None) -> List[str]:
         """
         Lemmatize tokens in text using spaCy.
@@ -641,19 +641,14 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)

-        nlp = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_spacy, language
-        )
+        nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-        doc = await asyncio.get_event_loop().run_in_executor(
-            None, nlp, text
-        )
+        doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

-        # For Chinese, lemma might be the same as text, but spaCy handles it consistently
+        # For Chinese, lemma might be the same as text, but spaCy handles it
+        # consistently
         return [token.lemma_ for token in doc]

-    @validate_input(DependencyParseSchema)
-
     async def dependency_parse(self, text: str, language: Optional[str] = None) -> List[Dict[str, Any]]:
         """
         Perform dependency parsing using spaCy (supports both English and Chinese).
@@ -670,27 +665,27 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)

-        nlp = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_spacy, language
-        )
+        nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-        doc = await asyncio.get_event_loop().run_in_executor(
-            None, nlp, text
-        )
+        doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

         return [
             {
                 "text": token.text,
                 "head": token.head.text,
                 "dep": token.dep_,
-                "pos": token.pos_
+                "pos": token.pos_,
             }
             for token in doc
         ]

-    @validate_input(KeywordExtractSchema)
-
-    async def keyword_extract(self, text: str, top_k: int = 10, language: Optional[str] = None, extract_phrases: bool = True) -> List[str]:
+    async def keyword_extract(
+        self,
+        text: str,
+        top_k: int = 10,
+        language: Optional[str] = None,
+        extract_phrases: bool = True,
+    ) -> List[str]:
         """
         Extract keywords or key phrases from text using spaCy.

@@ -708,42 +703,28 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)

-        if language == 'zh':
+        if language == "zh":
             if extract_phrases:
-                return await asyncio.get_event_loop().run_in_executor(
-                    None, self._extract_chinese_phrases, text, top_k
-                )
+                return await asyncio.get_event_loop().run_in_executor(None, self._extract_chinese_phrases, text, top_k)
             else:
                 # Extract simple keywords using spaCy
-                nlp = await asyncio.get_event_loop().run_in_executor(
-                    None, self._get_spacy, language
-                )
+                nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-                doc = await asyncio.get_event_loop().run_in_executor(
-                    None, nlp, text
-                )
+                doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

-                keywords = [token.text for token in doc if token.pos_ in ('NOUN', 'PROPN')][:top_k]
+                keywords = [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")][:top_k]
                 return keywords
         else:  # English or other languages
             if extract_phrases and self.config.use_rake_for_english:
-                return await asyncio.get_event_loop().run_in_executor(
-                    None, self._extract_english_phrases, text, top_k
-                )
+                return await asyncio.get_event_loop().run_in_executor(None, self._extract_english_phrases, text, top_k)
             else:
-                nlp = await asyncio.get_event_loop().run_in_executor(
-                    None, self._get_spacy, language
-                )
+                nlp = await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, language)

-                doc = await asyncio.get_event_loop().run_in_executor(
-                    None, nlp, text
-                )
+                doc = await asyncio.get_event_loop().run_in_executor(None, nlp, text)

-                keywords = [token.text for token in doc if token.pos_ in ('NOUN', 'PROPN')][:top_k]
+                keywords = [token.text for token in doc if token.pos_ in ("NOUN", "PROPN")][:top_k]
                 return keywords

-    @validate_input(SummarizeSchema)
-
     async def summarize(self, text: str, max_length: int = 150, language: Optional[str] = None) -> str:
         """
         Summarize text.
@@ -761,22 +742,21 @@ class ClassifierTool(BaseTool):

         language = language or self._detect_language(text)
         # Use appropriate models for summarization
-        if language == 'en':
+        if language == "en":
             model = "facebook/bart-large-cnn"
         else:
             # For Chinese and other languages, use a multilingual model
-            # For now, use t5-base, but consider using a Chinese-specific model in the future
+            # For now, use t5-base, but consider using a Chinese-specific model
+            # in the future
             model = "t5-base"

-        pipe = await asyncio.get_event_loop().run_in_executor(
-            None, self._get_hf_pipeline, "summarization", model
-        )
+        pipe = await asyncio.get_event_loop().run_in_executor(None, self._get_hf_pipeline, "summarization", model)

         # Different models use different parameter names for length control
         if model.startswith("t5"):
             # T5 models use max_new_tokens instead of max_length
             # For Chinese text, use a more conservative approach
-            if language == 'zh':
+            if language == "zh":
                 # Chinese text: use character count and be more conservative
                 input_chars = len(text)
                 max_new_tokens = min(max_length, max(input_chars // 4, 5))
@@ -786,13 +766,21 @@ class ClassifierTool(BaseTool):
                 input_words = len(text.split())
                 max_new_tokens = min(max_length, max(input_words // 2, 10))
                 min_new_tokens = 5
-
+
             result = await asyncio.get_event_loop().run_in_executor(
-                None, lambda: pipe(text, max_new_tokens=max_new_tokens, min_new_tokens=min_new_tokens, do_sample=False)[0]['summary_text']
+                None,
+                lambda: pipe(
+                    text,
+                    max_new_tokens=max_new_tokens,
+                    min_new_tokens=min_new_tokens,
+                    do_sample=False,
+                )[
+                    0
+                ]["summary_text"],
             )
         else:
             # BART and other models use max_length
-            if language == 'zh':
+            if language == "zh":
                 # Chinese text: use character count
                 input_chars = len(text)
                 max_len = min(max_length, max(input_chars // 4, 10))
@@ -802,18 +790,30 @@ class ClassifierTool(BaseTool):
                 input_words = len(text.split())
                 max_len = min(max_length, max(input_words // 2, 20))
                 min_len = 10
-
+
             result = await asyncio.get_event_loop().run_in_executor(
-                None, lambda: pipe(text, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
+                None,
+                lambda: pipe(
+                    text,
+                    max_length=max_len,
+                    min_length=min_len,
+                    do_sample=False,
+                )[
+                    0
+                ]["summary_text"],
             )

         return result

-    @validate_input(BatchProcessSchema)
-
-    async def batch_process(self, texts: List[str], operation: str, language: Optional[str] = None,
-                            model: Optional[str] = None, top_k: Optional[int] = None,
-                            max_length: Optional[int] = None) -> List[Any]:
+    async def batch_process(
+        self,
+        texts: List[str],
+        operation: str,
+        language: Optional[str] = None,
+        model: Optional[str] = None,
+        top_k: Optional[int] = None,
+        max_length: Optional[int] = None,
+    ) -> List[Any]:
         """
         Process multiple texts with the specified operation.

@@ -834,7 +834,7 @@ class ClassifierTool(BaseTool):
         # Prepare operations to execute in batch
         operations = []
         for text in texts:
-            kwargs = {"text": text}
+            kwargs: Dict[str, Any] = {"text": text}
             if language:
                 kwargs["language"] = language
             if model and operation == "classify":
@@ -861,23 +861,24 @@ class ClassifierTool(BaseTool):
             "metrics": {
                 "requests": self._metrics["requests"],
                 "cache_hits": self._metrics["cache_hits"],
-                "avg_processing_time": sum(self._metrics["processing_time"]) / len(self._metrics["processing_time"])
-                if self._metrics["processing_time"] else 0.0
+                "avg_processing_time": (
+                    sum(float(t) for t in processing_times) / len(processing_times)
+                    if (processing_times := self._metrics.get("processing_time")) and isinstance(processing_times, list) and len(processing_times) > 0
+                    else 0.0
+                ),
             },
             "config": {
                 "max_workers": self.config.max_workers,
                 "pipeline_cache_size": self.config.pipeline_cache_size,
                 "rate_limit_enabled": self.config.rate_limit_enabled,
                 "rate_limit_requests": self.config.rate_limit_requests,
-                "rate_limit_window": self.config.rate_limit_window
-            }
+                "rate_limit_window": self.config.rate_limit_window,
+            },
         }

         # Check if models can be loaded
         try:
-            await asyncio.get_event_loop().run_in_executor(
-                None, self._get_spacy, "en"
-            )
+            await asyncio.get_event_loop().run_in_executor(None, self._get_spacy, "en")
             result["models"] = {"spacy_en": "ok"}
         except Exception as e:
             result["status"] = "warning"
@@ -893,7 +894,7 @@ class ClassifierTool(BaseTool):
         self._spacy_nlp.clear()

         # Clear metrics
-        self._metrics = {'requests': 0, 'cache_hits': 0, 'processing_time': []}
+        self._metrics = {"requests": 0, "cache_hits": 0, "processing_time": []}

         # Clear rate limiting data
         self._request_timestamps = []
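
Notes on the classifier diff above:

The Config class now subclasses pydantic_settings.BaseSettings (rather than carrying a pydantic ConfigDict), so, as its new docstring says, settings are read from environment variables with the CLASSIFIER_TOOL_ prefix (CLASSIFIER_TOOL_MAX_WORKERS -> max_workers). A minimal standalone sketch of that prefix mapping, assuming pydantic-settings v2 is installed; DemoConfig is illustrative and not part of aiecs:

import os

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class DemoConfig(BaseSettings):
    """Same pattern as ClassifierTool.Config, reduced to two fields."""

    model_config = SettingsConfigDict(env_prefix="CLASSIFIER_TOOL_")

    max_workers: int = Field(default=8, description="Maximum number of worker threads")
    rate_limit_requests: int = Field(default=100, description="Maximum requests per window")


# The env_prefix means CLASSIFIER_TOOL_MAX_WORKERS feeds max_workers;
# an unprefixed MAX_WORKERS variable is ignored.
os.environ["CLASSIFIER_TOOL_MAX_WORKERS"] = "16"
print(DemoConfig().max_workers)          # 16, from the environment
print(DemoConfig().rate_limit_requests)  # 100, from the field default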
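
The _detect_language heuristic in the diff labels text "zh" when more than 30% of its alphabetic characters fall in the CJK Unified Ideographs block (U+4E00..U+9FFF), and "en" otherwise. A self-contained re-derivation of that rule, with the threshold and Unicode range taken from the diff (this sketch is not the package's code):

def detect_language(text: str) -> str:
    """Return "zh" if over 30% of alphabetic characters are CJK ideographs."""
    chinese_chars = sum(1 for ch in text if "\u4e00" <= ch <= "\u9fff")
    total_chars = sum(1 for ch in text if ch.isalpha())
    if total_chars == 0:
        return "en"
    return "zh" if chinese_chars / total_chars > 0.3 else "en"


print(detect_language("hello world"))     # en (no CJK characters)
print(detect_language("这是一个测试"))      # zh (every alphabetic char is CJK)
print(detect_language("mixed 中文 text"))  # en (2 of 11 alphabetic chars are CJK)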
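
Finally, the reworked summarize() derives its output budget from the input size: T5 models receive max_new_tokens, BART-style models receive max_length, with Chinese budgeted per character and English per word. A sketch of just that arithmetic, lifted from the diff (an illustrative helper, not an aiecs API):

def summary_token_budget(text: str, max_length: int, language: str, t5: bool) -> int:
    """Reproduce the length arithmetic from ClassifierTool.summarize."""
    if language == "zh":
        # Chinese: roughly one token per four characters, with a small floor.
        return min(max_length, max(len(text) // 4, 5 if t5 else 10))
    # English: roughly one token per two words, with a larger floor.
    return min(max_length, max(len(text.split()) // 2, 10 if t5 else 20))


# 200 Chinese characters on the T5 path with the default max_length=150:
# min(150, max(200 // 4, 5)) -> 50 new tokens.
print(summary_token_budget("好" * 200, 150, "zh", t5=True))     # 50
# 60 English words on the BART path: min(150, max(60 // 2, 20)) -> 30.
print(summary_token_budget("word " * 60, 150, "en", t5=False))  # 30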