aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,396 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Automated script to download required NLP data for AIECS ClassifierTool.
4
+
5
+ This script downloads:
6
+ 1. NLTK stopwords data package for keyword extraction
7
+ 2. spaCy English model (en_core_web_sm) for text processing
8
+ 3. spaCy Chinese model (zh_core_web_sm) for Chinese text processing
9
+ """
10
+
11
+ import sys
12
+ import subprocess
13
+ import logging
14
+ from typing import List, Tuple
15
+
16
+
17
def setup_logging():
    """
    Setup logging configuration.

    Log records are sent to stdout and, when the file can be opened, to
    ``nlp_data_download.log`` in the current working directory.

    Returns:
        Logger named after this module
    """
    handlers = [logging.StreamHandler(sys.stdout)]
    file_error = None

    try:
        # Explicit UTF-8: messages logged by this script contain non-ASCII
        # characters, which a locale-default encoding may be unable to write.
        handlers.append(logging.FileHandler("nlp_data_download.log", encoding="utf-8"))
    except OSError as e:
        # An unwritable working directory must not prevent the script from
        # running; fall back to console-only logging.
        file_error = e

    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        handlers=handlers,
    )
    logger = logging.getLogger(__name__)

    if file_error is not None:
        logger.warning(f"Could not open nlp_data_download.log, logging to stdout only: {file_error}")

    return logger
28
+
29
+
30
def run_command(cmd: List[str], logger: logging.Logger) -> Tuple[bool, str]:
    """
    Run a command (without a shell) and return success status and output.

    Args:
        cmd: List of command arguments; the first item is the executable
        logger: Logger instance

    Returns:
        Tuple of (success, output). On success, output is the captured
        stdout; on failure it is a human-readable error message.
    """
    if not cmd:
        # subprocess cannot run an empty argument list; report it the same
        # way as any other failure instead of raising.
        error_msg = "No command specified"
        logger.error(error_msg)
        return False, error_msg

    try:
        logger.info(f"Running command: {' '.join(cmd)}")
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
    except subprocess.CalledProcessError as e:
        error_msg = f"Command failed with exit code {e.returncode}: {e.stderr}"
    except FileNotFoundError:
        error_msg = f"Command not found: {cmd[0]}"
    except OSError as e:
        # e.g. PermissionError when the executable exists but cannot be run
        error_msg = f"Command could not be started: {e}"
    else:
        logger.info(f"Command succeeded: {result.stdout}")
        return True, result.stdout

    logger.error(error_msg)
    return False, error_msg
54
+
55
+
56
def check_python_package(package_name: str, logger: logging.Logger) -> bool:
    """
    Report whether a Python package can be imported.

    Args:
        package_name: Import name of the package to probe
        logger: Logger instance that receives the outcome

    Returns:
        True if the import succeeds, False if it raises ImportError
    """
    try:
        __import__(package_name)
    except ImportError:
        logger.warning(f"Package {package_name} is not installed")
        is_installed = False
    else:
        logger.info(f"Package {package_name} is already installed")
        is_installed = True

    return is_installed
74
+
75
+
76
def download_nltk_data(logger: logging.Logger) -> bool:
    """
    Download required NLTK data packages.

    Stops at the first package that fails to download.

    Args:
        logger: Logger instance

    Returns:
        True if every package was downloaded, False otherwise
    """
    logger.info("Starting NLTK data download...")

    if not check_python_package("nltk", logger):
        logger.error("NLTK is not installed. Please install it first with: pip install nltk")
        return False

    # Required NLTK data
    packages_to_download = [
        "stopwords",
        "punkt",
        "punkt_tab",  # Added for RAKE-NLTK compatibility
        "wordnet",
        "averaged_perceptron_tagger",
    ]

    try:
        import nltk
    except Exception as e:
        logger.error(f"Error downloading NLTK data: {e}")
        return False

    for package in packages_to_download:
        logger.info(f"Downloading NLTK package: {package}")
        try:
            downloaded = nltk.download(package, quiet=True)
        except Exception as e:
            logger.error(f"Failed to download NLTK package {package}: {e}")
            return False

        # nltk.download signals failure through its boolean return value
        # rather than by raising, so the result must be checked explicitly.
        if not downloaded:
            logger.error(f"Failed to download NLTK package {package}: downloader reported failure")
            return False

        logger.info(f"Successfully downloaded NLTK package: {package}")

    logger.info("All NLTK data packages downloaded successfully")
    return True
119
+
120
+
121
def download_spacy_model(model_name: str, logger: logging.Logger) -> bool:
    """
    Download a spaCy model unless it can already be loaded.

    The download runs ``python -m spacy download <model_name>`` with the
    current interpreter, then the model is loaded once to verify it.

    Args:
        model_name: Name of the spaCy model to download
        logger: Logger instance

    Returns:
        True if the model is available after the call, False otherwise
    """
    logger.info(f"Starting spaCy model download: {model_name}")

    if not check_python_package("spacy", logger):
        logger.error("spaCy is not installed. Please install it first with: pip install spacy")
        return False

    # Check if model is already installed
    try:
        import spacy

        spacy.load(model_name)
        logger.info(f"spaCy model {model_name} is already installed")
        return True
    except OSError:
        # Model not installed, proceed with download
        pass
    except Exception as e:
        logger.error(f"Error checking spaCy model {model_name}: {e}")
        return False

    # Download the model
    cmd = [sys.executable, "-m", "spacy", "download", model_name]
    success, output = run_command(cmd, logger)

    if not success:
        logger.error(f"Failed to download spaCy model {model_name}: {output}")
        return False

    logger.info(f"Successfully downloaded spaCy model: {model_name}")

    # Verify the model can be loaded
    try:
        import importlib

        import spacy

        # The model package was installed by a child process after this
        # interpreter started; refresh the import system's caches so the
        # newly installed package can be noticed.
        importlib.invalidate_caches()
        spacy.load(model_name)
    except Exception as e:
        logger.error(f"Downloaded model {model_name} cannot be loaded: {e}")
        return False

    logger.info(f"Verified spaCy model {model_name} can be loaded")
    return True
172
+
173
+
174
def download_spacy_pkuseg_model(logger: logging.Logger) -> bool:
    """
    Download and install spaCy PKUSeg model for Chinese text segmentation.

    Installs the ``spacy_pkuseg`` package with pip when it is not already
    importable, then segments a short Chinese sentence as a smoke test.

    Args:
        logger: Logger instance

    Returns:
        True if successful, False otherwise
    """
    logger.info("Starting spaCy PKUSeg model installation...")

    if not check_python_package("spacy", logger):
        logger.error("spaCy is not installed. Please install it first with: pip install spacy")
        return False

    # Check if spacy_pkuseg is already installed
    if check_python_package("spacy_pkuseg", logger):
        logger.info("spacy_pkuseg is already installed")
        return True

    # Install spacy_pkuseg package
    cmd = [sys.executable, "-m", "pip", "install", "spacy_pkuseg"]
    success, output = run_command(cmd, logger)

    if not success:
        logger.error(f"Failed to install spacy_pkuseg: {output}")
        return False

    logger.info("Successfully installed spacy_pkuseg")

    # Verify the package can be imported
    try:
        import importlib

        # The package was installed by a child process after this
        # interpreter started; refresh the import system's caches so the
        # newly installed package can be noticed.
        importlib.invalidate_caches()

        import spacy_pkuseg

        logger.info("Verified spacy_pkuseg can be imported")

        # Test basic functionality with a Chinese sentence
        # ("This is a test sentence")
        seg = spacy_pkuseg.pkuseg()
        test_result = seg.cut("这是一个测试句子")
        logger.info(f"spacy_pkuseg test successful: {list(test_result)}")
        return True
    except Exception as e:
        logger.error(f"Installed spacy_pkuseg cannot be used: {e}")
        return False
219
+
220
+
221
def download_rake_nltk_data(logger: logging.Logger) -> bool:
    """
    Smoke-test RAKE-NLTK when it is installed.

    RAKE-NLTK is treated as optional: a missing package or a failing test
    is only logged as a warning.

    Args:
        logger: Logger instance

    Returns:
        Always True, because RAKE-NLTK problems are not critical
    """
    logger.info("Checking RAKE-NLTK data...")

    rake_available = check_python_package("rake_nltk", logger)

    if rake_available:
        try:
            from rake_nltk import Rake

            # Run one extraction end to end to prove the package works
            extractor = Rake()
            extractor.extract_keywords_from_text("This is a test sentence for RAKE.")
            extractor.get_ranked_phrases()
            logger.info("RAKE-NLTK is working correctly")
        except Exception as e:
            logger.warning(f"RAKE-NLTK test failed: {e}. This is not critical.")
    else:
        logger.warning(
            "RAKE-NLTK is not installed. This is optional for English keyword extraction."
        )

    # Not critical in any branch, so the result is always True
    return True
251
+
252
+
253
def verify_installation(logger: logging.Logger) -> bool:
    """
    Verify all NLP components are properly installed.

    NLTK stopwords and the spaCy English model are required; the spaCy
    Chinese model and spaCy PKUSeg are optional and only produce warnings.

    Args:
        logger: Logger instance

    Returns:
        True if all required components work, False otherwise
    """
    logger.info("Verifying NLP data installation...")

    success = True

    # Test NLTK
    try:
        from nltk.corpus import stopwords

        english_stopwords = stopwords.words("english")
        logger.info(
            f"NLTK verification successful. Loaded {len(english_stopwords)} English stopwords"
        )
    except Exception as e:
        logger.error(f"NLTK verification failed: {e}")
        success = False

    # Test spaCy English model
    try:
        import spacy

        nlp_en = spacy.load("en_core_web_sm")
        doc = nlp_en("This is a test sentence.")
        logger.info(f"spaCy English model verification successful. Processed {len(doc)} tokens")
    except Exception as e:
        logger.error(f"spaCy English model verification failed: {e}")
        success = False

    # Test spaCy Chinese model (optional); the sample text is Chinese for
    # "This is a test sentence."
    try:
        import spacy

        nlp_zh = spacy.load("zh_core_web_sm")
        doc = nlp_zh("这是一个测试句子。")
        logger.info(f"spaCy Chinese model verification successful. Processed {len(doc)} tokens")
    except Exception as e:
        logger.warning(f"spaCy Chinese model verification failed: {e}. This is optional.")

    # Test spaCy PKUSeg model (optional)
    try:
        import spacy_pkuseg

        seg = spacy_pkuseg.pkuseg()
        result = list(seg.cut("这是一个测试句子"))
        logger.info(f"spaCy PKUSeg model verification successful. Segmented: {result}")
    except Exception as e:
        logger.warning(f"spaCy PKUSeg model verification failed: {e}. This is optional.")

    return success
311
+
312
+
313
def download_all_nlp_data():
    """
    Download all required NLP data.

    NLTK data and the spaCy English model are required. The spaCy Chinese
    model, spaCy PKUSeg and RAKE-NLTK are optional and never cause failure.
    Verification only runs when every required download succeeded.

    Returns:
        0 if required data was downloaded and verified, 1 otherwise
    """
    logger = setup_logging()
    logger.info("Starting AIECS NLP data download process...")

    success = True

    # Download NLTK data
    if not download_nltk_data(logger):
        success = False

    # Download spaCy English model
    if not download_spacy_model("en_core_web_sm", logger):
        success = False

    # Download spaCy Chinese model (optional)
    if not download_spacy_model("zh_core_web_sm", logger):
        logger.warning("Chinese model download failed, but this is optional")
        # Don't mark as failure for Chinese model

    # Download spaCy Chinese segmentation model (optional)
    if not download_spacy_pkuseg_model(logger):
        logger.warning("spaCy PKUSeg model download failed, but this is optional")
        # Don't mark as failure for PKUSeg model

    # Check RAKE-NLTK (optional)
    download_rake_nltk_data(logger)

    # Verify installation
    if success and verify_installation(logger):
        logger.info("✅ All NLP data downloaded and verified successfully!")
        logger.info("AIECS ClassifierTool is ready to use.")
        return 0

    logger.error("❌ Some NLP data downloads failed. Please check the logs above.")
    logger.error("You may need to install missing packages or run this script again.")
    return 1
350
+
351
+
352
def main():
    """
    Main entry point with argument parsing.

    Without ``--download``/``-d`` the help text is printed and nothing is
    downloaded.

    Returns:
        Process exit code: 0 when only help was shown, otherwise the result
        of download_all_nlp_data()
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Download NLP data for AIECS tools",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Show this help message
  aiecs-download-nlp-data --help

  # Download all NLP data
  aiecs-download-nlp-data --download
  aiecs-download-nlp-data -d

NLP Data Includes:
  - NLTK packages: stopwords, punkt, punkt_tab, wordnet, averaged_perceptron_tagger
  - spaCy models: en_core_web_sm (English), zh_core_web_sm (Chinese, optional)
  - spaCy PKUSeg model (Chinese segmentation, optional)
  - RAKE-NLTK data (keyword extraction, optional)
""",
    )

    parser.add_argument(
        "-d",
        "--download",
        action="store_true",
        help="Download all NLP data packages",
    )

    args = parser.parse_args()

    # If no action was requested, show help instead of downloading
    if not args.download:
        parser.print_help()
        print("\n⚠️ No action specified. Use --download or -d to download NLP data.")
        return 0

    # Execute download
    return download_all_nlp_data()
393
+
394
+
395
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
@@ -0,0 +1,270 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick dependency checker for AIECS post-installation.
4
+
5
+ This script performs a fast check of critical dependencies and provides
6
+ installation guidance for missing components.
7
+ """
8
+
9
+ import sys
10
+ import subprocess
11
+ import platform
12
+ import logging
13
+ from typing import Dict, List
14
+
15
+
16
class QuickDependencyChecker:
    """Quick dependency checker for post-installation.

    Probes a fixed set of Python packages and system commands, records what
    is missing, and renders a plain-text report with suggested installation
    commands.

    Attributes:
        logger: Logger returned by ``_setup_logging``.
        system: Lower-cased ``platform.system()`` value (e.g. ``"linux"``).
        issues: Messages for missing system dependencies; the report counts
            these as optional.
        critical_issues: Messages for missing core Python packages; any
            entry makes ``run_check`` return ``1``.
    """

    # pip distribution names whose importable module name differs. Importing
    # the distribution name itself (e.g. "scikit-learn") can never succeed.
    _IMPORT_NAMES = {
        "scikit-learn": "sklearn",
        "python-docx": "docx",
        "beautifulsoup4": "bs4",
    }

    # Core packages, named as they are passed to ``pip install``.
    _CORE_PACKAGES = (
        "fastapi",
        "uvicorn",
        "pydantic",
        "httpx",
        "celery",
        "redis",
        "pandas",
        "numpy",
        "scipy",
        "scikit-learn",
        "matplotlib",
    )

    # key -> (display name, executable, version flag). Java must be probed
    # with "-version": "--version" is only accepted by Java 9 and later.
    _SYSTEM_DEPS = {
        "java": ("Java Runtime Environment", "java", "-version"),
        "tesseract": ("Tesseract OCR", "tesseract", "--version"),
    }

    # platform name -> {system dependency key -> suggested install command}.
    _SYSTEM_INSTALL_COMMANDS = {
        "linux": {
            "java": "sudo apt-get install openjdk-11-jdk",
            "tesseract": "sudo apt-get install tesseract-ocr tesseract-ocr-eng",
        },
        "darwin": {
            "java": "brew install openjdk@11",
            "tesseract": "brew install tesseract",
        },
    }

    def __init__(self):
        self.logger = self._setup_logging()
        self.system = platform.system().lower()
        self.issues: List[str] = []
        self.critical_issues: List[str] = []

    def _setup_logging(self) -> logging.Logger:
        """Configure root logging at INFO level and return this module's logger."""
        logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
        return logging.getLogger(__name__)

    def check_command(self, command: str, version_flag: str = "--version") -> bool:
        """Check if a system command is available.

        Args:
            command: Executable name or path to run.
            version_flag: Argument passed to the executable for the probe.

        Returns:
            ``True`` if the command ran and exited with status 0 within five
            seconds; ``False`` if it timed out, could not be started, or
            exited non-zero.
        """
        try:
            # Only the exit status matters, so output is discarded rather
            # than captured and decoded.
            result = subprocess.run(
                [command, version_flag],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=5,
            )
        except (subprocess.TimeoutExpired, OSError):
            # OSError covers FileNotFoundError, PermissionError and other
            # failures to start the process.
            return False
        return result.returncode == 0

    def check_python_package(self, package_name: str) -> bool:
        """Check if a Python package is installed.

        Args:
            package_name: Either an importable module name or one of the pip
                distribution names listed in ``_IMPORT_NAMES``.

        Returns:
            ``True`` if the corresponding module imports successfully.
        """
        module_name = self._IMPORT_NAMES.get(package_name, package_name)
        try:
            __import__(module_name)
        except ImportError:
            return False
        except Exception as exc:
            # A package can fail at import time with something other than
            # ImportError (e.g. OSError from a failed shared-library load).
            # For this probe that still means "not usable", and it must not
            # abort the rest of the check.
            logging.getLogger(__name__).debug(
                "Importing %s raised %r", module_name, exc
            )
            return False
        return True

    def _check_system_dependency(self, key: str) -> bool:
        """Probe one ``_SYSTEM_DEPS`` entry using its own version flag."""
        _, command, flag = self._SYSTEM_DEPS[key]
        return self.check_command(command, flag)

    def check_critical_dependencies(self) -> Dict[str, bool]:
        """Check critical dependencies that affect core functionality.

        Rebuilds ``critical_issues`` (missing core Python packages) and
        ``issues`` (missing system commands) from scratch on every call.

        Returns:
            Mapping of ``"python_<package>"`` and ``"system_<key>"`` to
            whether that dependency is available.
        """
        results: Dict[str, bool] = {}

        # Start clean so repeated calls do not duplicate recorded issues.
        self.issues = []
        self.critical_issues = []

        # Core Python packages
        for pkg in self._CORE_PACKAGES:
            available = self.check_python_package(pkg)
            results[f"python_{pkg}"] = available
            if not available:
                self.critical_issues.append(f"Missing Python package: {pkg}")

        # System dependencies for tools
        for key, (name, _cmd, _flag) in self._SYSTEM_DEPS.items():
            available = self._check_system_dependency(key)
            results[f"system_{key}"] = available
            if not available:
                self.issues.append(f"Missing system dependency: {name}")

        return results

    def check_tool_specific_dependencies(self) -> Dict[str, Dict[str, bool]]:
        """Check dependencies for specific tools.

        Returns:
            Mapping of tool name to a mapping of dependency name to whether
            that dependency is available.
        """
        has_package = self.check_python_package
        has_system = self._check_system_dependency

        return {
            # Image Tool dependencies
            "image": {
                "tesseract": has_system("tesseract"),
                "PIL": has_package("PIL"),
                "pytesseract": has_package("pytesseract"),
            },
            # ClassFire Tool dependencies
            "classfire": {
                "spacy": has_package("spacy"),
                "nltk": has_package("nltk"),
                "transformers": has_package("transformers"),
            },
            # Office Tool dependencies
            "office": {
                "java": has_system("java"),
                "tika": has_package("tika"),
                "python-docx": has_package("python-docx"),
                "openpyxl": has_package("openpyxl"),
            },
            # Stats Tool dependencies
            "stats": {
                "pandas": has_package("pandas"),
                "pyreadstat": has_package("pyreadstat"),
                "statsmodels": has_package("statsmodels"),
            },
            # Report Tool dependencies
            "report": {
                "jinja2": has_package("jinja2"),
                "matplotlib": has_package("matplotlib"),
                "weasyprint": has_package("weasyprint"),
            },
            # Scraper Tool dependencies
            "scraper": {
                "playwright": has_package("playwright"),
                "beautifulsoup4": has_package("beautifulsoup4"),
                "scrapy": has_package("scrapy"),
            },
        }

    def get_installation_commands(self) -> Dict[str, List[str]]:
        """Get installation commands for missing dependencies.

        Python packages are taken from ``critical_issues``, so
        ``check_critical_dependencies`` must have run for the ``"python"``
        entry to be populated.

        Returns:
            Mapping with keys ``"system"``, ``"python"`` and ``"models"``,
            each a list of shell command strings.
        """
        commands: Dict[str, List[str]] = {"system": [], "python": [], "models": []}

        # System dependencies (only Linux and macOS have suggestions)
        for key, install_cmd in self._SYSTEM_INSTALL_COMMANDS.get(self.system, {}).items():
            if not self._check_system_dependency(key):
                commands["system"].append(install_cmd)

        # Python packages (these should already be installed via pip)
        prefix = "Missing Python package: "
        missing_packages = [
            issue[len(prefix):]
            for issue in self.critical_issues
            if issue.startswith(prefix)
        ]
        if missing_packages:
            commands["python"].append(f"pip install {' '.join(missing_packages)}")

        # Models and data (always suggested)
        commands["models"].append("python -m aiecs.scripts.download_nlp_data")
        commands["models"].append("playwright install")

        return commands

    def generate_quick_report(self) -> str:
        """Generate a quick dependency report.

        Runs all checks and formats the results, suggested installation
        commands and a summary.

        Returns:
            The report as a single newline-joined string.
        """
        report = []
        report.append("🔍 AIECS Quick Dependency Check")
        report.append("=" * 50)

        # Check critical dependencies
        critical_results = self.check_critical_dependencies()
        tool_results = self.check_tool_specific_dependencies()

        # Critical dependencies status
        report.append("\n📦 Critical Dependencies:")
        if all(critical_results.values()):
            report.append("✅ All critical dependencies are available")
        else:
            report.append("❌ Some critical dependencies are missing")
            for key, available in critical_results.items():
                if not available:
                    dep_name = key.replace("python_", "").replace("system_", "")
                    report.append(f" ❌ {dep_name}")

        # Tool-specific dependencies
        report.append("\n🔧 Tool-Specific Dependencies:")
        for tool, deps in tool_results.items():
            tool_ok = all(deps.values())
            status = "✅" if tool_ok else "⚠️"
            report.append(f" {status} {tool.title()} Tool")
            report.extend(f" ❌ {dep}" for dep, available in deps.items() if not available)

        # Installation commands
        commands = self.get_installation_commands()
        if any(commands.values()):
            report.append("\n🛠️ Installation Commands:")
            sections = (
                (" System Dependencies:", "system"),
                (" Python Packages:", "python"),
                (" Models and Data:", "models"),
            )
            for heading, key in sections:
                if commands[key]:
                    report.append(heading)
                    report.extend(f" {cmd}" for cmd in commands[key])

        # Summary
        total_issues = len(self.issues) + len(self.critical_issues)
        if total_issues == 0:
            report.append("\n🎉 All dependencies are available!")
            report.append("AIECS is ready to use with full functionality.")
        else:
            report.append(f"\n⚠️ Found {total_issues} dependency issues.")
            if self.critical_issues:
                report.append(f" Critical: {len(self.critical_issues)}")
            if self.issues:
                report.append(f" Optional: {len(self.issues)}")
            report.append("Please install missing dependencies for full functionality.")

        return "\n".join(report)

    def run_check(self) -> int:
        """Run the quick dependency check.

        Returns:
            ``1`` if any core Python package is missing, otherwise ``0``.
            Missing system dependencies alone do not affect the exit code.
        """
        print("🔍 Running quick dependency check...")

        # Generate and display report
        print(self.generate_quick_report())

        # Return exit code
        return 1 if self.critical_issues else 0
261
+
262
+
263
def main():
    """Run the quick dependency check and return its exit status.

    Returns:
        The value returned by ``QuickDependencyChecker.run_check()``.
    """
    return QuickDependencyChecker().run_check()


if __name__ == "__main__":
    sys.exit(main())