aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,396 @@
1
+ """
2
+ Import Speed Optimization Utilities
3
+
4
+ Provides optimizations for structured data import:
5
+ - Parallel batch processing with worker pools
6
+ - Async I/O for file reading
7
+ - Batch size auto-tuning
8
+ - Performance metrics tracking
9
+ - Streaming import for large files
10
+ """
11
+
12
+ import asyncio
13
+ import os
14
+ import time
15
+ import psutil
16
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
17
+ from dataclasses import dataclass, field
18
+ from typing import Any, Callable, Dict, List, Optional, TypeVar
19
+ import logging
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
@dataclass
class PerformanceMetrics:
    """
    Import performance metrics

    Tracks detailed timing and throughput information during import.
    All fields are plain values filled in by the caller; the derived fields
    ``rows_per_second`` and ``avg_batch_time_seconds`` are only updated when
    :meth:`calculate_throughput` is called.
    """

    # Timing (start/end are caller-supplied timestamps in seconds)
    start_time: float = 0.0
    end_time: float = 0.0
    read_time_seconds: float = 0.0
    transform_time_seconds: float = 0.0
    write_time_seconds: float = 0.0

    # Throughput
    total_rows: int = 0
    rows_per_second: float = 0.0

    # Memory
    peak_memory_mb: float = 0.0
    initial_memory_mb: float = 0.0

    # Batch info
    batch_count: int = 0
    avg_batch_time_seconds: float = 0.0

    # Parallelism
    worker_count: int = 1
    parallel_speedup: float = 1.0

    def _elapsed_seconds(self) -> float:
        """Return ``end_time - start_time``, never less than zero.

        ``end_time`` defaults to 0.0, so an import that has started but not
        finished would otherwise report a large negative duration.
        """
        return max(self.end_time - self.start_time, 0.0)

    def calculate_throughput(self) -> None:
        """Calculate derived metrics after import completes.

        Updates ``rows_per_second`` when the elapsed time is positive and
        ``avg_batch_time_seconds`` when at least one batch was recorded;
        otherwise the corresponding field keeps its current value.
        """
        elapsed = self._elapsed_seconds()
        if elapsed > 0:
            self.rows_per_second = self.total_rows / elapsed
        if self.batch_count > 0:
            # Average of the summed read/transform/write phases per batch.
            phase_total = (
                self.read_time_seconds
                + self.transform_time_seconds
                + self.write_time_seconds
            )
            self.avg_batch_time_seconds = phase_total / self.batch_count

    def get_summary(self) -> Dict[str, Any]:
        """Get summary dictionary for logging/reporting.

        Returns:
            Dict with rounded timing, throughput, memory, batch and worker
            figures. ``duration_seconds`` is 0 when the import has not
            finished (``end_time`` earlier than ``start_time``).
        """
        return {
            "total_rows": self.total_rows,
            "duration_seconds": round(self._elapsed_seconds(), 2),
            "rows_per_second": round(self.rows_per_second, 1),
            "read_time_seconds": round(self.read_time_seconds, 2),
            "transform_time_seconds": round(self.transform_time_seconds, 2),
            "write_time_seconds": round(self.write_time_seconds, 2),
            "peak_memory_mb": round(self.peak_memory_mb, 1),
            "batch_count": self.batch_count,
            "worker_count": self.worker_count,
        }
78
+
79
+
80
class BatchSizeOptimizer:
    """
    Auto-tunes batch size based on system resources and observed timings.

    Two mechanisms are provided:
    - :meth:`estimate_batch_size` derives a size from the memory currently
      available (via ``psutil``) and an estimated or measured row size.
    - :meth:`record_batch_time` / :meth:`adjust_batch_size` double or halve
      the current size depending on the recent average time per row.
    """

    # Batch size bounds
    MIN_BATCH_SIZE = 50
    MAX_BATCH_SIZE = 10000
    DEFAULT_BATCH_SIZE = 1000

    # Memory allocation per row (estimated)
    BASE_MEMORY_PER_ROW_BYTES = 1024  # 1KB base
    MEMORY_PER_COLUMN_BYTES = 100  # 100 bytes per column

    # Adaptive tuning parameters
    HISTORY_WINDOW = 5  # number of most recent samples that are averaged
    MIN_SAMPLES_FOR_ADJUSTMENT = 3  # no adjustment below this many samples
    FAST_SECONDS_PER_ROW = 0.001  # < 1ms per row -> grow the batch
    SLOW_SECONDS_PER_ROW = 0.01  # > 10ms per row -> shrink the batch

    def __init__(self, target_memory_percent: float = 0.25):
        """
        Initialize batch size optimizer

        Args:
            target_memory_percent: Target percentage of available memory to use (0-1)
        """
        self.target_memory_percent = target_memory_percent
        self._batch_times: List[float] = []
        self._current_batch_size = self.DEFAULT_BATCH_SIZE

    def estimate_batch_size(
        self,
        column_count: int,
        sample_row_size_bytes: Optional[int] = None,
    ) -> int:
        """
        Estimate optimal batch size based on system resources.

        Args:
            column_count: Number of columns in the data
            sample_row_size_bytes: Optional measured row size; ignored when
                missing or not positive, in which case the per-column
                heuristic is used instead

        Returns:
            Recommended batch size, clamped to
            ``[MIN_BATCH_SIZE, MAX_BATCH_SIZE]``. ``DEFAULT_BATCH_SIZE`` is
            returned if available memory cannot be queried.
        """
        try:
            available_memory = psutil.virtual_memory().available
        except Exception:
            # Fallback if psutil fails
            return self.DEFAULT_BATCH_SIZE

        # Calculate target memory for batches
        target_memory = available_memory * self.target_memory_percent

        # Estimate memory per row; a non-positive measurement is meaningless
        # and would otherwise yield a negative or undefined batch size.
        if sample_row_size_bytes is not None and sample_row_size_bytes > 0:
            memory_per_row = sample_row_size_bytes
        else:
            memory_per_row = self.BASE_MEMORY_PER_ROW_BYTES + (
                column_count * self.MEMORY_PER_COLUMN_BYTES
            )

        # Calculate batch size and clamp to reasonable range
        batch_size = int(target_memory / memory_per_row)
        batch_size = max(self.MIN_BATCH_SIZE, min(batch_size, self.MAX_BATCH_SIZE))

        self._current_batch_size = batch_size
        logger.debug(
            "Estimated batch size: %s (columns=%s, memory_per_row=%s)",
            batch_size,
            column_count,
            memory_per_row,
        )

        return batch_size

    def record_batch_time(self, batch_time: float, rows_processed: int) -> None:
        """
        Record batch processing time for adaptive tuning.

        The sample is stored as seconds per row. Only the most recent
        ``HISTORY_WINDOW`` samples are kept, since older ones are never read.

        Args:
            batch_time: Time to process the batch in seconds
            rows_processed: Number of rows processed in the batch (values
                below 1 are treated as 1 to avoid division by zero)
        """
        self._batch_times.append(batch_time / max(rows_processed, 1))
        # Drop everything older than the averaging window so the history
        # cannot grow without bound during long imports.
        del self._batch_times[:-self.HISTORY_WINDOW]

    def adjust_batch_size(self) -> int:
        """
        Adjust batch size based on historical performance.

        Returns:
            Adjusted batch size. Unchanged while fewer than
            ``MIN_SAMPLES_FOR_ADJUSTMENT`` samples exist or when the average
            time per row lies between the fast and slow thresholds.
        """
        if len(self._batch_times) < self.MIN_SAMPLES_FOR_ADJUSTMENT:
            return self._current_batch_size

        # Calculate average time per row over the recent window
        recent_times = self._batch_times[-self.HISTORY_WINDOW:]
        avg_time_per_row = sum(recent_times) / len(recent_times)

        if avg_time_per_row < self.FAST_SECONDS_PER_ROW:
            # Processing is fast: double, capped at the maximum
            self._current_batch_size = min(
                self._current_batch_size * 2,
                self.MAX_BATCH_SIZE,
            )
        elif avg_time_per_row > self.SLOW_SECONDS_PER_ROW:
            # Processing is slow: halve, floored at the minimum
            self._current_batch_size = max(
                self._current_batch_size // 2,
                self.MIN_BATCH_SIZE,
            )

        return self._current_batch_size
190
+
191
+
192
+ class ParallelBatchProcessor:
193
+ """
194
+ Processes batches in parallel using a worker pool.
195
+
196
+ Uses ThreadPoolExecutor for I/O-bound work (default) or
197
+ ProcessPoolExecutor for CPU-bound work.
198
+ """
199
+
200
+ def __init__(
201
+ self,
202
+ max_workers: Optional[int] = None,
203
+ use_processes: bool = False,
204
+ ):
205
+ """
206
+ Initialize parallel batch processor.
207
+
208
+ Args:
209
+ max_workers: Maximum number of workers. Default: CPU count - 1
210
+ use_processes: Use ProcessPoolExecutor instead of ThreadPoolExecutor
211
+ """
212
+ if max_workers is None:
213
+ max_workers = max(1, os.cpu_count() - 1) if os.cpu_count() else 1
214
+
215
+ self.max_workers = max_workers
216
+ self.use_processes = use_processes
217
+ self._executor: Optional[ThreadPoolExecutor] = None
218
+ self._progress_lock = asyncio.Lock()
219
+ self._processed_rows = 0
220
+ self._total_rows = 0
221
+
222
+ async def __aenter__(self):
223
+ """Enter async context manager"""
224
+ if self.use_processes:
225
+ self._executor = ProcessPoolExecutor(max_workers=self.max_workers)
226
+ else:
227
+ self._executor = ThreadPoolExecutor(max_workers=self.max_workers)
228
+ return self
229
+
230
+ async def __aexit__(self, exc_type, exc_val, exc_tb):
231
+ """Exit async context manager"""
232
+ if self._executor:
233
+ self._executor.shutdown(wait=True)
234
+ self._executor = None
235
+
236
+ async def process_batches_parallel(
237
+ self,
238
+ batches: List[List[Dict[str, Any]]],
239
+ process_func: Callable[[List[Dict[str, Any]]], Any],
240
+ progress_callback: Optional[Callable[[int, int], None]] = None,
241
+ ) -> List[Any]:
242
+ """
243
+ Process multiple batches in parallel.
244
+
245
+ Args:
246
+ batches: List of batch data (each batch is a list of row dicts)
247
+ process_func: Function to process each batch
248
+ progress_callback: Optional callback(processed_rows, total_rows)
249
+
250
+ Returns:
251
+ List of results from each batch
252
+ """
253
+ if not self._executor:
254
+ raise RuntimeError("ParallelBatchProcessor must be used as async context manager")
255
+
256
+ self._total_rows = sum(len(batch) for batch in batches)
257
+ self._processed_rows = 0
258
+
259
+ loop = asyncio.get_event_loop()
260
+
261
+ async def process_with_progress(batch: List[Dict[str, Any]]) -> Any:
262
+ # Run in thread pool
263
+ result = await loop.run_in_executor(self._executor, process_func, batch)
264
+
265
+ # Update progress
266
+ async with self._progress_lock:
267
+ self._processed_rows += len(batch)
268
+ if progress_callback:
269
+ progress_callback(self._processed_rows, self._total_rows)
270
+
271
+ return result
272
+
273
+ # Create tasks for all batches
274
+ tasks = [process_with_progress(batch) for batch in batches]
275
+
276
+ # Process in parallel
277
+ results = await asyncio.gather(*tasks, return_exceptions=True)
278
+
279
+ return results
280
+
281
+ @property
282
+ def worker_count(self) -> int:
283
+ """Get the number of workers"""
284
+ return self.max_workers
285
+
286
+
287
class MemoryTracker:
    """
    Tracks memory usage during import.

    Samples the resident set size (RSS) of the current process through
    ``psutil`` and remembers the initial, latest and highest values seen.
    Sampling is best-effort: if a sample cannot be taken the stored values
    are left untouched and the failure is logged at debug level.
    """

    _BYTES_PER_MB = 1024 * 1024

    def __init__(self):
        # All values are in bytes; 0 means "not sampled yet".
        self._initial_memory = 0
        self._peak_memory = 0
        self._current_memory = 0

    def _read_rss(self) -> Optional[int]:
        """Return the current process RSS in bytes, or None if unavailable."""
        try:
            return psutil.Process().memory_info().rss
        except Exception as exc:
            # Tracking must never break the import itself, but the failure
            # should at least be visible when debugging.
            logger.debug("Memory tracking sample failed: %s", exc)
            return None

    def start_tracking(self) -> None:
        """Start memory tracking by recording the baseline sample."""
        rss = self._read_rss()
        if rss is None:
            return
        self._initial_memory = rss
        self._peak_memory = rss
        # Keep "current" consistent with the baseline so it is not reported
        # as 0 (below the peak) before the first update() call.
        self._current_memory = rss

    def update(self) -> None:
        """Take a new sample and raise the recorded peak if it is higher."""
        rss = self._read_rss()
        if rss is None:
            return
        self._current_memory = rss
        self._peak_memory = max(self._peak_memory, rss)

    @property
    def initial_memory_mb(self) -> float:
        """Get initial memory in MB"""
        return self._initial_memory / self._BYTES_PER_MB

    @property
    def peak_memory_mb(self) -> float:
        """Get peak memory in MB"""
        return self._peak_memory / self._BYTES_PER_MB

    @property
    def current_memory_mb(self) -> float:
        """Get current memory in MB"""
        return self._current_memory / self._BYTES_PER_MB
329
+
330
+
331
class StreamingCSVReader:
    """
    Streaming CSV reader for large files.

    Reads CSV file in chunks without loading entire file into memory.
    """

    def __init__(
        self,
        file_path: str,
        chunk_size: int = 10000,
        encoding: str = "utf-8",
        delimiter: str = ",",
    ):
        """
        Initialize streaming CSV reader.

        Args:
            file_path: Path to CSV file
            chunk_size: Number of rows per chunk
            encoding: File encoding
            delimiter: CSV delimiter
        """
        self.file_path = file_path
        self.chunk_size = chunk_size
        self.encoding = encoding
        self.delimiter = delimiter

    async def read_chunks(self):
        """
        Async generator that yields chunks of data.

        Each chunk is parsed in the event loop's default executor so that
        reading a large file does not block other tasks. The underlying
        pandas reader is closed when iteration finishes or is abandoned.

        Yields:
            pandas DataFrame chunks

        Raises:
            ImportError: If pandas is not installed.
        """
        try:
            import pandas as pd
        except ImportError as exc:
            raise ImportError("pandas is required for streaming CSV reading") from exc

        loop = asyncio.get_running_loop()

        # Use pandas chunked reading
        reader = pd.read_csv(
            self.file_path,
            chunksize=self.chunk_size,
            encoding=self.encoding,
            delimiter=self.delimiter,
        )
        try:
            while True:
                # next() with a default avoids raising StopIteration inside
                # the executor future, which asyncio cannot propagate.
                chunk = await loop.run_in_executor(None, next, reader, None)
                if chunk is None:
                    break
                yield chunk
        finally:
            reader.close()

    def count_rows(self) -> int:
        """
        Count total rows in file (for progress tracking).

        Records are counted with the ``csv`` module so that quoted fields
        containing line breaks count as one row and blank lines are ignored.
        For delimiters the ``csv`` module cannot handle (anything other than
        a single character) the count falls back to physical lines.

        Returns:
            Total row count (excluding header)
        """
        if len(self.delimiter) != 1:
            # csv.reader only accepts one-character delimiters.
            with open(self.file_path, "r", encoding=self.encoding) as f:
                next(f, None)  # Skip header
                return sum(1 for _ in f)

        import csv

        # newline="" lets the csv module handle embedded line breaks itself.
        with open(self.file_path, "r", encoding=self.encoding, newline="") as f:
            records = (row for row in csv.reader(f, delimiter=self.delimiter) if row)
            next(records, None)  # Skip header
            return sum(1 for _ in records)
396
+