aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,462 @@
1
+ """
2
+ Schema Inference for Structured Data Import
3
+
4
+ Automatically infers schema mappings from data structure, reducing manual configuration effort.
5
+ """
6
+
7
+ from typing import Dict, List, Optional, Any, Union, Set
8
+ from pathlib import Path
9
+ from dataclasses import dataclass
10
+ import re
11
+ import logging
12
+ import warnings
13
+
14
+ from aiecs.application.knowledge_graph.builder.schema_mapping import (
15
+ SchemaMapping,
16
+ EntityMapping,
17
+ RelationMapping,
18
+ )
19
+ from aiecs.domain.knowledge_graph.schema.property_schema import PropertyType
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+ # Check for pandas availability
24
+ try:
25
+ import pandas as pd
26
+ PANDAS_AVAILABLE = True
27
+ except ImportError:
28
+ PANDAS_AVAILABLE = False
29
+
30
+
31
@dataclass
class InferredSchema:
    """
    Outcome of a schema inference run

    Bundles the inferred entity and relation mappings together with the
    confidence scores and warnings collected while inferring them, so the
    result can be inspected or adjusted before it is used.
    """
    # Entity mappings produced by inference
    entity_mappings: List[EntityMapping]
    # Relation mappings produced by inference
    relation_mappings: List[RelationMapping]
    # Mapping name -> confidence score (0-1)
    confidence_scores: Dict[str, float]
    # Human-readable notes gathered during inference
    warnings: List[str]

    def to_schema_mapping(self) -> SchemaMapping:
        """Build a SchemaMapping from the entity and relation mappings held here"""
        mapping = SchemaMapping(
            entity_mappings=self.entity_mappings,
            relation_mappings=self.relation_mappings,
        )
        return mapping
49
+
50
+
51
class SchemaInference:
    """
    Automatic schema inference from structured data

    Analyzes data structure and content to automatically generate schema mappings.
    Column labels are compared case-insensitively against the regex patterns
    below; non-string labels are converted with ``str()`` before matching.
    """

    # Common ID column patterns (matched against the lower-cased column label)
    ID_PATTERNS = [
        r'^id$',
        r'^.*_id$',
        r'^key$',
        r'^.*_key$',
        r'^pk$',
        r'^.*_pk$',
    ]

    # Foreign key patterns (for relation inference); group 1 is the target name
    FK_PATTERNS = [
        r'^(.+)_id$',  # e.g., dept_id -> dept
        r'^(.+)_key$',  # e.g., dept_key -> dept
        r'^fk_(.+)$',  # e.g., fk_dept -> dept
    ]

    def __init__(self, sample_size: int = 1000):
        """
        Initialize schema inference

        Args:
            sample_size: Number of rows to sample for inference (default: 1000)
        """
        self.sample_size = sample_size

    def infer_from_dataframe(
        self,
        df: 'pd.DataFrame',
        entity_type_hint: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> "InferredSchema":
        """
        Infer schema from pandas DataFrame

        Args:
            df: DataFrame to analyze
            entity_type_hint: Optional hint for entity type name
            metadata: Optional metadata (e.g., SPSS variable labels)

        Returns:
            InferredSchema with entity and relation mappings

        Raises:
            ImportError: If pandas is not installed
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for schema inference")

        # Deliberately not called ``warnings``: that name is the stdlib module
        # imported at the top of this file.
        inference_warnings: List[str] = []
        confidence_scores: Dict[str, float] = {}

        # Sample data if too large (fixed seed keeps inference reproducible)
        if len(df) > self.sample_size:
            df_sample = df.sample(n=self.sample_size, random_state=42)
            inference_warnings.append(f"Sampled {self.sample_size} rows from {len(df)} for inference")
        else:
            df_sample = df

        # Detect ID column; compare against None so a falsy label such as 0
        # is still accepted as a detected column.
        id_column = self._detect_id_column(df_sample)
        if id_column is not None:
            confidence_scores['id_column'] = 0.9
        else:
            inference_warnings.append("No clear ID column detected, will use first column")
            id_column = df.columns[0] if len(df.columns) > 0 else None
            confidence_scores['id_column'] = 0.5

        # Infer property types.
        # NOTE(review): the inferred types are not passed to EntityMapping
        # below; they are only logged so the result is at least observable.
        property_types = self._infer_property_types(df_sample, metadata)
        logger.debug("Inferred property types: %s", property_types)

        # Determine entity type
        column_names = df.columns.tolist()
        entity_type = entity_type_hint or self._infer_entity_type(column_names, id_column)

        # Create entity mapping (every column maps to a property of the same name)
        entity_mapping = EntityMapping(
            source_columns=column_names,
            entity_type=entity_type,
            property_mapping={col: col for col in df.columns},
            id_column=id_column,
        )

        # Infer relations from foreign key patterns
        relation_mappings = self._infer_relations(df_sample, id_column)
        if relation_mappings:
            confidence_scores['relations'] = 0.7

        return InferredSchema(
            entity_mappings=[entity_mapping],
            relation_mappings=relation_mappings,
            confidence_scores=confidence_scores,
            warnings=inference_warnings,
        )

    def _detect_id_column(self, df: 'pd.DataFrame') -> Optional[str]:
        """
        Detect ID column from DataFrame

        Returns the first column whose label matches one of ``ID_PATTERNS``;
        if none matches, the first column whose number of distinct values
        equals the number of rows.

        Args:
            df: DataFrame to analyze

        Returns:
            Label of the ID column, or None if not found
        """
        # Check for columns matching ID patterns. str() keeps non-string
        # labels (e.g. integer positions) from raising AttributeError.
        for col in df.columns:
            col_lower = str(col).lower()
            if any(re.match(pattern, col_lower) for pattern in self.ID_PATTERNS):
                return col

        # Check for columns with all unique values
        row_count = len(df)
        for col in df.columns:
            try:
                if df[col].nunique() == row_count:
                    return col
            except TypeError:
                # Cells holding unhashable values (lists, dicts) cannot be
                # counted; such a column is not usable as an ID anyway.
                continue

        return None

    def _infer_property_types(
        self,
        df: 'pd.DataFrame',
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, "PropertyType"]:
        """
        Infer property types from DataFrame columns

        Args:
            df: DataFrame to analyze
            metadata: Optional metadata (e.g., SPSS variable labels); accepted
                for interface symmetry, not consulted by this method

        Returns:
            Dictionary mapping column name to PropertyType
        """
        property_types = {}

        for col in df.columns:
            dtype = df[col].dtype

            # Infer PropertyType from pandas dtype
            if pd.api.types.is_integer_dtype(dtype):
                property_types[col] = PropertyType.INTEGER
            elif pd.api.types.is_float_dtype(dtype):
                property_types[col] = PropertyType.FLOAT
            elif pd.api.types.is_bool_dtype(dtype):
                property_types[col] = PropertyType.BOOLEAN
            elif pd.api.types.is_datetime64_any_dtype(dtype):
                property_types[col] = PropertyType.DATETIME
            elif self._could_be_date(df[col]):
                # Non-numeric column whose leading values parse as dates
                property_types[col] = PropertyType.DATETIME
            else:
                property_types[col] = PropertyType.STRING

        return property_types

    def _could_be_date(self, series: 'pd.Series') -> bool:
        """
        Check if a string series could be dates

        Only the first ten non-null values are examined.

        Args:
            series: Pandas series to check

        Returns:
            True if the examined values all parse as dates, False if parsing
            fails or there are no non-null values
        """
        # Sample a few non-null values
        sample = series.dropna().head(10)
        if len(sample) == 0:
            return False

        # Try to parse as dates.
        # Suppress UserWarning about dateutil fallback - this is expected behavior
        # when pandas can't infer the date format automatically.
        try:
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", UserWarning)
                pd.to_datetime(sample, errors='raise')
        except (ValueError, TypeError, OverflowError):
            return False
        return True

    def _infer_entity_type(self, columns: List[str], id_column: Optional[str]) -> str:
        """
        Infer entity type name from column names

        Args:
            columns: List of column names (currently not consulted)
            id_column: Name of ID column (if detected)

        Returns:
            The capitalized prefix of the ID column (e.g. "emp_id" -> "Emp"),
            or "Entity" when no prefix can be extracted
        """
        if id_column is not None:
            id_lower = str(id_column).lower()
            # Try to extract entity type from ID column name
            for pattern in self.FK_PATTERNS:
                match = re.match(pattern, id_lower)
                if match:
                    return match.group(1).capitalize()

        # Default to "Entity"
        return "Entity"

    def infer_from_csv(
        self,
        file_path: Union[str, Path],
        encoding: str = "utf-8",
        entity_type_hint: Optional[str] = None,
    ) -> "InferredSchema":
        """
        Infer schema from CSV file

        At most ``sample_size`` rows are read from the file.

        Args:
            file_path: Path to CSV file
            encoding: File encoding
            entity_type_hint: Optional hint for entity type name, forwarded to
                ``infer_from_dataframe``

        Returns:
            InferredSchema with entity and relation mappings

        Raises:
            ImportError: If pandas is not installed
        """
        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for CSV schema inference")

        df = pd.read_csv(file_path, encoding=encoding, nrows=self.sample_size)
        return self.infer_from_dataframe(df, entity_type_hint=entity_type_hint)

    def infer_from_spss(
        self,
        file_path: Union[str, Path],
        encoding: str = "utf-8",
        entity_type_hint: Optional[str] = None,
    ) -> "InferredSchema":
        """
        Infer schema from SPSS file

        Reads at most ``sample_size`` rows and forwards the file's column
        names, column labels and value labels as metadata.

        Args:
            file_path: Path to SPSS file
            encoding: File encoding
            entity_type_hint: Optional hint for entity type name, forwarded to
                ``infer_from_dataframe``

        Returns:
            InferredSchema with entity and relation mappings

        Raises:
            ImportError: If pyreadstat or pandas is not installed
        """
        try:
            import pyreadstat  # type: ignore[import-untyped]
        except ImportError as err:
            raise ImportError("pyreadstat is required for SPSS schema inference") from err

        if not PANDAS_AVAILABLE:
            raise ImportError("pandas is required for SPSS schema inference")

        # Read SPSS file with metadata
        df, meta = pyreadstat.read_sav(str(file_path), encoding=encoding, row_limit=self.sample_size)

        # Extract metadata, tolerating attributes missing on the meta object
        metadata = {
            "column_names": getattr(meta, 'column_names', []),
            "column_labels": getattr(meta, 'column_labels', []),
            "variable_value_labels": getattr(meta, 'variable_value_labels', {}),
        }

        return self.infer_from_dataframe(df, entity_type_hint=entity_type_hint, metadata=metadata)

    def _infer_relations(
        self,
        df: 'pd.DataFrame',
        id_column: Optional[str],
    ) -> List["RelationMapping"]:
        """
        Infer relation mappings from foreign key patterns

        Detects columns that look like foreign keys and creates relation mappings.

        Args:
            df: DataFrame to analyze
            id_column: Name of ID column (source entity)

        Returns:
            List of inferred RelationMapping objects (empty when there is no
            ID column)
        """
        if id_column is None:
            return []

        relation_mappings = []

        # Look for foreign key columns
        for col in df.columns:
            if col == id_column:
                continue

            col_lower = str(col).lower()

            # Only the first matching FK pattern is used for a column
            for pattern in self.FK_PATTERNS:
                match = re.match(pattern, col_lower)
                if not match:
                    continue

                # Extract target entity type from FK column name
                target_entity_type = match.group(1).capitalize()

                # Infer relation type from column name
                # e.g., "dept_id" -> "WORKS_IN"
                relation_type = self._infer_relation_type(id_column, col, target_entity_type)

                relation_mappings.append(
                    RelationMapping(
                        source_columns=[id_column, col],
                        relation_type=relation_type,
                        source_entity_column=id_column,
                        target_entity_column=col,
                        property_mapping={},
                    )
                )
                break

        return relation_mappings

    def _infer_relation_type(
        self,
        source_column: str,
        target_column: str,
        target_entity_type: str,
    ) -> str:
        """
        Infer relation type name from column names

        Args:
            source_column: Source entity column name (currently not consulted)
            target_column: Target entity (FK) column name
            target_entity_type: Inferred target entity type

        Returns:
            Inferred relation type name (e.g., "BELONGS_TO"), or
            "HAS_<TARGET_ENTITY_TYPE>" when no known keyword is present
        """
        # Reduce the FK column to its base name. Only a leading "fk_" and a
        # trailing "_id"/"_key" are removed; deleting those fragments from the
        # middle of a name could splice unrelated pieces into a false keyword.
        target_base = str(target_column).lower()
        for suffix in ('_id', '_key'):
            if target_base.endswith(suffix):
                target_base = target_base[:-len(suffix)]
                break
        if target_base.startswith('fk_'):
            target_base = target_base[len('fk_'):]

        # Common relation verbs based on context
        if 'dept' in target_base or 'department' in target_base:
            return "WORKS_IN"
        if 'manager' in target_base or 'supervisor' in target_base:
            return "REPORTS_TO"
        if 'company' in target_base or 'organization' in target_base:
            return "BELONGS_TO"
        if 'project' in target_base:
            return "ASSIGNED_TO"
        if 'team' in target_base or 'group' in target_base:
            return "MEMBER_OF"

        # Generic relation type
        return f"HAS_{target_entity_type.upper()}"

    def merge_with_partial_schema(
        self,
        inferred: "InferredSchema",
        partial_mapping: "SchemaMapping",
    ) -> "InferredSchema":
        """
        Merge inferred schema with user-provided partial schema

        User-defined mappings take precedence over inferred ones: an inferred
        entity mapping is dropped when the user already defined that entity
        type, and an inferred relation mapping is dropped when the user
        defined one with the same source column, target column and type.

        Args:
            inferred: Inferred schema
            partial_mapping: User-provided partial schema mapping

        Returns:
            Merged InferredSchema
        """
        # Start with user-defined mappings
        entity_mappings = list(partial_mapping.entity_mappings)
        relation_mappings = list(partial_mapping.relation_mappings)

        # Add inferred entity mappings that don't conflict
        user_entity_types = {em.entity_type for em in partial_mapping.entity_mappings}
        entity_mappings.extend(
            em for em in inferred.entity_mappings
            if em.entity_type not in user_entity_types
        )

        # Add inferred relation mappings that don't conflict
        user_relation_keys = {
            (rm.source_entity_column, rm.target_entity_column, rm.relation_type)
            for rm in partial_mapping.relation_mappings
        }
        relation_mappings.extend(
            rm for rm in inferred.relation_mappings
            if (rm.source_entity_column, rm.target_entity_column, rm.relation_type)
            not in user_relation_keys
        )

        return InferredSchema(
            entity_mappings=entity_mappings,
            relation_mappings=relation_mappings,
            # Copied so the merged result does not share a mutable dict with
            # the schema it was built from
            confidence_scores=dict(inferred.confidence_scores),
            warnings=inferred.warnings + ["Merged with user-provided partial schema"],
        )
462
+