aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,464 @@
1
+ """
2
+ Semantic Name Matcher for Knowledge Graph Entity Fusion.
3
+
4
+ Provides embedding-based semantic matching for entity names using LLM embeddings.
5
+ Supports configurable similarity thresholds and caching to minimize API calls.
6
+ """
7
+
8
+ import asyncio
9
+ import logging
10
+ import math
11
+ from dataclasses import dataclass, field
12
+ from typing import Dict, List, Optional, Tuple, Any
13
+ from collections import OrderedDict
14
+ import threading
15
+
16
+ from aiecs.llm import LLMClientFactory, AIProvider
17
+ from aiecs.llm.protocols import LLMClientProtocol
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class SemanticMatchResult:
    """
    Outcome of comparing two entity names by embedding similarity.

    Attributes:
        name1: First name that was compared.
        name2: Second name that was compared.
        similarity: Similarity score computed for the pair.
        is_match: Whether the pair was judged to be a match.
        embedding1: Embedding vector for ``name1``; defaults to ``None``.
        embedding2: Embedding vector for ``name2``; defaults to ``None``.
    """

    name1: str
    name2: str
    similarity: float
    is_match: bool
    embedding1: Optional[List[float]] = None
    embedding2: Optional[List[float]] = None
31
+
32
+
33
@dataclass
class EmbeddingCacheConfig:
    """
    Settings for an embedding cache.

    Attributes:
        max_size: Maximum number of cached entries (default 10000).
        ttl_seconds: Entry time-to-live in seconds; ``None`` means no TTL.
    """

    max_size: int = 10000
    ttl_seconds: Optional[int] = None
38
+
39
+
40
@dataclass
class SemanticMatcherConfig:
    """
    Settings that control SemanticNameMatcher.

    Attributes:
        similarity_threshold: Similarity threshold for a match.
        embedding_provider: LLM provider used for embeddings.
        embedding_model: Embedding model name; ``None`` uses the provider
            default.
        cache_max_size: Maximum size of the embedding cache.
        batch_size: Batch size for embedding API calls.
        enabled: Enables or disables semantic matching.
    """

    similarity_threshold: float = 0.85
    embedding_provider: str = "OpenAI"
    embedding_model: Optional[str] = None
    cache_max_size: int = 10000
    batch_size: int = 100
    enabled: bool = True
55
+
56
+
57
class LRUEmbeddingCache:
    """
    Thread-safe LRU cache for name embeddings.

    Entries live in an ``OrderedDict`` whose order tracks recency: the first
    item is the least recently used, the last item the most recently used.
    Keys are normalized (lower-cased, then stripped of surrounding
    whitespace) before every operation, so ``" Foo "`` and ``"foo"`` share
    one entry. All access to the dict and the hit/miss counters happens
    under a ``threading.Lock``.

    A non-positive ``max_size`` disables storage: ``set`` becomes a no-op
    instead of failing while trying to evict from an empty cache.
    """

    def __init__(self, max_size: int = 10000):
        """
        Create an empty cache.

        Args:
            max_size: Maximum number of entries kept. Values <= 0 disable
                caching entirely.
        """
        self._cache: OrderedDict[str, List[float]] = OrderedDict()
        self._max_size = max_size
        self._lock = threading.Lock()
        self._hits = 0
        self._misses = 0

    @staticmethod
    def _normalize(key: str) -> str:
        """Return the canonical cache key for *key*."""
        return key.lower().strip()

    def get(self, key: str) -> Optional[List[float]]:
        """
        Look up an embedding and mark it as most recently used.

        Args:
            key: Name whose embedding is requested.

        Returns:
            The cached embedding, or ``None`` on a miss. Hit and miss
            counters are updated accordingly.
        """
        normalized_key = self._normalize(key)
        with self._lock:
            if normalized_key in self._cache:
                # Move to end (most recently used)
                self._cache.move_to_end(normalized_key)
                self._hits += 1
                return self._cache[normalized_key]
            self._misses += 1
            return None

    def set(self, key: str, embedding: List[float]) -> None:
        """
        Store an embedding, evicting the least recently used entry if full.

        Args:
            key: Name the embedding belongs to.
            embedding: Embedding vector to store.
        """
        if self._max_size <= 0:
            # Nothing can be stored; evicting from an empty OrderedDict
            # would raise KeyError.
            return

        normalized_key = self._normalize(key)
        with self._lock:
            if normalized_key in self._cache:
                # Refresh both the value and the recency of an existing entry.
                self._cache[normalized_key] = embedding
                self._cache.move_to_end(normalized_key)
                return
            if len(self._cache) >= self._max_size:
                # Evict least recently used (front of the OrderedDict).
                self._cache.popitem(last=False)
            self._cache[normalized_key] = embedding

    def invalidate(self, key: str) -> bool:
        """
        Remove a single entry.

        Args:
            key: Name whose entry should be dropped.

        Returns:
            True if an entry was removed, False if the key was not cached.
        """
        normalized_key = self._normalize(key)
        with self._lock:
            if normalized_key in self._cache:
                del self._cache[normalized_key]
                return True
            return False

    def invalidate_many(self, keys: List[str]) -> int:
        """
        Remove several entries under a single lock acquisition.

        Args:
            keys: Names whose entries should be dropped.

        Returns:
            Number of entries actually removed.
        """
        removed = 0
        with self._lock:
            for key in keys:
                normalized_key = self._normalize(key)
                if normalized_key in self._cache:
                    del self._cache[normalized_key]
                    removed += 1
        return removed

    def clear(self) -> None:
        """Remove all entries and reset the hit/miss counters."""
        with self._lock:
            self._cache.clear()
            self._hits = 0
            self._misses = 0

    def size(self) -> int:
        """Return the number of entries currently cached."""
        with self._lock:
            return len(self._cache)

    def get_stats(self) -> Dict[str, Any]:
        """
        Return a snapshot of cache statistics.

        Returns:
            Dict with ``size``, ``max_size``, ``hits``, ``misses`` and
            ``hit_rate`` (0.0 when no lookups have happened yet).
        """
        with self._lock:
            total = self._hits + self._misses
            hit_rate = self._hits / total if total > 0 else 0.0
            return {
                "size": len(self._cache),
                "max_size": self._max_size,
                "hits": self._hits,
                "misses": self._misses,
                "hit_rate": hit_rate,
            }

    def contains(self, key: str) -> bool:
        """Return True if *key* is cached; does not affect recency or stats."""
        normalized_key = self._normalize(key)
        with self._lock:
            return normalized_key in self._cache

    def get_all_keys(self) -> List[str]:
        """Return all normalized keys, least recently used first (for debugging/testing)."""
        with self._lock:
            return list(self._cache.keys())
153
+
154
+
155
class SemanticNameMatcher:
    """
    Semantic name matcher using LLM embeddings.

    Provides embedding-based entity name matching with:
    - Configurable similarity threshold
    - LRU embedding cache to minimize API calls
    - Batch embedding generation
    - Cosine similarity calculation

    Example:
        ```python
        config = SemanticMatcherConfig(similarity_threshold=0.85)
        matcher = SemanticNameMatcher(config)

        # Check if two names match semantically
        result = await matcher.match("Albert Einstein", "A. Einstein")
        if result.is_match:
            print(f"Match! Similarity: {result.similarity}")

        # Get embedding for a name (cached)
        embedding = await matcher.get_embedding("Albert Einstein")
        ```
    """

    def __init__(
        self,
        config: Optional[SemanticMatcherConfig] = None,
        llm_client: Optional[LLMClientProtocol] = None,
    ):
        """
        Initialize semantic name matcher.

        Args:
            config: Configuration for matching behavior (defaults to
                ``SemanticMatcherConfig()``)
            llm_client: Optional LLM client for embeddings (uses factory if not provided)
        """
        self._config = config or SemanticMatcherConfig()
        self._cache = LRUEmbeddingCache(max_size=self._config.cache_max_size)
        self._llm_client = llm_client
        # Serializes lazy client creation across concurrent coroutines.
        self._lock = asyncio.Lock()

    def _resolve_threshold(self, threshold: Optional[float]) -> float:
        """Return *threshold* unless it is None, else the configured default.

        An explicit ``0.0`` is a legitimate threshold, so this must be an
        ``is None`` test rather than a truthiness test.
        """
        if threshold is None:
            return self._config.similarity_threshold
        return threshold

    async def _get_llm_client(self) -> LLMClientProtocol:
        """Get or create LLM client for embeddings."""
        async with self._lock:
            if self._llm_client is None:
                try:
                    provider = AIProvider(self._config.embedding_provider)
                except ValueError:
                    # Not a known AIProvider value: pass the raw name through
                    # so the factory can treat it as a custom provider.
                    provider = self._config.embedding_provider
                self._llm_client = LLMClientFactory.get_client(provider)
            return self._llm_client

    async def get_embedding(self, name: str) -> List[float]:
        """
        Get embedding for a name.

        Uses cache to minimize API calls. Generates new embedding if not cached.

        Args:
            name: Name to embed

        Returns:
            Embedding vector, or an empty list when matching is disabled,
            the provider returns nothing, or the request fails (failures are
            logged, not raised).
        """
        if not self._config.enabled:
            return []

        # Check cache first
        cached = self._cache.get(name)
        if cached is not None:
            return cached

        # Generate embedding
        client = await self._get_llm_client()
        try:
            embeddings = await client.get_embeddings(
                [name],
                model=self._config.embedding_model,
            )
            if embeddings and embeddings[0]:
                embedding = embeddings[0]
                self._cache.set(name, embedding)
                return embedding
        except Exception as e:
            # Best effort: callers treat an empty embedding as "no match".
            logger.warning("Failed to generate embedding for '%s': %s", name, e)

        return []

    async def get_embeddings_batch(
        self, names: List[str]
    ) -> Dict[str, List[float]]:
        """
        Get embeddings for multiple names in batch.

        Uses cache for already-embedded names and batches API calls for new ones.
        Duplicate names are requested only once.

        Args:
            names: List of names to embed

        Returns:
            Dict mapping every distinct input name to its embedding. Names
            whose embedding could not be obtained map to an empty list;
            embeddings obtained before a failure are kept.
        """
        if not self._config.enabled:
            return {name: [] for name in names}

        results: Dict[str, List[float]] = {}
        names_to_embed: List[str] = []

        # Check cache for each distinct name (dict.fromkeys keeps input order).
        for name in dict.fromkeys(names):
            cached = self._cache.get(name)
            if cached is not None:
                results[name] = cached
            else:
                names_to_embed.append(name)

        # Batch embed uncached names
        if names_to_embed:
            client = await self._get_llm_client()
            # Guard against a non-positive batch size, which would make
            # range() raise (0) or yield no batches at all (negative).
            step = max(1, self._config.batch_size)
            try:
                for start in range(0, len(names_to_embed), step):
                    batch = names_to_embed[start:start + step]
                    embeddings = await client.get_embeddings(
                        batch,
                        model=self._config.embedding_model,
                    )

                    for name, embedding in zip(batch, embeddings):
                        if embedding:
                            self._cache.set(name, embedding)
                            results[name] = embedding
                        else:
                            results[name] = []
            except Exception as e:
                logger.warning("Failed to generate batch embeddings: %s", e)

            # Fill only the names that still lack a result (failed or
            # unprocessed batches, or a response shorter than the batch)
            # without discarding embeddings from batches that succeeded.
            for name in names_to_embed:
                results.setdefault(name, [])

        return results

    def cosine_similarity(
        self, embedding1: List[float], embedding2: List[float]
    ) -> float:
        """
        Calculate cosine similarity between two embeddings.

        Args:
            embedding1: First embedding vector
            embedding2: Second embedding vector

        Returns:
            Cosine similarity score between -1 and 1. Returns 0.0 when
            either vector is empty, has zero magnitude, or the dimensions
            differ (the mismatch is logged).
        """
        if not embedding1 or not embedding2:
            return 0.0

        if len(embedding1) != len(embedding2):
            logger.warning(
                "Embedding dimension mismatch: %s vs %s",
                len(embedding1),
                len(embedding2),
            )
            return 0.0

        # Calculate dot product and magnitudes
        dot_product = sum(a * b for a, b in zip(embedding1, embedding2))
        magnitude1 = math.sqrt(sum(a * a for a in embedding1))
        magnitude2 = math.sqrt(sum(b * b for b in embedding2))

        if magnitude1 == 0 or magnitude2 == 0:
            return 0.0

        return dot_product / (magnitude1 * magnitude2)

    async def match(
        self,
        name1: str,
        name2: str,
        threshold: Optional[float] = None,
    ) -> SemanticMatchResult:
        """
        Check if two names match semantically.

        Args:
            name1: First name
            name2: Second name
            threshold: Override similarity threshold (uses config default if None)

        Returns:
            SemanticMatchResult with similarity score and match status. When
            matching is disabled the result has similarity 0.0, no match,
            and no embeddings.
        """
        effective_threshold = self._resolve_threshold(threshold)

        if not self._config.enabled:
            return SemanticMatchResult(
                name1=name1,
                name2=name2,
                similarity=0.0,
                is_match=False,
            )

        # Get embeddings
        embedding1 = await self.get_embedding(name1)
        embedding2 = await self.get_embedding(name2)

        # Calculate similarity; the threshold is inclusive.
        similarity = self.cosine_similarity(embedding1, embedding2)
        is_match = similarity >= effective_threshold

        return SemanticMatchResult(
            name1=name1,
            name2=name2,
            similarity=similarity,
            is_match=is_match,
            embedding1=embedding1,
            embedding2=embedding2,
        )

    async def find_best_match(
        self,
        name: str,
        candidates: List[str],
        threshold: Optional[float] = None,
    ) -> Optional[Tuple[str, float]]:
        """
        Find the best semantic match for a name among candidates.

        Args:
            name: Name to match
            candidates: List of candidate names
            threshold: Minimum similarity threshold (inclusive, as in
                ``match``; uses config default if None)

        Returns:
            Tuple of (best_match_name, similarity) or None if no candidate
            reaches the threshold. On ties the earliest candidate wins.
        """
        if not candidates or not self._config.enabled:
            return None

        effective_threshold = self._resolve_threshold(threshold)

        # Get embedding for target name
        target_embedding = await self.get_embedding(name)
        if not target_embedding:
            return None

        # Get embeddings for all candidates in batch
        candidate_embeddings = await self.get_embeddings_batch(candidates)

        # Find best match
        best_match: Optional[str] = None
        best_similarity = 0.0

        for candidate in candidates:
            candidate_embedding = candidate_embeddings.get(candidate, [])
            if not candidate_embedding:
                continue
            similarity = self.cosine_similarity(target_embedding, candidate_embedding)
            if similarity < effective_threshold:
                continue
            if best_match is None or similarity > best_similarity:
                best_match = candidate
                best_similarity = similarity

        # Compare against None so an empty-string candidate is not dropped.
        if best_match is None:
            return None
        return (best_match, best_similarity)

    def invalidate_cache(self, name: str) -> bool:
        """
        Invalidate cache entry for a name.

        Args:
            name: Name to invalidate

        Returns:
            True if entry was removed
        """
        return self._cache.invalidate(name)

    def invalidate_cache_many(self, names: List[str]) -> int:
        """
        Invalidate cache entries for multiple names.

        Args:
            names: Names to invalidate

        Returns:
            Number of entries removed
        """
        return self._cache.invalidate_many(names)

    def clear_cache(self) -> None:
        """Clear all cache entries."""
        self._cache.clear()

    def get_cache_stats(self) -> Dict[str, Any]:
        """Get cache statistics."""
        return self._cache.get_stats()

    @property
    def cache(self) -> LRUEmbeddingCache:
        """Access to the embedding cache."""
        return self._cache

    @property
    def config(self) -> SemanticMatcherConfig:
        """Get current configuration."""
        return self._config
461
+
462
+
463
+
464
+