aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340) hide show
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,240 @@
1
+ """
2
+ Evaluation Dataset for Knowledge Fusion Matching.
3
+
4
+ Contains curated test cases with known entity matches and non-matches,
5
+ including edge cases for threshold validation and A/B testing.
6
+ """
7
+
8
+ from dataclasses import dataclass
9
+ from typing import List, Optional, Tuple
10
+
11
+
12
@dataclass
class EntityPair:
    """
    One labelled evaluation case: two entity names plus the expected verdict.

    Attributes:
        name1: First entity name
        name2: Second entity name
        entity_type: Type shared by both entities (e.g., "Person", "Organization")
        should_match: Expected outcome; True when the two names should be
            treated as the same entity
        match_reason: Free-text explanation of the expected outcome
            (documentation only)
        domain: Domain context (academic, corporate, medical, etc.)
    """

    # The two names being compared (required, positional).
    name1: str
    name2: str
    # Entity type label for the pair.
    entity_type: str = "Person"
    # Ground-truth label: should a matcher report these names as a match?
    should_match: bool = True
    # Human-readable justification for the label.
    match_reason: str = ""
    # Domain tag for the pair.
    domain: str = "general"
32
+
33
+
34
@dataclass
class EvaluationDataset:
    """
    A named list of entity pairs used to evaluate matching.

    Attributes:
        pairs: Entity pairs that make up the dataset
        name: Name/description of the dataset
    """

    pairs: List[EntityPair]
    name: str = "default"

    def __len__(self) -> int:
        """Number of pairs held by the dataset."""
        return len(self.pairs)

    def get_by_domain(self, domain: str) -> "EvaluationDataset":
        """Return a new dataset with only the pairs whose domain equals ``domain``.

        The new dataset is named ``"<name>_<domain>"``.
        """
        subset_name = f"{self.name}_{domain}"
        subset = [pair for pair in self.pairs if pair.domain == domain]
        return EvaluationDataset(pairs=subset, name=subset_name)

    def get_by_type(self, entity_type: str) -> "EvaluationDataset":
        """Return a new dataset with only the pairs of the given entity type.

        The new dataset is named ``"<name>_<entity_type>"``.
        """
        subset_name = f"{self.name}_{entity_type}"
        subset = [pair for pair in self.pairs if pair.entity_type == entity_type]
        return EvaluationDataset(pairs=subset, name=subset_name)

    def get_positive_pairs(self) -> List[EntityPair]:
        """List the pairs labelled as expected matches."""
        positives = [pair for pair in self.pairs if pair.should_match]
        return positives

    def get_negative_pairs(self) -> List[EntityPair]:
        """List the pairs labelled as expected non-matches."""
        negatives = [pair for pair in self.pairs if not pair.should_match]
        return negatives
68
+
69
+
70
def create_default_evaluation_dataset() -> EvaluationDataset:
    """
    Create default evaluation dataset with known matches and edge cases.

    Pairs are appended group by group, in this order:
    - Person names, academic domain (titles, suffixes, initials, name order)
    - Organization names, corporate domain (abbreviations, legal suffixes)
    - Medical domain (abbreviations, professional titles, institutions)
    - General edge cases (spelling, substrings, special characters,
      multi-word abbreviations, case, whitespace)
    - Semantic similarity cases (synonyms, transliterations)

    Each group also contains negative pairs (``should_match=False``) for
    similar-looking names that denote different entities.

    Returns:
        EvaluationDataset named ``"default_evaluation"`` holding every pair.
    """
    pairs: List[EntityPair] = []

    # ============================================================================
    # Person Entity Matches (Academic Domain)
    # ============================================================================
    academic_person_pairs = [
        # Exact match and title/suffix variations
        EntityPair("Albert Einstein", "Albert Einstein", "Person", True, "Exact match", "academic"),
        EntityPair("Dr. John Smith", "John Smith", "Person", True, "Title prefix", "academic"),
        EntityPair("John Smith, PhD", "John Smith", "Person", True, "Suffix", "academic"),
        EntityPair("Prof. Jane Doe", "Jane Doe", "Person", True, "Professor title", "academic"),

        # Initial variations
        EntityPair("A. Einstein", "Albert Einstein", "Person", True, "Initial expansion", "academic"),
        EntityPair("J. Smith", "John Smith", "Person", True, "Initial expansion", "academic"),
        EntityPair("J. K. Rowling", "Joanne Rowling", "Person", True, "Initial expansion", "academic"),
        EntityPair("M. L. King", "Martin Luther King", "Person", True, "Initial expansion", "academic"),

        # Name order variations
        EntityPair("Smith, John", "John Smith", "Person", True, "Name order", "academic"),
        EntityPair("Einstein, Albert", "Albert Einstein", "Person", True, "Name order", "academic"),

        # Title combinations
        EntityPair("Dr. A. Einstein", "Albert Einstein", "Person", True, "Title + initial", "academic"),
        EntityPair("Prof. J. Smith, PhD", "John Smith", "Person", True, "Title + initial + suffix", "academic"),

        # False positives (should NOT match)
        EntityPair("John Smith", "Jane Smith", "Person", False, "Different first names", "academic"),
        EntityPair("A. Einstein", "A. Newton", "Person", False, "Different surnames", "academic"),
        EntityPair("John Smith", "John Smyth", "Person", False, "Similar but different surname", "academic"),
    ]
    pairs.extend(academic_person_pairs)

    # ============================================================================
    # Organization Entity Matches (Corporate Domain)
    # ============================================================================
    corporate_org_pairs = [
        # Abbreviation matches
        EntityPair("MIT", "Massachusetts Institute of Technology", "Organization", True, "Abbreviation expansion", "corporate"),
        EntityPair("IBM", "International Business Machines", "Organization", True, "Abbreviation expansion", "corporate"),
        EntityPair("NASA", "National Aeronautics and Space Administration", "Organization", True, "Abbreviation expansion", "corporate"),
        EntityPair("NYC", "New York City", "Organization", True, "Abbreviation expansion", "corporate"),
        EntityPair("USA", "United States of America", "Organization", True, "Abbreviation expansion", "corporate"),

        # Name variations
        EntityPair("Apple Inc.", "Apple", "Organization", True, "Incorporation suffix", "corporate"),
        EntityPair("Apple Incorporated", "Apple Inc.", "Organization", True, "Full vs abbreviated suffix", "corporate"),
        EntityPair("Microsoft Corporation", "Microsoft", "Organization", True, "Corporation suffix", "corporate"),
        EntityPair("Microsoft Corp.", "Microsoft Corporation", "Organization", True, "Corp abbreviation", "corporate"),

        # Common name variations
        EntityPair("The New York Times", "New York Times", "Organization", True, "Article prefix", "corporate"),
        EntityPair("AT&T", "AT and T", "Organization", True, "Symbol expansion", "corporate"),

        # False positives
        EntityPair("Apple Inc.", "Apple Computer", "Organization", False, "Different company names", "corporate"),
        EntityPair("Microsoft", "Microsystems", "Organization", False, "Similar but different", "corporate"),
        EntityPair("IBM", "HP", "Organization", False, "Different abbreviations", "corporate"),
    ]
    pairs.extend(corporate_org_pairs)

    # ============================================================================
    # Medical Domain Entity Matches
    # ============================================================================
    medical_pairs = [
        # Medical abbreviations
        EntityPair("COVID-19", "Coronavirus Disease 2019", "Concept", True, "Medical abbreviation", "medical"),
        EntityPair("HIV", "Human Immunodeficiency Virus", "Concept", True, "Medical abbreviation", "medical"),
        EntityPair("AIDS", "Acquired Immunodeficiency Syndrome", "Concept", True, "Medical abbreviation", "medical"),
        EntityPair("DNA", "Deoxyribonucleic Acid", "Concept", True, "Scientific abbreviation", "medical"),
        EntityPair("RNA", "Ribonucleic Acid", "Concept", True, "Scientific abbreviation", "medical"),

        # Medical professional titles
        EntityPair("Dr. Sarah Johnson", "Sarah Johnson, MD", "Person", True, "MD suffix", "medical"),
        EntityPair("Dr. Michael Chen", "Michael Chen, M.D.", "Person", True, "M.D. suffix", "medical"),
        EntityPair("Dr. Emily Brown", "Emily Brown, Doctor", "Person", True, "Doctor title", "medical"),

        # Medical institution variations
        EntityPair("Mayo Clinic", "Mayo Medical Center", "Organization", True, "Clinic vs center", "medical"),
        EntityPair("Johns Hopkins Hospital", "Johns Hopkins", "Organization", True, "Hospital suffix", "medical"),

        # False positives
        EntityPair("COVID-19", "COVID-20", "Concept", False, "Different disease variant", "medical"),
        EntityPair("HIV", "HPV", "Concept", False, "Different viruses", "medical"),
    ]
    pairs.extend(medical_pairs)

    # ============================================================================
    # Edge Cases - Challenging Matches
    # ============================================================================
    edge_case_pairs = [
        # Very similar but different
        EntityPair("John Smith", "Jon Smith", "Person", False, "Different spelling", "general"),
        EntityPair("Steven", "Stephen", "Person", False, "Different spelling", "general"),
        EntityPair("Catherine", "Katherine", "Person", False, "Different spelling", "general"),

        # Substring cases
        EntityPair("New York", "New York City", "Organization", True, "Substring match", "general"),
        EntityPair("University", "State University", "Organization", False, "Too generic", "general"),

        # Special characters
        EntityPair("O'Brien", "OBrien", "Person", True, "Apostrophe normalization", "general"),
        EntityPair("José", "Jose", "Person", True, "Accent normalization", "general"),
        EntityPair("Müller", "Mueller", "Person", True, "Umlaut normalization", "general"),

        # Multiple word variations
        EntityPair("New York University", "NYU", "Organization", True, "Multi-word abbreviation", "general"),
        EntityPair("United States", "US", "Organization", True, "Country abbreviation", "general"),
        EntityPair("United Kingdom", "UK", "Organization", True, "Country abbreviation", "general"),

        # Case variations
        EntityPair("APPLE INC.", "apple inc.", "Organization", True, "Case normalization", "general"),
        EntityPair("JOHN SMITH", "john smith", "Person", True, "Case normalization", "general"),

        # Whitespace variations: name1 deliberately contains a doubled space.
        # Two byte-identical names would only repeat the exact-match case and
        # never exercise whitespace normalization.
        EntityPair("John  Smith", "John Smith", "Person", True, "Whitespace normalization", "general"),
        EntityPair("New  York", "New York", "Organization", True, "Whitespace normalization", "general"),
    ]
    pairs.extend(edge_case_pairs)

    # ============================================================================
    # Semantic Similarity Cases (should match via embeddings)
    # ============================================================================
    semantic_pairs = [
        # Synonyms and related terms
        EntityPair("Doctor", "Physician", "Person", True, "Semantic synonym", "medical"),
        EntityPair("Hospital", "Medical Center", "Organization", True, "Semantic similarity", "medical"),
        EntityPair("University", "College", "Organization", True, "Semantic similarity", "academic"),

        # Transliterations (if supported)
        EntityPair("München", "Munich", "Organization", True, "Transliteration", "general"),
        EntityPair("Moskva", "Moscow", "Organization", True, "Transliteration", "general"),

        # False semantic matches
        EntityPair("Apple", "Orange", "Organization", False, "Different fruits", "general"),
        EntityPair("Microsoft", "Apple", "Organization", False, "Different companies", "corporate"),
    ]
    pairs.extend(semantic_pairs)

    return EvaluationDataset(pairs=pairs, name="default_evaluation")
221
+
222
+
223
def create_minimal_evaluation_dataset() -> EvaluationDataset:
    """
    Build a small five-pair dataset for quick testing.

    The pairs are written out inline: three expected matches followed by
    two expected non-matches, covering the "Person" and "Organization"
    entity types.

    Returns:
        EvaluationDataset named ``"minimal_evaluation"``.
    """
    # Pairs a matcher is expected to accept.
    expected_matches = [
        EntityPair("Albert Einstein", "A. Einstein", "Person", True, "Initial expansion", "academic"),
        EntityPair("MIT", "Massachusetts Institute of Technology", "Organization", True, "Abbreviation", "corporate"),
        EntityPair("Dr. John Smith", "John Smith", "Person", True, "Title prefix", "academic"),
    ]

    # Pairs a matcher is expected to reject.
    expected_non_matches = [
        EntityPair("John Smith", "Jane Smith", "Person", False, "Different names", "academic"),
        EntityPair("Apple Inc.", "Microsoft", "Organization", False, "Different companies", "corporate"),
    ]

    return EvaluationDataset(
        pairs=expected_matches + expected_non_matches,
        name="minimal_evaluation",
    )