aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,189 @@
1
+ """
2
+ Relation Deduplicator
3
+
4
+ Identifies and removes duplicate relations.
5
+ """
6
+
7
+ from typing import List, Tuple, Dict
8
+ from aiecs.domain.knowledge_graph.models.relation import Relation
9
+
10
+
11
class RelationDeduplicator:
    """
    Deduplicate relations based on equivalence

    Two relations are considered duplicates when they have the same
    ``source_id``, ``target_id`` and ``relation_type``. Properties are not
    part of the comparison; they are only combined when duplicates are merged.

    This handles cases like:
    - Extracting "Alice WORKS_FOR Tech Corp" multiple times from different sentences
    - Multiple mentions of the same relationship with slight variations

    Example:
        ```python
        deduplicator = RelationDeduplicator()

        relations = [
            Relation(id="r1", source_id="e1", target_id="e2", relation_type="WORKS_FOR"),
            Relation(id="r2", source_id="e1", target_id="e2", relation_type="WORKS_FOR",
                     properties={"since": "2020"}),
            Relation(id="r3", source_id="e1", target_id="e3", relation_type="KNOWS"),
        ]

        deduplicated = await deduplicator.deduplicate(relations)
        # Returns two relations:
        #   - "r1" (e1 WORKS_FOR e2) with properties
        #     {"since": "2020", "_merged_count": 2}
        #   - "r3" (e1 KNOWS e3), returned unchanged
        ```
    """

    def __init__(self, merge_properties: bool = True):
        """
        Initialize relation deduplicator

        Args:
            merge_properties: If True, merge properties from duplicate relations
        """
        self.merge_properties = merge_properties

    async def deduplicate(self, relations: List[Relation]) -> List[Relation]:
        """
        Deduplicate a list of relations

        Relations are grouped by ``(source_id, target_id, relation_type)``.
        The result keeps the order in which each group was first seen.
        A relation without duplicates is returned as the same object;
        a group of duplicates is replaced by one merged relation.

        Args:
            relations: List of relations to deduplicate

        Returns:
            List of unique relations (with merged properties if enabled)
        """
        if not relations:
            return []

        # Dicts preserve insertion order, so groups come out in first-seen order.
        groups: Dict[Tuple[str, str, str], List[Relation]] = {}
        for relation in relations:
            key = (relation.source_id, relation.target_id, relation.relation_type)
            groups.setdefault(key, []).append(relation)

        return [
            group[0] if len(group) == 1 else self._merge_relations(group)
            for group in groups.values()
        ]

    def _merge_relations(self, relations: List[Relation]) -> Relation:
        """
        Merge a group of duplicate relations into one

        Strategy:
        - Use first relation as base (id, endpoints, type and ``source``)
        - Combine properties (see ``_combine_properties``)
        - Keep highest weight

        Args:
            relations: Non-empty list of duplicate relations

        Returns:
            Merged relation (the single input relation itself when the
            list has exactly one element)
        """
        if len(relations) == 1:
            return relations[0]

        base = relations[0]
        return Relation(
            id=base.id,
            relation_type=base.relation_type,
            source_id=base.source_id,
            target_id=base.target_id,
            properties=self._combine_properties(relations),
            weight=max(r.weight for r in relations),
            source=base.source,
        )

    def _combine_properties(self, relations: List[Relation]) -> Dict[str, object]:
        """
        Build the property dict for a merged relation

        - Starts from a copy of the first relation's properties.
        - When ``merge_properties`` is enabled, later relations fill in keys
          that are missing or whose current value is empty. Values such as
          ``0`` and ``False`` are real data and are never overwritten.
        - ``_extraction_confidence`` is set to the highest confidence that
          any relation actually reports. It is not invented when no
          relation carries one.
        - ``_merged_count`` records how many relations were merged.

        Args:
            relations: Non-empty list of duplicate relations

        Returns:
            New dict; the input relations' property dicts are not mutated
        """
        base = relations[0]
        merged: Dict[str, object] = dict(base.properties) if base.properties else {}

        if self.merge_properties:
            for relation in relations[1:]:
                for key, value in (relation.properties or {}).items():
                    if key not in merged or self._is_empty_value(merged[key]):
                        merged[key] = value

        # Only explicit, non-None confidences take part in the maximum.
        confidences = [
            r.properties["_extraction_confidence"]
            for r in relations
            if r.properties and r.properties.get("_extraction_confidence") is not None
        ]
        if confidences:
            merged["_extraction_confidence"] = max(confidences)

        merged["_merged_count"] = len(relations)
        return merged

    @staticmethod
    def _is_empty_value(value: object) -> bool:
        """
        Tell whether a property value counts as "no value"

        Only ``None`` and zero-length strings/containers are empty.
        Numbers and booleans are always treated as meaningful.
        """
        if value is None:
            return True
        sized_types = (str, bytes, list, tuple, dict, set, frozenset)
        return isinstance(value, sized_types) and len(value) == 0

    def find_duplicates(self, relations: List[Relation]) -> List[Tuple[Relation, Relation]]:
        """
        Find pairs of duplicate relations without merging

        Useful for debugging or manual review. Every unordered pair is
        checked once; pairs are reported in input order (earlier relation
        first).

        Args:
            relations: List of relations to check

        Returns:
            List of (relation1, relation2) tuples that are duplicates
        """
        return [
            (first, second)
            for index, first in enumerate(relations)
            for second in relations[index + 1:]
            if self._are_duplicates(first, second)
        ]

    def _are_duplicates(self, r1: Relation, r2: Relation) -> bool:
        """
        Check if two relations are duplicates

        Args:
            r1: First relation
            r2: Second relation

        Returns:
            True if both relations share source_id, target_id and relation_type
        """
        return (
            r1.source_id == r2.source_id
            and r1.target_id == r2.target_id
            and r1.relation_type == r2.relation_type
        )
@@ -0,0 +1,21 @@
1
+ """
2
+ Pattern Matching Module
3
+
4
+ Provides graph pattern matching capabilities for custom query execution.
5
+
6
+ Phase: 3.3 - Full Custom Query Execution
7
+ """
8
+
9
+ from aiecs.application.knowledge_graph.pattern_matching.pattern_matcher import (
10
+ PatternMatcher,
11
+ PatternMatch,
12
+ )
13
+ from aiecs.application.knowledge_graph.pattern_matching.query_executor import (
14
+ CustomQueryExecutor,
15
+ )
16
+
17
# Names re-exported as the public API of the pattern_matching package
__all__ = ["PatternMatcher", "PatternMatch", "CustomQueryExecutor"]
@@ -0,0 +1,344 @@
1
+ """
2
+ Pattern Matching Engine
3
+
4
+ Implements graph pattern matching for custom query execution.
5
+
6
+ Phase: 3.3 - Full Custom Query Execution
7
+ Version: 1.0
8
+ """
9
+
10
+ from typing import List, Dict, Any, Optional
11
+ from aiecs.domain.knowledge_graph.models.entity import Entity
12
+ from aiecs.domain.knowledge_graph.models.relation import Relation
13
+ from aiecs.domain.knowledge_graph.models.path import Path
14
+ from aiecs.domain.knowledge_graph.models.path_pattern import PathPattern
15
+ from aiecs.infrastructure.graph_storage.base import GraphStore
16
+
17
+
18
class PatternMatch:
    """
    One result produced by pattern matching

    Attributes:
        entities: Entities that took part in the match
        relations: Relations that took part in the match
        bindings: Variable bindings (an empty dict when none were supplied)
        score: Match score (documented range 0.0-1.0; not validated here)
    """

    def __init__(
        self,
        entities: List[Entity],
        relations: List[Relation],
        bindings: Optional[Dict[str, Any]] = None,
        score: float = 1.0,
    ):
        self.entities, self.relations = entities, relations
        # Any falsy ``bindings`` (None or an empty mapping) becomes a fresh dict
        self.bindings = bindings if bindings else {}
        self.score = score

    def __repr__(self) -> str:
        entity_count = len(self.entities)
        relation_count = len(self.relations)
        return f"PatternMatch(entities={entity_count}, relations={relation_count}, score={self.score})"
43
+
44
+
45
class PatternMatcher:
    """
    Graph Pattern Matching Engine

    Executes pattern matching queries against a graph store.
    Supports:
    - Single pattern matching
    - Multiple pattern matching (AND semantics)
    - Optional pattern matching
    - Rejecting paths that revisit an entity when the pattern forbids cycles
    """

    def __init__(self, graph_store: GraphStore):
        """
        Initialize pattern matcher

        Args:
            graph_store: Graph storage backend
        """
        self.graph_store = graph_store

    async def match_pattern(
        self,
        pattern: PathPattern,
        start_entity_id: Optional[str] = None,
        max_matches: int = 100,
    ) -> List[PatternMatch]:
        """
        Match a single pattern in the graph

        With ``start_entity_id`` the search is anchored at that entity and
        every matching path becomes one result. Without it, each candidate
        entity from ``_get_all_entities`` is tried as a start and contributes
        at most one result.

        Args:
            pattern: Pattern to match
            start_entity_id: Optional starting entity ID
            max_matches: Maximum number of matches to return

        Returns:
            List of pattern matches (empty when the start entity is unknown)
        """
        if start_entity_id:
            start_entity = await self.graph_store.get_entity(start_entity_id)
            if not start_entity:
                return []

            paths = await self._find_matching_paths(start_entity, pattern, max_matches)
            return [
                PatternMatch(entities=path.nodes, relations=path.edges, score=1.0)
                for path in paths
            ]

        # Unanchored search: more expensive, every candidate entity is probed.
        # Candidates are NOT truncated to ``max_matches`` up front, because a
        # non-matching candidate must not consume one of the result slots.
        matches: List[PatternMatch] = []
        candidates = await self._get_all_entities(pattern.entity_types)

        for entity in candidates:
            if len(matches) >= max_matches:
                break

            paths = await self._find_matching_paths(entity, pattern, max_matches=1)
            if paths:
                first_path = paths[0]
                matches.append(
                    PatternMatch(
                        entities=first_path.nodes,
                        relations=first_path.edges,
                        score=1.0,
                    )
                )

        return matches

    async def match_multiple_patterns(
        self,
        patterns: List[PathPattern],
        start_entity_id: Optional[str] = None,
        max_matches: int = 100,
    ) -> List[PatternMatch]:
        """
        Match multiple patterns (AND semantics)

        The first pattern produces the base matches. Every further pattern
        must then match starting from one of the entities of the base match;
        otherwise that base match is discarded.

        Args:
            patterns: List of patterns to match
            start_entity_id: Optional starting entity ID
            max_matches: Maximum number of matches to return

        Returns:
            List of pattern matches where all patterns matched
        """
        if not patterns:
            return []

        first_matches = await self.match_pattern(patterns[0], start_entity_id, max_matches)
        if len(patterns) == 1:
            return first_matches

        combined_matches: List[PatternMatch] = []

        for match in first_matches:
            combined_entities = list(match.entities)
            combined_relations = list(match.relations)

            for pattern in patterns[1:]:
                matched = await self._extend_with_pattern(
                    match, pattern, combined_entities, combined_relations
                )
                if not matched:
                    break
            else:
                # Loop finished without ``break``: every pattern matched.
                combined_matches.append(
                    PatternMatch(
                        entities=combined_entities,
                        relations=combined_relations,
                        score=match.score,
                    )
                )

        return combined_matches[:max_matches]

    async def match_optional_patterns(
        self,
        required_patterns: List[PathPattern],
        optional_patterns: List[PathPattern],
        start_entity_id: Optional[str] = None,
        max_matches: int = 100,
    ) -> List[PatternMatch]:
        """
        Match required patterns with optional patterns

        Required patterns must match. Optional patterns are included if they match.

        Args:
            required_patterns: Patterns that must match
            optional_patterns: Patterns that may or may not match
            start_entity_id: Optional starting entity ID
            max_matches: Maximum number of matches to return

        Returns:
            List of pattern matches, one per required match
        """
        required_matches = await self.match_multiple_patterns(
            required_patterns, start_entity_id, max_matches
        )

        if not optional_patterns:
            return required_matches

        extended_matches: List[PatternMatch] = []

        for match in required_matches:
            combined_entities = list(match.entities)
            combined_relations = list(match.relations)

            for pattern in optional_patterns:
                # Optional: a miss is fine, so the result is ignored.
                await self._extend_with_pattern(
                    match, pattern, combined_entities, combined_relations
                )

            extended_matches.append(
                PatternMatch(
                    entities=combined_entities,
                    relations=combined_relations,
                    score=match.score,
                )
            )

        return extended_matches

    async def _extend_with_pattern(
        self,
        match: PatternMatch,
        pattern: PathPattern,
        entities: List[Entity],
        relations: List[Relation],
    ) -> bool:
        """
        Try to match ``pattern`` starting from an entity of ``match``

        The entities of ``match`` are tried in order. For the first entity
        that yields sub-matches, their entities and relations are appended to
        ``entities`` and ``relations`` (both modified in place) and the
        search stops.

        Args:
            match: Base match whose entities serve as start points
            pattern: Pattern to match from those entities
            entities: Accumulator extended with matched entities
            relations: Accumulator extended with matched relations

        Returns:
            True if the pattern matched from some entity, False otherwise
        """
        for entity in match.entities:
            sub_matches = await self.match_pattern(pattern, entity.id, max_matches=1)
            if sub_matches:
                for sub_match in sub_matches:
                    entities.extend(sub_match.entities)
                    relations.extend(sub_match.relations)
                return True
        return False

    async def _find_matching_paths(
        self,
        start_entity: Entity,
        pattern: PathPattern,
        max_matches: int = 100,
    ) -> List[Path]:
        """
        Find paths matching a pattern starting from an entity

        Paths come from the graph store's ``traverse`` and are then filtered
        with ``_path_matches_pattern``.

        Args:
            start_entity: Starting entity
            pattern: Pattern to match
            max_matches: Maximum number of paths requested from the store

        Returns:
            List of matching paths
        """
        # ``traverse`` accepts a single relation type. Pre-filter in the store
        # only when the pattern allows exactly one type. With several allowed
        # types, restricting to the first would hide valid paths, so traverse
        # unrestricted and let the filter below enforce the allowed set.
        allowed_types = pattern.relation_types
        store_filter = allowed_types[0] if allowed_types and len(allowed_types) == 1 else None

        paths = await self.graph_store.traverse(
            start_entity.id,
            relation_type=store_filter,
            max_depth=pattern.max_depth,
            max_results=max_matches,
        )

        return [path for path in paths if self._path_matches_pattern(path, pattern)]

    def _path_matches_pattern(self, path: Path, pattern: PathPattern) -> bool:
        """
        Check if a path matches a pattern

        A path matches when all of the following hold:
        - its edge count lies within ``min_path_length`` and ``max_depth``
        - every node has an allowed entity type (if the pattern lists any)
        - every edge has an allowed relation type (if the pattern lists any)
        - the edge types equal ``required_relation_sequence`` (if set)
        - no entity id repeats, unless the pattern allows cycles
        - no node is in ``excluded_entity_ids`` (if set)

        Args:
            path: Path to check
            pattern: Pattern to match against

        Returns:
            True if path matches pattern
        """
        nodes, edges = path.nodes, path.edges

        if not pattern.min_path_length <= len(edges) <= pattern.max_depth:
            return False

        if pattern.entity_types and any(
            node.entity_type not in pattern.entity_types for node in nodes
        ):
            return False

        if pattern.relation_types and any(
            edge.relation_type not in pattern.relation_types for edge in edges
        ):
            return False

        if pattern.required_relation_sequence:
            # List equality covers both the length check and the order check.
            actual_sequence = [edge.relation_type for edge in edges]
            if actual_sequence != list(pattern.required_relation_sequence):
                return False

        if not pattern.allow_cycles:
            entity_ids = [node.id for node in nodes]
            if len(entity_ids) != len(set(entity_ids)):
                return False

        if pattern.excluded_entity_ids and any(
            node.id in pattern.excluded_entity_ids for node in nodes
        ):
            return False

        return True

    async def _get_all_entities(self, entity_types: Optional[List[str]] = None) -> List[Entity]:
        """
        Get all entities, optionally filtered by type

        Placeholder: always returns an empty list. As a consequence,
        ``match_pattern`` without ``start_entity_id`` currently yields no
        matches. A real implementation would query the graph store for its
        entities.

        Args:
            entity_types: Optional list of entity types to filter by (unused for now)

        Returns:
            List of entities (currently always empty)
        """
        return []