aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,531 @@
1
+ """
2
+ Schema Mapping for Structured Data Import
3
+
4
+ Maps structured data (CSV, JSON) columns to knowledge graph entity and relation types
5
+ with support for property transformations.
6
+ """
7
+
8
+ from typing import Dict, List, Optional, Any
9
+ from enum import Enum
10
+ from pydantic import BaseModel, Field, field_validator
11
+ from aiecs.domain.knowledge_graph.schema.property_schema import PropertyType
12
+
13
+
14
class TransformationType(str, Enum):
    """
    Kinds of property transformation a mapping can request.

    Members subclass ``str``, so each one compares equal to its raw value
    (for example ``TransformationType.RENAME == "rename"``).
    """

    # Copy the source column's value into the target property.
    RENAME = "rename"

    # Convert the source column's value to a different type.
    TYPE_CAST = "type_cast"

    # Derive the value from one or more columns.
    COMPUTE = "compute"

    # Emit a fixed, configured value.
    CONSTANT = "constant"

    # Ignore this column entirely.
    SKIP = "skip"
22
+
23
+
24
class PropertyTransformation(BaseModel):
    """
    Property transformation configuration

    Defines how a source column/value is transformed into a target property.
    Which of the optional fields are required depends on ``transformation_type``;
    the requirement is enforced when ``apply`` is called, not at construction.
    """

    transformation_type: TransformationType = Field(
        ..., description="Type of transformation to apply"
    )

    source_column: Optional[str] = Field(
        default=None,
        description="Source column name (for rename/type_cast/compute)",
    )

    target_property: str = Field(..., description="Target property name in entity/relation")

    target_type: Optional[PropertyType] = Field(
        default=None, description="Target property type (for type_cast)"
    )

    constant_value: Optional[Any] = Field(
        default=None,
        description="Constant value (for constant transformation)",
    )

    compute_function: Optional[str] = Field(
        default=None,
        description="Function name for compute transformation (e.g., 'concat', 'sum')",
    )

    compute_args: Optional[List[str]] = Field(
        default=None,
        description="Additional column names for compute function",
    )

    @field_validator("transformation_type")
    @classmethod
    def validate_transformation_type(cls, v: TransformationType) -> TransformationType:
        """Pass-through validator; the enum annotation already restricts the value."""
        return v

    def apply(self, row: Dict[str, Any]) -> Any:
        """
        Apply transformation to a data row

        Args:
            row: Dictionary of column name -> value

        Returns:
            Transformed value for target property. ``None`` is returned for SKIP,
            for a missing RENAME column, and for a TYPE_CAST whose source value
            is missing or ``None``.

        Raises:
            ValueError: If a field required by the transformation type is not
                configured, if a cast fails, or if the compute function is unknown.
        """
        kind = self.transformation_type

        if kind == TransformationType.RENAME:
            if self.source_column is None:
                raise ValueError("source_column required for rename transformation")
            return row.get(self.source_column)

        if kind == TransformationType.TYPE_CAST:
            if self.source_column is None:
                raise ValueError("source_column required for type_cast transformation")
            if self.target_type is None:
                raise ValueError("target_type required for type_cast transformation")

            value = row.get(self.source_column)
            if value is None:
                return None
            return self._cast_value(value, self.target_type)

        if kind == TransformationType.COMPUTE:
            if self.compute_function is None:
                raise ValueError("compute_function required for compute transformation")

            # source_column (if any) comes first, followed by compute_args in order.
            source_values = []
            if self.source_column:
                source_values.append(row.get(self.source_column))
            if self.compute_args:
                source_values.extend(row.get(col) for col in self.compute_args)
            return self._compute_value(self.compute_function, source_values)

        if kind == TransformationType.CONSTANT:
            return self.constant_value

        if kind == TransformationType.SKIP:
            return None

        raise ValueError(f"Unknown transformation type: {self.transformation_type}")

    def _cast_value(self, value: Any, target_type: PropertyType) -> Any:
        """
        Cast value to target type

        Args:
            value: Non-None source value
            target_type: Property type to convert to

        Returns:
            The converted value; the value is returned unchanged for any type
            not handled explicitly below.

        Raises:
            ValueError: If the value cannot be converted (including booleans
                passed to a numeric cast and values that overflow).
        """
        import json

        try:
            if target_type == PropertyType.STRING:
                return str(value)

            if target_type == PropertyType.INTEGER:
                if isinstance(value, bool):
                    raise ValueError(f"Cannot cast boolean {value} to integer")
                if isinstance(value, int):
                    # Already integral: going through float would lose precision
                    # above 2**53.
                    return int(value)
                if isinstance(value, str):
                    try:
                        # Exact parse first so large integer strings are not rounded.
                        return int(value)
                    except ValueError:
                        pass
                return int(float(value))  # Handle "123.0" -> 123

            if target_type == PropertyType.FLOAT:
                if isinstance(value, bool):
                    raise ValueError(f"Cannot cast boolean {value} to float")
                return float(value)

            if target_type == PropertyType.BOOLEAN:
                if isinstance(value, str):
                    return value.strip().lower() in ("true", "1", "yes", "on")
                return bool(value)

            if target_type == PropertyType.LIST:
                if isinstance(value, list):
                    return value
                if isinstance(value, str):
                    if not value.strip():
                        # A blank cell is an empty list, not [""].
                        return []
                    try:
                        parsed = json.loads(value)
                    except json.JSONDecodeError:
                        parsed = None
                    if isinstance(parsed, list):
                        return parsed
                    # Not a JSON list (plain text or a JSON scalar/object):
                    # treat the text as comma-separated items.
                    return [item.strip() for item in value.split(",")]
                return [value]

            if target_type == PropertyType.DICT:
                if isinstance(value, dict):
                    return value
                if isinstance(value, str):
                    try:
                        parsed = json.loads(value)
                    except json.JSONDecodeError:
                        parsed = None
                    if isinstance(parsed, dict):
                        return parsed
                # Anything that is not (or does not parse to) a dict is wrapped
                # under a "value" key so the result is always a dict.
                return {"value": value}

            return value  # ANY type or unknown
        except (ValueError, TypeError, OverflowError) as e:
            raise ValueError(f"Failed to cast {value} to {target_type}: {e}") from e

    def _compute_value(self, function_name: str, values: List[Any]) -> Any:
        """
        Compute value using function

        Args:
            function_name: One of "concat", "concat_space", "concat_comma",
                "sum", "avg"/"average", "max", "min"
            values: Source values in column order; ``None`` entries are ignored

        Returns:
            The joined string for concat functions. For "sum", the float total
            of the numeric values (``0`` when there are none). For "avg",
            the mean of the numeric values or ``None`` when there are none.
            For "max"/"min", the extreme of the numeric values as a float; when
            no value is numeric, the extreme of the raw values; ``None`` when
            there are no values at all.

        Raises:
            ValueError: If ``function_name`` is not recognised.
        """
        present = [v for v in values if v is not None]

        separators = {"concat": "", "concat_space": " ", "concat_comma": ", "}
        if function_name in separators:
            return separators[function_name].join(str(v) for v in present)

        if function_name not in ("sum", "avg", "average", "max", "min"):
            raise ValueError(f"Unknown compute function: {function_name}")

        numeric_values = [float(v) for v in present if self._is_numeric(v)]

        if function_name == "sum":
            return sum(numeric_values)

        if function_name in ("avg", "average"):
            if not numeric_values:
                return None
            return sum(numeric_values) / len(numeric_values)

        # max / min
        if not present:
            return None
        pick = max if function_name == "max" else min
        # Fall back to comparing the raw values when none of them is numeric.
        return pick(numeric_values) if numeric_values else pick(present)

    @staticmethod
    def _is_numeric(value: Any) -> bool:
        """Check if value is numeric, i.e. whether ``float(value)`` succeeds"""
        try:
            float(value)
            return True
        except (ValueError, TypeError, OverflowError):
            # OverflowError: an int too large to be represented as a float.
            return False
209
+
210
+
211
class EntityMapping(BaseModel):
    """
    Entity mapping configuration

    Maps source data columns to an entity type with property transformations.
    """

    source_columns: List[str] = Field(..., description="Source column names to use for this entity")

    entity_type: str = Field(..., description="Target entity type name")

    property_mapping: Dict[str, str] = Field(
        default_factory=dict,
        description="Simple column-to-property mapping (column_name -> property_name)",
    )

    transformations: List[PropertyTransformation] = Field(
        default_factory=list, description="Property transformations to apply"
    )

    id_column: Optional[str] = Field(
        default=None,
        description="Column to use as entity ID (default: first column or generated)",
    )

    @field_validator("source_columns")
    @classmethod
    def validate_source_columns(cls, v: List[str]) -> List[str]:
        """Validate source columns are not empty"""
        if not v:
            raise ValueError("source_columns cannot be empty")
        return v

    def map_row_to_entity(
        self, row: Dict[str, Any], entity_id: Optional[str] = None
    ) -> Dict[str, Any]:
        """
        Map a data row to entity properties

        Simple ``property_mapping`` entries are applied first; transformations
        run afterwards and overwrite a property of the same name. A failing
        transformation is logged as a warning and skipped (best effort).

        Args:
            row: Dictionary of column name -> value
            entity_id: Optional entity ID. When omitted, the ID is taken from
                ``id_column`` if it holds a non-null value, otherwise from the
                first source column.

        Returns:
            Dictionary with keys "id", "type" and "properties". "id" is an
            empty string when no non-null ID value is found in the row; this
            method does not generate IDs itself.
        """
        import logging

        properties: Dict[str, Any] = {}

        # Apply simple property mappings first
        for column, property_name in self.property_mapping.items():
            if column in row:
                properties[property_name] = row[column]

        # Apply transformations
        for transformation in self.transformations:
            if transformation.transformation_type == TransformationType.SKIP:
                # SKIP never yields a property.
                continue
            try:
                properties[transformation.target_property] = transformation.apply(row)
            except Exception as e:
                # Log warning but continue
                logging.getLogger(__name__).warning(
                    "Transformation failed for %s: %s", transformation.target_property, e
                )

        # Determine entity ID
        if entity_id is None:
            raw_id = row.get(self.id_column) if self.id_column else None
            if raw_id is None and self.source_columns:
                # Use first column as ID. A null cell maps to "" rather than
                # the literal string "None" that str(None) would produce.
                raw_id = row.get(self.source_columns[0])
                if raw_id is None:
                    raw_id = ""
            if raw_id is not None:
                entity_id = str(raw_id)

        return {
            "id": entity_id,
            "type": self.entity_type,
            "properties": properties,
        }
293
+
294
+
295
class RelationMapping(BaseModel):
    """
    Relation mapping configuration

    Maps source data columns to a relation type between entities.
    """

    source_columns: List[str] = Field(
        ..., description="Source column names to use for this relation"
    )

    relation_type: str = Field(..., description="Target relation type name")

    source_entity_column: str = Field(..., description="Column name containing source entity ID")

    target_entity_column: str = Field(..., description="Column name containing target entity ID")

    property_mapping: Dict[str, str] = Field(
        default_factory=dict, description="Simple column-to-property mapping"
    )

    transformations: List[PropertyTransformation] = Field(
        default_factory=list, description="Property transformations to apply"
    )

    @field_validator("source_columns")
    @classmethod
    def validate_source_columns(cls, v: List[str]) -> List[str]:
        """Validate source columns are not empty"""
        if not v:
            raise ValueError("source_columns cannot be empty")
        return v

    @field_validator("source_entity_column", "target_entity_column")
    @classmethod
    def validate_entity_columns(cls, v: str) -> str:
        """Validate entity column names are provided"""
        if not v:
            raise ValueError("Entity column names cannot be empty")
        return v

    def map_row_to_relation(self, row: Dict[str, Any]) -> Dict[str, Any]:
        """
        Map a data row to relation properties

        Simple ``property_mapping`` entries are applied first; transformations
        run afterwards and overwrite a property of the same name. A failing
        transformation is logged as a warning and skipped (best effort).

        Args:
            row: Dictionary of column name -> value

        Returns:
            Dictionary with relation properties (source_id, target_id, type, properties)

        Raises:
            ValueError: If the source or target entity ID is absent from the
                row, is null, or is an empty string.
        """
        import logging

        # Get source and target entity IDs. A null cell counts as missing:
        # str(None) would yield the truthy string "None" and slip past the check.
        raw_source = row.get(self.source_entity_column)
        raw_target = row.get(self.target_entity_column)
        source_id = "" if raw_source is None else str(raw_source)
        target_id = "" if raw_target is None else str(raw_target)

        if not source_id or not target_id:
            raise ValueError(
                f"Missing entity IDs: source={source_id}, target={target_id}. "
                f"Columns: source={self.source_entity_column}, target={self.target_entity_column}"
            )

        properties: Dict[str, Any] = {}

        # Apply simple property mappings
        for column, property_name in self.property_mapping.items():
            if column in row:
                properties[property_name] = row[column]

        # Apply transformations
        for transformation in self.transformations:
            if transformation.transformation_type == TransformationType.SKIP:
                # SKIP never yields a property.
                continue
            try:
                properties[transformation.target_property] = transformation.apply(row)
            except Exception as e:
                # Log warning but continue
                logging.getLogger(__name__).warning(
                    "Transformation failed for %s: %s", transformation.target_property, e
                )

        return {
            "source_id": source_id,
            "target_id": target_id,
            "type": self.relation_type,
            "properties": properties,
        }
385
+
386
+
387
class SchemaMapping(BaseModel):
    """
    Schema mapping configuration

    Defines how structured data (CSV, JSON) maps to knowledge graph entities and relations.
    """

    entity_mappings: List[EntityMapping] = Field(
        default_factory=list, description="Entity type mappings"
    )

    relation_mappings: List[RelationMapping] = Field(
        default_factory=list, description="Relation type mappings"
    )

    description: Optional[str] = Field(
        default=None, description="Human-readable description of this mapping"
    )

    @staticmethod
    def _transformation_errors(label: str, transformations: List) -> List[str]:
        """
        Collect validation errors for the transformations of a single mapping

        Args:
            label: Prefix identifying the owning mapping (e.g. "Entity mapping 0")
            transformations: Transformation objects attached to that mapping

        Returns:
            List of validation error messages (empty if all transformations are valid)
        """
        errors: List[str] = []

        for j, trans in enumerate(transformations):
            prefix = f"{label}, transformation {j}: "

            if not trans.target_property:
                errors.append(f"{prefix}target_property is required")

            # Each transformation type needs its own set of configuration fields
            if trans.transformation_type == TransformationType.RENAME:
                if not trans.source_column:
                    errors.append(f"{prefix}source_column required for rename")

            elif trans.transformation_type == TransformationType.TYPE_CAST:
                if not trans.source_column:
                    errors.append(f"{prefix}source_column required for type_cast")
                if not trans.target_type:
                    errors.append(f"{prefix}target_type required for type_cast")

            elif trans.transformation_type == TransformationType.COMPUTE:
                if not trans.compute_function:
                    errors.append(f"{prefix}compute_function required for compute")

        return errors

    # NOTE(review): if BaseModel is pydantic's, this instance method shadows the
    # library's own `BaseModel.validate` classmethod - confirm that is intended.
    def validate(self) -> List[str]:
        """
        Validate mapping consistency

        Checks every entity and relation mapping for missing required fields,
        duplicate type names and incompletely configured transformations.
        Problems are collected rather than raised, so all of them are reported
        in a single pass.

        Returns:
            List of validation error messages (empty if valid)
        """
        errors: List[str] = []

        # Check entity mappings
        entity_type_names = set()
        for i, mapping in enumerate(self.entity_mappings):
            label = f"Entity mapping {i}"

            if not mapping.entity_type:
                errors.append(f"{label}: entity_type is required")
            elif mapping.entity_type in entity_type_names:
                errors.append(f"{label}: duplicate entity_type '{mapping.entity_type}'")
            else:
                # Only real (non-empty) names take part in duplicate detection,
                # so a missing name is not additionally reported as a duplicate
                entity_type_names.add(mapping.entity_type)

            # Check that source columns are specified
            if not mapping.source_columns:
                errors.append(f"{label}: source_columns cannot be empty")

            # Check transformations
            errors.extend(self._transformation_errors(label, mapping.transformations))

        # Check relation mappings
        relation_type_names = set()
        for i, mapping in enumerate(self.relation_mappings):
            label = f"Relation mapping {i}"

            if not mapping.relation_type:
                errors.append(f"{label}: relation_type is required")
            elif mapping.relation_type in relation_type_names:
                errors.append(f"{label}: duplicate relation_type '{mapping.relation_type}'")
            else:
                relation_type_names.add(mapping.relation_type)

            # Check entity columns
            if not mapping.source_entity_column:
                errors.append(f"{label}: source_entity_column is required")
            if not mapping.target_entity_column:
                errors.append(f"{label}: target_entity_column is required")

            # Check that source columns include entity columns. A missing column
            # name was already reported above, so the membership error is only
            # emitted for names that are actually set.
            if (
                mapping.source_entity_column
                and mapping.source_entity_column not in mapping.source_columns
            ):
                errors.append(
                    f"{label}: source_entity_column '{mapping.source_entity_column}' "
                    f"must be in source_columns"
                )
            if (
                mapping.target_entity_column
                and mapping.target_entity_column not in mapping.source_columns
            ):
                errors.append(
                    f"{label}: target_entity_column '{mapping.target_entity_column}' "
                    f"must be in source_columns"
                )

            # Relation mappings apply transformations as well, so they get the
            # same checks as entity mappings instead of failing later per row
            errors.extend(self._transformation_errors(label, mapping.transformations))

        return errors

    def is_valid(self) -> bool:
        """
        Check if mapping is valid

        Returns:
            True if mapping is valid (i.e. validate() reports no errors)
        """
        return not self.validate()

    def get_entity_mapping(self, entity_type: str) -> Optional[EntityMapping]:
        """
        Get entity mapping by entity type name

        Args:
            entity_type: Entity type name

        Returns:
            First entity mapping with that type name, or None if not found
        """
        return next(
            (mapping for mapping in self.entity_mappings if mapping.entity_type == entity_type),
            None,
        )

    def get_relation_mapping(self, relation_type: str) -> Optional[RelationMapping]:
        """
        Get relation mapping by relation type name

        Args:
            relation_type: Relation type name

        Returns:
            First relation mapping with that type name, or None if not found
        """
        return next(
            (
                mapping
                for mapping in self.relation_mappings
                if mapping.relation_type == relation_type
            ),
            None,
        )