aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,443 @@
1
+ """
2
+ Structured Data Pipeline
3
+
4
+ Import structured data (CSV, JSON) into knowledge graphs using schema mappings.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import List, Optional, Dict, Any, Callable, Union
11
+ from dataclasses import dataclass, field
12
+ from datetime import datetime
13
+
14
+ try:
15
+ import pandas as pd
16
+
17
+ PANDAS_AVAILABLE = True
18
+ except ImportError:
19
+ PANDAS_AVAILABLE = False
20
+
21
+ from aiecs.infrastructure.graph_storage.base import GraphStore
22
+ from aiecs.domain.knowledge_graph.models.entity import Entity
23
+ from aiecs.domain.knowledge_graph.models.relation import Relation
24
+ from aiecs.application.knowledge_graph.builder.schema_mapping import (
25
+ SchemaMapping,
26
+ )
27
+
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
@dataclass
class ImportResult:
    """
    Outcome summary of one structured data import run.

    An instance starts out "successful" with all counters at zero; the
    importing code mutates it as work proceeds.

    Attributes:
        success: False once the import as a whole has failed
        entities_added: Count of entities written to the graph
        relations_added: Count of relations written to the graph
        rows_processed: Count of source rows handed to the pipeline
        rows_failed: Count of source rows that could not be processed
        errors: Human-readable error messages
        warnings: Human-readable warning messages
        start_time: Timestamp taken when the import began (None if unset)
        end_time: Timestamp taken when the import finished (None if unset)
        duration_seconds: Elapsed time between start and end, in seconds
    """

    # Overall status flag.
    success: bool = True

    # Counters.
    entities_added: int = 0
    relations_added: int = 0
    rows_processed: int = 0
    rows_failed: int = 0

    # Message collections; default_factory gives each instance its own list.
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    # Timing information.
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: float = 0.0
60
+
61
+
62
class StructuredDataPipeline:
    """
    Pipeline for importing structured data (CSV, JSON) into knowledge graphs

    Uses SchemaMapping to map source data columns to entity and relation types.
    Supports batch processing, progress tracking, and error handling.

    Example:
    ```python
    # Define schema mapping
    mapping = SchemaMapping(
        entity_mappings=[
            EntityMapping(
                source_columns=["id", "name", "age"],
                entity_type="Person",
                property_mapping={"id": "id", "name": "name", "age": "age"}
            )
        ],
        relation_mappings=[
            RelationMapping(
                source_columns=["person_id", "company_id"],
                relation_type="WORKS_FOR",
                source_entity_column="person_id",
                target_entity_column="company_id"
            )
        ]
    )

    # Create pipeline
    pipeline = StructuredDataPipeline(
        mapping=mapping,
        graph_store=store
    )

    # Import CSV
    result = await pipeline.import_from_csv("employees.csv")
    print(f"Added {result.entities_added} entities, {result.relations_added} relations")
    ```
    """

    def __init__(
        self,
        mapping: SchemaMapping,
        graph_store: GraphStore,
        batch_size: int = 100,
        progress_callback: Optional[Callable[[str, float], None]] = None,
        skip_errors: bool = True,
    ):
        """
        Initialize structured data pipeline

        Args:
            mapping: Schema mapping configuration
            graph_store: Graph storage to save entities/relations
            batch_size: Number of rows to process in each batch (must be >= 1)
            progress_callback: Optional callback for progress updates (message, progress_pct)
            skip_errors: Whether to skip rows with errors and continue processing

        Raises:
            ValueError: If the mapping reports validation errors or batch_size
                is not a positive number
        """
        # Validate mapping
        validation_errors = mapping.validate()
        if validation_errors:
            raise ValueError(f"Invalid schema mapping: {validation_errors}")

        # A zero step makes range() raise later on, and a negative step makes
        # the batching loop a silent no-op, so reject both up front.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        self.mapping = mapping
        self.graph_store = graph_store
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.skip_errors = skip_errors

        if not PANDAS_AVAILABLE:
            logger.warning(
                "pandas not available. CSV import will use basic CSV reader. "
                "Install pandas for better performance: pip install pandas"
            )

    async def import_from_csv(
        self,
        file_path: Union[str, Path],
        encoding: str = "utf-8",
        delimiter: str = ",",
        header: bool = True,
    ) -> ImportResult:
        """
        Import data from CSV file

        Any exception raised while reading or processing is caught, logged and
        recorded on the returned result (success=False) rather than propagated.

        Args:
            file_path: Path to CSV file
            encoding: File encoding (default: utf-8)
            delimiter: CSV delimiter (default: comma)
            header: Whether file has header row (default: True)

        Returns:
            ImportResult with statistics
        """
        result = ImportResult(start_time=datetime.now())

        try:
            rows = self._read_csv_rows(file_path, encoding, delimiter, header)
            result = await self._process_rows(rows, result)

        except Exception as e:
            error_msg = f"Failed to import CSV file {file_path}: {e}"
            logger.error(error_msg, exc_info=True)
            result.success = False
            result.errors.append(error_msg)

        finally:
            self._stamp_end(result)

        return result

    def _read_csv_rows(
        self,
        file_path: Union[str, Path],
        encoding: str,
        delimiter: str,
        header: bool,
    ) -> List[Dict[str, Any]]:
        """
        Load a CSV file into a list of row dictionaries

        Uses pandas when it is importable, otherwise the standard csv module.

        Args:
            file_path: Path to CSV file
            encoding: File encoding
            delimiter: Field delimiter, honoured by both code paths
            header: Whether the first line holds column names

        Returns:
            One dictionary per data row
        """
        if PANDAS_AVAILABLE:
            # NOTE(review): with header=False pandas labels columns with
            # integers, while the csv fallback below uses string indices
            # ("0", "1", ...) - confirm which form the schema mapping expects.
            df = pd.read_csv(
                file_path,
                encoding=encoding,
                sep=delimiter,
                header=0 if header else None,
            )
            return df.to_dict("records")

        # Fallback to basic CSV reader
        import csv

        # newline="" lets the csv module handle line endings itself, which is
        # required for quoted fields containing embedded newlines.
        with open(file_path, "r", encoding=encoding, newline="") as f:
            if header:
                return list(csv.DictReader(f, delimiter=delimiter))
            # No header - use column indices
            return [
                {str(i): val for i, val in enumerate(row)}
                for row in csv.reader(f, delimiter=delimiter)
            ]

    @staticmethod
    def _stamp_end(result: ImportResult) -> None:
        """Record the end time on *result* and derive the duration when a start time exists."""
        result.end_time = datetime.now()
        if result.start_time:
            result.duration_seconds = (result.end_time - result.start_time).total_seconds()

    async def import_from_json(
        self,
        file_path: Union[str, Path],
        encoding: str = "utf-8",
        array_key: Optional[str] = None,
    ) -> ImportResult:
        """
        Import data from JSON file

        Supports:
        - Array of objects: [{"id": 1, "name": "Alice"}, ...]
        - Object with array: {"items": [{"id": 1, ...}, ...]}
        - Single object: {"id": 1, "name": "Alice"}

        Any exception raised while reading or processing is caught, logged and
        recorded on the returned result (success=False) rather than propagated.

        Args:
            file_path: Path to JSON file
            encoding: File encoding (default: utf-8)
            array_key: If JSON is object with array, key containing the array.
                A key that is absent from the object is treated as an empty array.

        Returns:
            ImportResult with statistics
        """
        result = ImportResult(start_time=datetime.now())

        try:
            # Read JSON file
            with open(file_path, "r", encoding=encoding) as f:
                data = json.load(f)

            # Extract rows
            if isinstance(data, list):
                rows = data
            elif isinstance(data, dict):
                if array_key:
                    rows = data.get(array_key, [])
                    if not isinstance(rows, list):
                        raise ValueError(f"Key '{array_key}' does not contain an array")
                else:
                    # Single object - wrap in list
                    rows = [data]
            else:
                raise ValueError(f"JSON file must contain array or object, got {type(data)}")

            # Process rows
            result = await self._process_rows(rows, result)

        except Exception as e:
            error_msg = f"Failed to import JSON file {file_path}: {e}"
            logger.error(error_msg, exc_info=True)
            result.success = False
            result.errors.append(error_msg)

        finally:
            self._stamp_end(result)

        return result

    async def _process_rows(self, rows: List[Dict[str, Any]], result: ImportResult) -> ImportResult:
        """
        Process rows and convert to entities/relations

        Rows are handled in slices of ``self.batch_size``; the counters and
        messages of every batch are folded into *result*.

        Args:
            rows: List of row dictionaries
            result: ImportResult to update

        Returns:
            Updated ImportResult (the same object that was passed in)
        """
        total_rows = len(rows)

        if total_rows == 0:
            result.warnings.append("No rows to process")
            return result

        # Process in batches
        for batch_start in range(0, total_rows, self.batch_size):
            batch_end = min(batch_start + self.batch_size, total_rows)
            batch_rows = rows[batch_start:batch_end]

            # Update progress (reported before the batch is processed)
            if self.progress_callback:
                progress_pct = (batch_end / total_rows) * 100
                self.progress_callback(
                    f"Processing rows {batch_start+1}-{batch_end} of {total_rows}",
                    progress_pct,
                )

            # Process batch; the offset keeps reported row numbers absolute
            batch_result = await self._process_batch(batch_rows, row_offset=batch_start)

            # Update result
            result.entities_added += batch_result.entities_added
            result.relations_added += batch_result.relations_added
            result.rows_processed += batch_result.rows_processed
            result.rows_failed += batch_result.rows_failed
            result.errors.extend(batch_result.errors)
            result.warnings.extend(batch_result.warnings)

        return result

    async def _process_batch(self, rows: List[Dict[str, Any]], row_offset: int = 0) -> ImportResult:
        """
        Process a batch of rows

        All rows are first converted to entities and relations, then the
        entities are stored, then the relations.

        Args:
            rows: List of row dictionaries
            row_offset: Number of rows that precede this batch in the overall
                input; used only to report absolute row numbers in messages

        Returns:
            ImportResult for this batch

        Raises:
            Exception: Re-raised from row conversion or graph store calls when
                ``skip_errors`` is False
        """
        batch_result = ImportResult()
        batch_result.rows_processed = len(rows)

        # Collect entities and relations
        entities_to_add: List[Entity] = []
        relations_to_add: List[Relation] = []

        for position, row in enumerate(rows, start=1):
            row_number = row_offset + position
            # Filled by the mapping helpers with failures they skipped over.
            mapping_failures: List[str] = []
            raised = False

            try:
                # Convert row to entities
                row_entities = await self._row_to_entities(row, mapping_failures)
                entities_to_add.extend(row_entities)

                # Convert row to relations
                row_relations = await self._row_to_relations(row, mapping_failures)
                relations_to_add.extend(row_relations)

            except Exception as e:
                raised = True
                error_msg = f"Failed to process row {row_number}: {e}"
                logger.warning(error_msg, exc_info=True)
                batch_result.rows_failed += 1

                if not self.skip_errors:
                    batch_result.errors.append(error_msg)
                    raise
                batch_result.warnings.append(error_msg)

            if mapping_failures:
                # Surface skipped mapping failures instead of only logging them.
                batch_result.warnings.extend(
                    f"Row {row_number}: {message}" for message in mapping_failures
                )
                if not raised:
                    batch_result.rows_failed += 1

        # Add entities to graph store
        for entity in entities_to_add:
            try:
                await self.graph_store.add_entity(entity)
                batch_result.entities_added += 1
            except Exception as e:
                error_msg = f"Failed to add entity {entity.id}: {e}"
                logger.warning(error_msg)
                if not self.skip_errors:
                    batch_result.errors.append(error_msg)
                    raise
                batch_result.warnings.append(error_msg)

        # Add relations to graph store
        for relation in relations_to_add:
            try:
                await self.graph_store.add_relation(relation)
                batch_result.relations_added += 1
            except Exception as e:
                error_msg = f"Failed to add relation {relation.id}: {e}"
                logger.warning(error_msg)
                if not self.skip_errors:
                    batch_result.errors.append(error_msg)
                    raise
                batch_result.warnings.append(error_msg)

        return batch_result

    async def _row_to_entities(
        self,
        row: Dict[str, Any],
        failures: Optional[List[str]] = None,
    ) -> List[Entity]:
        """
        Convert a row to entities based on entity mappings

        Args:
            row: Dictionary of column name -> value
            failures: Optional list that receives one message per entity
                mapping that failed and was skipped (``skip_errors`` is True)

        Returns:
            List of Entity objects

        Raises:
            ValueError: If a mapping fails and ``skip_errors`` is False
        """
        entities = []

        for entity_mapping in self.mapping.entity_mappings:
            try:
                # Map row to entity using mapping
                entity_data = entity_mapping.map_row_to_entity(row)

                # Create Entity object
                entity = Entity(
                    id=entity_data["id"],
                    entity_type=entity_data["type"],
                    properties=entity_data["properties"],
                    metadata={
                        "source": "structured_data_import",
                        "imported_at": datetime.now().isoformat(),
                    },
                )

                entities.append(entity)

            except Exception as e:
                error_msg = f"Failed to map row to entity type '{entity_mapping.entity_type}': {e}"
                logger.warning(error_msg)
                if not self.skip_errors:
                    # Chain the cause so the original traceback is preserved.
                    raise ValueError(error_msg) from e
                if failures is not None:
                    failures.append(error_msg)

        return entities

    async def _row_to_relations(
        self,
        row: Dict[str, Any],
        failures: Optional[List[str]] = None,
    ) -> List[Relation]:
        """
        Convert a row to relations based on relation mappings

        Relation ids are built as ``<source_id>_<type>_<target_id>``.

        Args:
            row: Dictionary of column name -> value
            failures: Optional list that receives one message per relation
                mapping that failed and was skipped (``skip_errors`` is True)

        Returns:
            List of Relation objects

        Raises:
            ValueError: If a mapping fails and ``skip_errors`` is False
        """
        relations = []

        for relation_mapping in self.mapping.relation_mappings:
            try:
                # Map row to relation using mapping
                relation_data = relation_mapping.map_row_to_relation(row)

                # Create Relation object
                relation = Relation(
                    id=f"{relation_data['source_id']}_{relation_data['type']}_{relation_data['target_id']}",
                    relation_type=relation_data["type"],
                    source_id=relation_data["source_id"],
                    target_id=relation_data["target_id"],
                    properties=relation_data["properties"],
                    metadata={
                        "source": "structured_data_import",
                        "imported_at": datetime.now().isoformat(),
                    },
                )

                relations.append(relation)

            except Exception as e:
                error_msg = (
                    f"Failed to map row to relation type '{relation_mapping.relation_type}': {e}"
                )
                logger.warning(error_msg)
                if not self.skip_errors:
                    # Chain the cause so the original traceback is preserved.
                    raise ValueError(error_msg) from e
                if failures is not None:
                    failures.append(error_msg)

        return relations