aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Structured Data Pipeline
|
|
3
|
+
|
|
4
|
+
Import structured data (CSV, JSON) into knowledge graphs using schema mappings.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import List, Optional, Dict, Any, Callable, Union
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
from datetime import datetime
|
|
13
|
+
|
|
14
|
+
try:
|
|
15
|
+
import pandas as pd
|
|
16
|
+
|
|
17
|
+
PANDAS_AVAILABLE = True
|
|
18
|
+
except ImportError:
|
|
19
|
+
PANDAS_AVAILABLE = False
|
|
20
|
+
|
|
21
|
+
from aiecs.infrastructure.graph_storage.base import GraphStore
|
|
22
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
23
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
24
|
+
from aiecs.application.knowledge_graph.builder.schema_mapping import (
|
|
25
|
+
SchemaMapping,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class ImportResult:
    """
    Outcome and statistics of one structured data import run

    Instances start out as "successful and empty"; the pipeline then
    increments the counters and appends messages as rows are handled.

    Attributes:
        success: Whether import completed successfully
        entities_added: Number of entities added to graph
        relations_added: Number of relations added to graph
        rows_processed: Number of rows processed
        rows_failed: Number of rows that failed to process
        errors: List of errors encountered
        warnings: List of warnings
        start_time: When import started
        end_time: When import ended
        duration_seconds: Total duration in seconds
    """

    # Overall status flag
    success: bool = True

    # Counters
    entities_added: int = 0
    relations_added: int = 0
    rows_processed: int = 0
    rows_failed: int = 0

    # Collected messages (a fresh list per instance via default_factory)
    errors: List[str] = field(default_factory=list)
    warnings: List[str] = field(default_factory=list)

    # Timing information
    start_time: Optional[datetime] = None
    end_time: Optional[datetime] = None
    duration_seconds: float = 0.0
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class StructuredDataPipeline:
    """
    Pipeline for importing structured data (CSV, JSON) into knowledge graphs

    Uses SchemaMapping to map source data columns to entity and relation types.
    Supports batch processing, progress tracking, and error handling.

    Example:
        ```python
        # Define schema mapping
        mapping = SchemaMapping(
            entity_mappings=[
                EntityMapping(
                    source_columns=["id", "name", "age"],
                    entity_type="Person",
                    property_mapping={"id": "id", "name": "name", "age": "age"}
                )
            ],
            relation_mappings=[
                RelationMapping(
                    source_columns=["person_id", "company_id"],
                    relation_type="WORKS_FOR",
                    source_entity_column="person_id",
                    target_entity_column="company_id"
                )
            ]
        )

        # Create pipeline
        pipeline = StructuredDataPipeline(
            mapping=mapping,
            graph_store=store
        )

        # Import CSV
        result = await pipeline.import_from_csv("employees.csv")
        print(f"Added {result.entities_added} entities, {result.relations_added} relations")
        ```
    """

    def __init__(
        self,
        mapping: SchemaMapping,
        graph_store: GraphStore,
        batch_size: int = 100,
        progress_callback: Optional[Callable[[str, float], None]] = None,
        skip_errors: bool = True,
    ):
        """
        Initialize structured data pipeline

        Args:
            mapping: Schema mapping configuration
            graph_store: Graph storage to save entities/relations
            batch_size: Number of rows to process in each batch (must be >= 1)
            progress_callback: Optional callback for progress updates (message, progress_pct)
            skip_errors: Whether to skip rows with errors and continue processing

        Raises:
            ValueError: If the mapping reports validation errors or batch_size
                is smaller than 1
        """
        # Validate mapping
        validation_errors = mapping.validate()
        if validation_errors:
            raise ValueError(f"Invalid schema mapping: {validation_errors}")

        # A zero step makes range() raise later and a negative step silently
        # yields no batches, so reject both up front.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

        self.mapping = mapping
        self.graph_store = graph_store
        self.batch_size = batch_size
        self.progress_callback = progress_callback
        self.skip_errors = skip_errors

        if not PANDAS_AVAILABLE:
            logger.warning(
                "pandas not available. CSV import will use basic CSV reader. "
                "Install pandas for better performance: pip install pandas"
            )

    @staticmethod
    def _stamp_end(result: ImportResult) -> None:
        """Record the end time on ``result`` and derive its duration."""
        result.end_time = datetime.now()
        if result.start_time:
            result.duration_seconds = (result.end_time - result.start_time).total_seconds()

    async def import_from_csv(
        self,
        file_path: Union[str, Path],
        encoding: str = "utf-8",
        delimiter: str = ",",
        header: bool = True,
    ) -> ImportResult:
        """
        Import data from CSV file

        Reads the file with pandas when available, otherwise with the
        standard-library ``csv`` module. Without a header row, columns are
        keyed by their position as a string ("0", "1", ...) on both paths.

        Any exception raised while reading or processing is caught, logged
        and reported through ``ImportResult.success`` / ``errors``; this
        method itself does not raise for such failures.

        Args:
            file_path: Path to CSV file
            encoding: File encoding (default: utf-8)
            delimiter: CSV delimiter (default: comma)
            header: Whether file has header row (default: True)

        Returns:
            ImportResult with statistics
        """
        result = ImportResult(start_time=datetime.now())

        try:
            # Read CSV file
            if PANDAS_AVAILABLE:
                df = pd.read_csv(
                    file_path,
                    encoding=encoding,
                    sep=delimiter,
                    header=0 if header else None,
                )
                if not header:
                    # pandas labels header-less columns with ints; use string
                    # keys so rows match the Dict[str, Any] contract and the
                    # fallback reader below.
                    df.columns = [str(col) for col in df.columns]
                rows = df.to_dict("records")
            else:
                # Fallback to basic CSV reader
                import csv

                # newline="" lets the csv module handle embedded newlines
                # inside quoted fields itself.
                with open(file_path, "r", encoding=encoding, newline="") as f:
                    if header:
                        rows = list(csv.DictReader(f, delimiter=delimiter))
                    else:
                        # No header - use column indices
                        rows = [
                            {str(i): val for i, val in enumerate(row)}
                            for row in csv.reader(f, delimiter=delimiter)
                        ]

            # Process rows
            result = await self._process_rows(rows, result)

        except Exception as e:
            error_msg = f"Failed to import CSV file {file_path}: {e}"
            logger.error(error_msg, exc_info=True)
            result.success = False
            result.errors.append(error_msg)

        finally:
            self._stamp_end(result)

        return result

    async def import_from_json(
        self,
        file_path: Union[str, Path],
        encoding: str = "utf-8",
        array_key: Optional[str] = None,
    ) -> ImportResult:
        """
        Import data from JSON file

        Supports:
        - Array of objects: [{"id": 1, "name": "Alice"}, ...]
        - Object with array: {"items": [{"id": 1, ...}, ...]}
        - Single object: {"id": 1, "name": "Alice"}

        Any exception raised while reading or processing is caught, logged
        and reported through ``ImportResult.success`` / ``errors``.

        Args:
            file_path: Path to JSON file
            encoding: File encoding (default: utf-8)
            array_key: If JSON is object with array, key containing the array.
                A missing key is treated as an empty array.

        Returns:
            ImportResult with statistics
        """
        result = ImportResult(start_time=datetime.now())

        try:
            # Read JSON file
            with open(file_path, "r", encoding=encoding) as f:
                data = json.load(f)

            # Extract rows
            if isinstance(data, list):
                rows = data
            elif isinstance(data, dict):
                if array_key:
                    rows = data.get(array_key, [])
                    if not isinstance(rows, list):
                        raise ValueError(f"Key '{array_key}' does not contain an array")
                else:
                    # Single object - wrap in list
                    rows = [data]
            else:
                raise ValueError(f"JSON file must contain array or object, got {type(data)}")

            # Process rows
            result = await self._process_rows(rows, result)

        except Exception as e:
            error_msg = f"Failed to import JSON file {file_path}: {e}"
            logger.error(error_msg, exc_info=True)
            result.success = False
            result.errors.append(error_msg)

        finally:
            self._stamp_end(result)

        return result

    async def _process_rows(self, rows: List[Dict[str, Any]], result: ImportResult) -> ImportResult:
        """
        Process rows and convert to entities/relations

        Rows are handled in slices of ``self.batch_size``; the optional
        progress callback is invoked before each slice is processed.

        Args:
            rows: List of row dictionaries
            result: ImportResult to update

        Returns:
            Updated ImportResult
        """
        total_rows = len(rows)

        if total_rows == 0:
            result.warnings.append("No rows to process")
            return result

        # Process in batches
        for batch_start in range(0, total_rows, self.batch_size):
            batch_end = min(batch_start + self.batch_size, total_rows)
            batch_rows = rows[batch_start:batch_end]

            # Update progress
            if self.progress_callback:
                progress_pct = (batch_end / total_rows) * 100
                self.progress_callback(
                    f"Processing rows {batch_start+1}-{batch_end} of {total_rows}",
                    progress_pct,
                )

            # Process batch; the offset keeps row numbers in messages absolute
            batch_result = await self._process_batch(batch_rows, row_offset=batch_start)

            # Update result
            result.entities_added += batch_result.entities_added
            result.relations_added += batch_result.relations_added
            result.rows_processed += batch_result.rows_processed
            result.rows_failed += batch_result.rows_failed
            result.errors.extend(batch_result.errors)
            result.warnings.extend(batch_result.warnings)

        return result

    async def _process_batch(self, rows: List[Dict[str, Any]], row_offset: int = 0) -> ImportResult:
        """
        Process a batch of rows

        Every row is first mapped to entities and relations; afterwards all
        collected entities, then all collected relations, are written to the
        graph store one by one.

        Args:
            rows: List of row dictionaries
            row_offset: Index of the first row of this batch within the whole
                import, used only to report absolute row numbers in messages

        Returns:
            ImportResult for this batch

        Raises:
            Exception: Re-raises mapping or storage failures when
                ``skip_errors`` is False
        """
        batch_result = ImportResult()
        batch_result.rows_processed = len(rows)

        # Collect entities and relations
        entities_to_add: List[Entity] = []
        relations_to_add: List[Relation] = []

        for i, row in enumerate(rows):
            row_number = row_offset + i + 1
            # Filled by the mapping helpers when skip_errors swallows a failure
            mapping_errors: List[str] = []

            try:
                # Convert row to entities
                row_entities = await self._row_to_entities(row, mapping_errors)
                entities_to_add.extend(row_entities)

                # Convert row to relations
                row_relations = await self._row_to_relations(row, mapping_errors)
                relations_to_add.extend(row_relations)

            except Exception as e:
                error_msg = f"Failed to process row {row_number}: {e}"
                logger.warning(error_msg, exc_info=True)
                batch_result.rows_failed += 1

                if not self.skip_errors:
                    batch_result.errors.append(error_msg)
                    raise
                batch_result.warnings.append(error_msg)
                continue

            if mapping_errors:
                # At least one mapping of this row failed and was skipped;
                # whatever was mapped successfully is still imported.
                batch_result.rows_failed += 1
                batch_result.warnings.extend(
                    f"Row {row_number}: {msg}" for msg in mapping_errors
                )

        # Add entities to graph store
        for entity in entities_to_add:
            try:
                await self.graph_store.add_entity(entity)
                batch_result.entities_added += 1
            except Exception as e:
                error_msg = f"Failed to add entity {entity.id}: {e}"
                logger.warning(error_msg)
                if not self.skip_errors:
                    batch_result.errors.append(error_msg)
                    raise
                batch_result.warnings.append(error_msg)

        # Add relations to graph store
        for relation in relations_to_add:
            try:
                await self.graph_store.add_relation(relation)
                batch_result.relations_added += 1
            except Exception as e:
                error_msg = f"Failed to add relation {relation.id}: {e}"
                logger.warning(error_msg)
                if not self.skip_errors:
                    batch_result.errors.append(error_msg)
                    raise
                batch_result.warnings.append(error_msg)

        return batch_result

    async def _row_to_entities(
        self, row: Dict[str, Any], errors: Optional[List[str]] = None
    ) -> List[Entity]:
        """
        Convert a row to entities based on entity mappings

        Args:
            row: Dictionary of column name -> value
            errors: Optional list that receives the message of every mapping
                failure that was skipped because ``skip_errors`` is True

        Returns:
            List of Entity objects

        Raises:
            ValueError: If a mapping fails and ``skip_errors`` is False
        """
        entities = []

        for entity_mapping in self.mapping.entity_mappings:
            try:
                # Map row to entity using mapping
                entity_data = entity_mapping.map_row_to_entity(row)

                # Create Entity object
                entity = Entity(
                    id=entity_data["id"],
                    entity_type=entity_data["type"],
                    properties=entity_data["properties"],
                    metadata={
                        "source": "structured_data_import",
                        "imported_at": datetime.now().isoformat(),
                    },
                )

                entities.append(entity)

            except Exception as e:
                error_msg = f"Failed to map row to entity type '{entity_mapping.entity_type}': {e}"
                logger.warning(error_msg)
                if not self.skip_errors:
                    raise ValueError(error_msg) from e
                if errors is not None:
                    errors.append(error_msg)

        return entities

    async def _row_to_relations(
        self, row: Dict[str, Any], errors: Optional[List[str]] = None
    ) -> List[Relation]:
        """
        Convert a row to relations based on relation mappings

        The relation id is built as "<source_id>_<type>_<target_id>".

        Args:
            row: Dictionary of column name -> value
            errors: Optional list that receives the message of every mapping
                failure that was skipped because ``skip_errors`` is True

        Returns:
            List of Relation objects

        Raises:
            ValueError: If a mapping fails and ``skip_errors`` is False
        """
        relations = []

        for relation_mapping in self.mapping.relation_mappings:
            try:
                # Map row to relation using mapping
                relation_data = relation_mapping.map_row_to_relation(row)

                # Create Relation object
                relation = Relation(
                    id=f"{relation_data['source_id']}_{relation_data['type']}_{relation_data['target_id']}",
                    relation_type=relation_data["type"],
                    source_id=relation_data["source_id"],
                    target_id=relation_data["target_id"],
                    properties=relation_data["properties"],
                    metadata={
                        "source": "structured_data_import",
                        "imported_at": datetime.now().isoformat(),
                    },
                )

                relations.append(relation)

            except Exception as e:
                error_msg = (
                    f"Failed to map row to relation type '{relation_mapping.relation_type}': {e}"
                )
                logger.warning(error_msg)
                if not self.skip_errors:
                    raise ValueError(error_msg) from e
                if errors is not None:
                    errors.append(error_msg)

        return relations
|