aiecs 1.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,495 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Streaming Support for Graph Storage
|
|
3
|
+
|
|
4
|
+
Provides streaming export and import capabilities for large graphs,
|
|
5
|
+
enabling efficient handling of millions of entities and relations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
import logging
|
|
10
|
+
from typing import AsyncIterator, Optional, Dict, Any
|
|
11
|
+
from enum import Enum
|
|
12
|
+
import gzip
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from datetime import datetime
|
|
15
|
+
|
|
16
|
+
from aiecs.domain.knowledge_graph.models.entity import Entity
|
|
17
|
+
from aiecs.domain.knowledge_graph.models.relation import Relation
|
|
18
|
+
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` values as ISO-8601 strings.

    Every other object the stock encoder cannot handle is delegated to
    ``json.JSONEncoder.default``.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for *obj*.

        Args:
            obj: Object the standard encoder could not serialize.

        Returns:
            ``obj.isoformat()`` when *obj* is a ``datetime``; otherwise
            whatever the base class ``default`` returns or raises.
        """
        if not isinstance(obj, datetime):
            # Not a datetime: defer to the base implementation.
            return super().default(obj)
        return obj.isoformat()
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class StreamFormat(str, Enum):
    """File formats selectable for streaming graph export/import.

    Members are also ``str`` instances, so they compare equal to their
    plain string values (e.g. ``StreamFormat.JSONL == "jsonl"``).
    """

    # JSON Lines: one JSON object per line.
    JSONL = "jsonl"
    # Standard JSON array.
    JSON = "json"
    # CSV format.
    CSV = "csv"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class GraphStreamExporter:
    """
    Stream large graphs to files

    Exports entities and relations in chunks to avoid loading
    everything into memory at once.

    Example:
        ```python
        exporter = GraphStreamExporter(store)

        # Export to JSONL (recommended for large graphs)
        await exporter.export_to_file(
            "graph_export.jsonl",
            format=StreamFormat.JSONL,
            compress=True
        )

        # Export with filtering
        await exporter.export_entities(
            "persons.jsonl",
            entity_type="Person",
            batch_size=1000
        )
        ```
    """

    def __init__(self, store: Any):
        """
        Initialize exporter

        Args:
            store: Graph store instance
        """
        self.store = store

    @staticmethod
    def _write_record(
        file: Any,
        record_type: str,
        payload: Dict[str, Any],
        as_json_array: bool,
        is_first: bool,
    ) -> None:
        """Write one entity/relation payload in the selected layout.

        In JSON-array mode the payload is written bare, preceded by a comma
        unless it is the first element of its array. Otherwise (JSONL) the
        payload is wrapped as ``{"type": ..., "data": ...}`` on its own line.
        """
        if as_json_array:
            if not is_first:
                file.write(",")
            json.dump(payload, file, cls=DateTimeEncoder)
        else:
            json.dump(
                {"type": record_type, "data": payload},
                file,
                cls=DateTimeEncoder,
            )
            file.write("\n")

    async def export_to_file(
        self,
        filepath: str,
        format: StreamFormat = StreamFormat.JSONL,
        compress: bool = False,
        batch_size: int = 1000,
        entity_type: Optional[str] = None,
        include_relations: bool = True,
    ) -> Dict[str, Any]:
        """
        Export entire graph to file

        Args:
            filepath: Output file path (".gz" is appended when ``compress``
                is set and the path does not already end with it)
            format: Export format (JSONL or JSON)
            compress: Enable gzip compression
            batch_size: Streaming batch size
            entity_type: Filter entities by type
            include_relations: Include relations in export

        Returns:
            Dictionary with export statistics: ``entity_count``,
            ``relation_count``, the final ``filepath`` and ``compressed``

        Raises:
            ValueError: If ``format`` is not JSONL or JSON. No writer exists
                for other formats; previously they produced an empty file
                while still reporting a successful export.

        Example:
            ```python
            stats = await exporter.export_to_file(
                "graph.jsonl.gz",
                format=StreamFormat.JSONL,
                compress=True,
                batch_size=5000
            )
            print(f"Exported {stats['entity_count']} entities")
            ```
        """
        # Fail before creating/truncating the target file.
        if format not in (StreamFormat.JSONL, StreamFormat.JSON):
            raise ValueError(f"Unsupported export format: {format!r}")

        # Add .gz extension if compressing
        if compress and not filepath.endswith(".gz"):
            filepath = f"{filepath}.gz"

        entity_count = 0
        relation_count = 0
        as_json_array = format == StreamFormat.JSON

        # Open file (with compression if requested); the context manager
        # closes it even if streaming from the store fails midway.
        opener = gzip.open if compress else open
        with opener(filepath, "wt", encoding="utf-8") as file:
            # Write header for JSON format
            if as_json_array:
                file.write('{"entities": [')

            # Stream entities
            async for entity in self.stream_entities(
                entity_type=entity_type, batch_size=batch_size
            ):
                self._write_record(
                    file,
                    "entity",
                    entity.model_dump(),
                    as_json_array,
                    is_first=entity_count == 0,
                )
                entity_count += 1

                # Log progress
                if entity_count % 10000 == 0:
                    logger.info("Exported %d entities...", entity_count)

            # Stream relations if requested
            if include_relations:
                if as_json_array:
                    file.write('], "relations": [')

                async for relation in self.stream_relations(batch_size=batch_size):
                    self._write_record(
                        file,
                        "relation",
                        relation.model_dump(),
                        as_json_array,
                        is_first=relation_count == 0,
                    )
                    relation_count += 1

                    if relation_count % 10000 == 0:
                        logger.info("Exported %d relations...", relation_count)

            # Write footer for JSON format
            if as_json_array:
                file.write("]}")

        logger.info(
            "Export complete: %d entities, %d relations",
            entity_count,
            relation_count,
        )

        return {
            "entity_count": entity_count,
            "relation_count": relation_count,
            "filepath": filepath,
            "compressed": compress,
        }

    async def stream_entities(
        self, entity_type: Optional[str] = None, batch_size: int = 1000
    ) -> AsyncIterator[Entity]:
        """
        Stream entities in batches

        Uses ``store.paginate_entities`` when the store provides it;
        otherwise falls back to ``store.get_all_entities``, which loads the
        full result before yielding.

        Args:
            entity_type: Filter by entity type
            batch_size: Batch size for streaming (page size)

        Yields:
            Entity instances
        """
        if not hasattr(self.store, "paginate_entities"):
            # Fallback: load all and yield
            entities = await self.store.get_all_entities(entity_type=entity_type)
            for entity in entities:
                yield entity
            return

        # Use pagination to stream efficiently
        cursor = None
        while True:
            page = await self.store.paginate_entities(
                entity_type=entity_type,
                page_size=batch_size,
                cursor=cursor,
            )

            for entity in page.items:
                yield entity

            if not page.page_info.has_next_page:
                break

            cursor = page.page_info.end_cursor

    async def stream_relations(
        self, relation_type: Optional[str] = None, batch_size: int = 1000
    ) -> AsyncIterator[Relation]:
        """
        Stream relations in batches

        Requires ``store.paginate_relations``. When the store does not
        provide it, a warning is logged and nothing is yielded.

        Args:
            relation_type: Filter by relation type
            batch_size: Batch size for streaming (page size)

        Yields:
            Relation instances
        """
        if not hasattr(self.store, "paginate_relations"):
            # There is no load-everything fallback for relations: backends
            # must implement paginate_relations. Say so accurately instead
            # of claiming that all relations are being loaded.
            logger.warning(
                "Store does not implement paginate_relations; "
                "no relations will be streamed"
            )
            return

        cursor = None
        while True:
            page = await self.store.paginate_relations(
                relation_type=relation_type,
                page_size=batch_size,
                cursor=cursor,
            )

            for relation in page.items:
                yield relation

            if not page.page_info.has_next_page:
                break

            cursor = page.page_info.end_cursor

    async def export_entities(
        self,
        filepath: str,
        entity_type: Optional[str] = None,
        batch_size: int = 1000,
        compress: bool = False,
    ) -> int:
        """
        Export only entities to file

        Each entity is written as one bare JSON object per line (no
        ``{"type": ..., "data": ...}`` wrapper). Unlike ``export_to_file``,
        ``filepath`` is used exactly as given: no ".gz" suffix is appended.

        Args:
            filepath: Output file path
            entity_type: Filter by entity type
            batch_size: Streaming batch size
            compress: Enable gzip compression

        Returns:
            Number of entities exported
        """
        count = 0
        opener = gzip.open if compress else open
        with opener(filepath, "wt", encoding="utf-8") as file:
            async for entity in self.stream_entities(
                entity_type=entity_type, batch_size=batch_size
            ):
                json.dump(entity.model_dump(), file, cls=DateTimeEncoder)
                file.write("\n")
                count += 1

        return count
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
class GraphStreamImporter:
    """
    Stream large graphs from files

    Imports entities and relations in chunks to avoid memory issues.

    Example:
        ```python
        importer = GraphStreamImporter(store)

        # Import from JSONL file
        stats = await importer.import_from_file(
            "graph_export.jsonl.gz",
            batch_size=1000
        )
        print(f"Imported {stats['entity_count']} entities")
        ```
    """

    def __init__(self, store: Any):
        """
        Initialize importer

        Args:
            store: Graph store instance
        """
        self.store = store

    @staticmethod
    def _iter_records(file: Any, format: StreamFormat):
        """Yield ``(kind, payload)`` pairs read from an open text file.

        ``kind`` is ``"entity"`` or ``"relation"``. For JSONL, blank lines
        are skipped and a line without a recognized ``"type"`` is treated as
        a bare entity payload. For JSON, the whole document is parsed at
        once (not streamed) and its ``"entities"`` then ``"relations"``
        arrays are yielded; a missing array is treated as empty.
        """
        if format == StreamFormat.JSON:
            document = json.load(file)
            for payload in document.get("entities", []):
                yield "entity", payload
            for payload in document.get("relations", []):
                yield "relation", payload
            return

        for line in file:
            if not line.strip():
                continue

            data = json.loads(line)
            record_type = data.get("type")

            if record_type == "entity":
                yield "entity", data["data"]
            elif record_type == "relation":
                yield "relation", data["data"]
            else:
                # Assume entity if no type specified
                yield "entity", data

    async def import_from_file(
        self,
        filepath: str,
        batch_size: int = 1000,
        format: StreamFormat = StreamFormat.JSONL,
    ) -> Dict[str, int]:
        """
        Import graph from file

        Gzip compression is detected from a ".gz" suffix on ``filepath``.

        Args:
            filepath: Input file path
            batch_size: Batch size for bulk operations
            format: File format (JSONL or JSON)

        Returns:
            Dictionary with import statistics (``entity_count`` and
            ``relation_count``)

        Raises:
            ValueError: If ``format`` is not JSONL or JSON. Previously an
                unsupported format silently imported nothing.
        """
        if format not in (StreamFormat.JSONL, StreamFormat.JSON):
            raise ValueError(f"Unsupported import format: {format!r}")

        # Detect compression
        opener = gzip.open if filepath.endswith(".gz") else open

        entity_count = 0
        relation_count = 0

        entity_batch: list[Entity] = []
        relation_batch: list[Relation] = []

        with opener(filepath, "rt", encoding="utf-8") as file:
            for kind, payload in self._iter_records(file, format):
                if kind == "relation":
                    relation_batch.append(Relation(**payload))
                else:
                    entity_batch.append(Entity(**payload))

                relations_due = bool(relation_batch) and len(relation_batch) >= batch_size
                entities_due = bool(entity_batch) and (
                    # Buffered entities must reach the store before any
                    # relation batch, since those relations may reference
                    # them.
                    relations_due
                    or len(entity_batch) >= batch_size
                )

                if entities_due:
                    await self._import_entity_batch(entity_batch)
                    entity_count += len(entity_batch)
                    # Rebind rather than clear(): the store received this
                    # list object and may still hold a reference to it.
                    entity_batch = []
                    logger.info("Imported %d entities...", entity_count)

                if relations_due:
                    await self._import_relation_batch(relation_batch)
                    relation_count += len(relation_batch)
                    relation_batch = []
                    logger.info("Imported %d relations...", relation_count)

            # Flush remaining batches (entities first, then relations)
            if entity_batch:
                await self._import_entity_batch(entity_batch)
                entity_count += len(entity_batch)

            if relation_batch:
                await self._import_relation_batch(relation_batch)
                relation_count += len(relation_batch)

        logger.info(
            "Import complete: %d entities, %d relations",
            entity_count,
            relation_count,
        )

        return {"entity_count": entity_count, "relation_count": relation_count}

    async def _import_entity_batch(self, entities: list[Entity]) -> None:
        """Import a batch of entities

        Uses ``store.batch_add_entities`` when available, otherwise adds
        the entities one at a time via ``store.add_entity``.
        """
        if hasattr(self.store, "batch_add_entities"):
            await self.store.batch_add_entities(entities)
        else:
            for entity in entities:
                await self.store.add_entity(entity)

    async def _import_relation_batch(self, relations: list[Relation]) -> None:
        """Import a batch of relations

        Uses ``store.batch_add_relations`` when available, otherwise adds
        the relations one at a time via ``store.add_relation``.
        """
        if hasattr(self.store, "batch_add_relations"):
            await self.store.batch_add_relations(relations)
        else:
            for relation in relations:
                await self.store.add_relation(relation)
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
async def stream_subgraph(
    store: Any,
    entity_ids: list[str],
    max_depth: int = 2,
    batch_size: int = 100,
) -> AsyncIterator[tuple[Entity, list[Relation]]]:
    """
    Stream a subgraph around specific entities

    Performs a level-by-level traversal starting from ``entity_ids`` and
    yields each reachable entity once. IDs for which ``store.get_entity``
    returns a falsy value are skipped and not expanded. The caller's
    ``entity_ids`` list is not modified.

    Args:
        store: Graph store instance (must provide async ``get_entity`` and
            ``get_neighbors``)
        entity_ids: Starting entity IDs
        max_depth: Maximum depth to traverse (seeds are depth 0)
        batch_size: Batch size for processing each level

    Yields:
        Tuples of (entity, relations) for each entity in subgraph.
        NOTE: the relations list is currently always empty; fetching the
        actual relations is not implemented yet.

    Example:
        ```python
        async for entity, relations in stream_subgraph(store, ["person_1"], max_depth=2):
            print(f"Entity: {entity.id}, Relations: {len(relations)}")
        ```
    """
    visited = set()
    # Work on a copy so the caller's list is never extended in place.
    frontier = list(entity_ids)
    depth = 0

    while frontier and depth <= max_depth:
        next_frontier = []

        # Process current level in batches
        for start in range(0, len(frontier), batch_size):
            for entity_id in frontier[start : start + batch_size]:
                if entity_id in visited:
                    continue

                visited.add(entity_id)

                # Get entity
                entity = await store.get_entity(entity_id)
                if not entity:
                    continue

                # Neighbors drive the traversal to the next level.
                neighbors = await store.get_neighbors(entity_id, direction="both")
                # For now, return empty relations list - would need to fetch
                # actual relations
                relations = []

                # Collect next level
                next_frontier.extend(
                    neighbor.id for neighbor in neighbors if neighbor.id not in visited
                )

                yield (entity, relations)

        # Only newly discovered IDs make up the next level; earlier levels
        # are already in `visited` and need not be re-scanned.
        frontier = next_frontier
        depth += 1
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""Infrastructure messaging module
|
|
2
|
+
|
|
3
|
+
Contains messaging and communication infrastructure.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .celery_task_manager import CeleryTaskManager
|
|
7
|
+
from .websocket_manager import WebSocketManager, UserConfirmation
|
|
8
|
+
|
|
9
|
+
__all__ = [
|
|
10
|
+
"CeleryTaskManager",
|
|
11
|
+
"WebSocketManager",
|
|
12
|
+
"UserConfirmation",
|
|
13
|
+
]
|