aiecs-1.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aiecs/__init__.py +72 -0
- aiecs/__main__.py +41 -0
- aiecs/aiecs_client.py +469 -0
- aiecs/application/__init__.py +10 -0
- aiecs/application/executors/__init__.py +10 -0
- aiecs/application/executors/operation_executor.py +363 -0
- aiecs/application/knowledge_graph/__init__.py +7 -0
- aiecs/application/knowledge_graph/builder/__init__.py +37 -0
- aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
- aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
- aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
- aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
- aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
- aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
- aiecs/application/knowledge_graph/extractors/base.py +100 -0
- aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
- aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
- aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
- aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
- aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
- aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
- aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
- aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
- aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
- aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
- aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
- aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
- aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
- aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
- aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
- aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
- aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
- aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
- aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
- aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
- aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
- aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
- aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
- aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
- aiecs/application/knowledge_graph/search/__init__.py +59 -0
- aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
- aiecs/application/knowledge_graph/search/reranker.py +295 -0
- aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
- aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
- aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
- aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
- aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
- aiecs/application/knowledge_graph/validators/__init__.py +13 -0
- aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
- aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
- aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
- aiecs/common/__init__.py +9 -0
- aiecs/common/knowledge_graph/__init__.py +17 -0
- aiecs/common/knowledge_graph/runnable.py +484 -0
- aiecs/config/__init__.py +16 -0
- aiecs/config/config.py +498 -0
- aiecs/config/graph_config.py +137 -0
- aiecs/config/registry.py +23 -0
- aiecs/core/__init__.py +46 -0
- aiecs/core/interface/__init__.py +34 -0
- aiecs/core/interface/execution_interface.py +152 -0
- aiecs/core/interface/storage_interface.py +171 -0
- aiecs/domain/__init__.py +289 -0
- aiecs/domain/agent/__init__.py +189 -0
- aiecs/domain/agent/base_agent.py +697 -0
- aiecs/domain/agent/exceptions.py +103 -0
- aiecs/domain/agent/graph_aware_mixin.py +559 -0
- aiecs/domain/agent/hybrid_agent.py +490 -0
- aiecs/domain/agent/integration/__init__.py +26 -0
- aiecs/domain/agent/integration/context_compressor.py +222 -0
- aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
- aiecs/domain/agent/integration/retry_policy.py +219 -0
- aiecs/domain/agent/integration/role_config.py +213 -0
- aiecs/domain/agent/knowledge_aware_agent.py +646 -0
- aiecs/domain/agent/lifecycle.py +296 -0
- aiecs/domain/agent/llm_agent.py +300 -0
- aiecs/domain/agent/memory/__init__.py +12 -0
- aiecs/domain/agent/memory/conversation.py +197 -0
- aiecs/domain/agent/migration/__init__.py +14 -0
- aiecs/domain/agent/migration/conversion.py +160 -0
- aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
- aiecs/domain/agent/models.py +317 -0
- aiecs/domain/agent/observability.py +407 -0
- aiecs/domain/agent/persistence.py +289 -0
- aiecs/domain/agent/prompts/__init__.py +29 -0
- aiecs/domain/agent/prompts/builder.py +161 -0
- aiecs/domain/agent/prompts/formatters.py +189 -0
- aiecs/domain/agent/prompts/template.py +255 -0
- aiecs/domain/agent/registry.py +260 -0
- aiecs/domain/agent/tool_agent.py +257 -0
- aiecs/domain/agent/tools/__init__.py +12 -0
- aiecs/domain/agent/tools/schema_generator.py +221 -0
- aiecs/domain/community/__init__.py +155 -0
- aiecs/domain/community/agent_adapter.py +477 -0
- aiecs/domain/community/analytics.py +481 -0
- aiecs/domain/community/collaborative_workflow.py +642 -0
- aiecs/domain/community/communication_hub.py +645 -0
- aiecs/domain/community/community_builder.py +320 -0
- aiecs/domain/community/community_integration.py +800 -0
- aiecs/domain/community/community_manager.py +813 -0
- aiecs/domain/community/decision_engine.py +879 -0
- aiecs/domain/community/exceptions.py +225 -0
- aiecs/domain/community/models/__init__.py +33 -0
- aiecs/domain/community/models/community_models.py +268 -0
- aiecs/domain/community/resource_manager.py +457 -0
- aiecs/domain/community/shared_context_manager.py +603 -0
- aiecs/domain/context/__init__.py +58 -0
- aiecs/domain/context/context_engine.py +989 -0
- aiecs/domain/context/conversation_models.py +354 -0
- aiecs/domain/context/graph_memory.py +467 -0
- aiecs/domain/execution/__init__.py +12 -0
- aiecs/domain/execution/model.py +57 -0
- aiecs/domain/knowledge_graph/__init__.py +19 -0
- aiecs/domain/knowledge_graph/models/__init__.py +52 -0
- aiecs/domain/knowledge_graph/models/entity.py +130 -0
- aiecs/domain/knowledge_graph/models/evidence.py +194 -0
- aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
- aiecs/domain/knowledge_graph/models/path.py +179 -0
- aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
- aiecs/domain/knowledge_graph/models/query.py +272 -0
- aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
- aiecs/domain/knowledge_graph/models/relation.py +136 -0
- aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
- aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
- aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
- aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
- aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
- aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
- aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
- aiecs/domain/task/__init__.py +13 -0
- aiecs/domain/task/dsl_processor.py +613 -0
- aiecs/domain/task/model.py +62 -0
- aiecs/domain/task/task_context.py +268 -0
- aiecs/infrastructure/__init__.py +24 -0
- aiecs/infrastructure/graph_storage/__init__.py +11 -0
- aiecs/infrastructure/graph_storage/base.py +601 -0
- aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
- aiecs/infrastructure/graph_storage/cache.py +429 -0
- aiecs/infrastructure/graph_storage/distributed.py +226 -0
- aiecs/infrastructure/graph_storage/error_handling.py +390 -0
- aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
- aiecs/infrastructure/graph_storage/health_checks.py +378 -0
- aiecs/infrastructure/graph_storage/in_memory.py +514 -0
- aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
- aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
- aiecs/infrastructure/graph_storage/metrics.py +357 -0
- aiecs/infrastructure/graph_storage/migration.py +413 -0
- aiecs/infrastructure/graph_storage/pagination.py +471 -0
- aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
- aiecs/infrastructure/graph_storage/postgres.py +871 -0
- aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
- aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
- aiecs/infrastructure/graph_storage/sqlite.py +623 -0
- aiecs/infrastructure/graph_storage/streaming.py +495 -0
- aiecs/infrastructure/messaging/__init__.py +13 -0
- aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
- aiecs/infrastructure/messaging/websocket_manager.py +298 -0
- aiecs/infrastructure/monitoring/__init__.py +34 -0
- aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
- aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
- aiecs/infrastructure/monitoring/structured_logger.py +48 -0
- aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
- aiecs/infrastructure/persistence/__init__.py +24 -0
- aiecs/infrastructure/persistence/context_engine_client.py +187 -0
- aiecs/infrastructure/persistence/database_manager.py +333 -0
- aiecs/infrastructure/persistence/file_storage.py +754 -0
- aiecs/infrastructure/persistence/redis_client.py +220 -0
- aiecs/llm/__init__.py +86 -0
- aiecs/llm/callbacks/__init__.py +11 -0
- aiecs/llm/callbacks/custom_callbacks.py +264 -0
- aiecs/llm/client_factory.py +420 -0
- aiecs/llm/clients/__init__.py +33 -0
- aiecs/llm/clients/base_client.py +193 -0
- aiecs/llm/clients/googleai_client.py +181 -0
- aiecs/llm/clients/openai_client.py +131 -0
- aiecs/llm/clients/vertex_client.py +437 -0
- aiecs/llm/clients/xai_client.py +184 -0
- aiecs/llm/config/__init__.py +51 -0
- aiecs/llm/config/config_loader.py +275 -0
- aiecs/llm/config/config_validator.py +236 -0
- aiecs/llm/config/model_config.py +151 -0
- aiecs/llm/utils/__init__.py +10 -0
- aiecs/llm/utils/validate_config.py +91 -0
- aiecs/main.py +363 -0
- aiecs/scripts/__init__.py +3 -0
- aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
- aiecs/scripts/aid/__init__.py +19 -0
- aiecs/scripts/aid/version_manager.py +215 -0
- aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
- aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
- aiecs/scripts/dependance_check/__init__.py +17 -0
- aiecs/scripts/dependance_check/dependency_checker.py +938 -0
- aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
- aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
- aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
- aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
- aiecs/scripts/dependance_patch/__init__.py +7 -0
- aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
- aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
- aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
- aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
- aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
- aiecs/scripts/tools_develop/README.md +449 -0
- aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
- aiecs/scripts/tools_develop/__init__.py +21 -0
- aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
- aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
- aiecs/scripts/tools_develop/verify_tools.py +356 -0
- aiecs/tasks/__init__.py +1 -0
- aiecs/tasks/worker.py +172 -0
- aiecs/tools/__init__.py +299 -0
- aiecs/tools/apisource/__init__.py +99 -0
- aiecs/tools/apisource/intelligence/__init__.py +19 -0
- aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
- aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
- aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
- aiecs/tools/apisource/monitoring/__init__.py +9 -0
- aiecs/tools/apisource/monitoring/metrics.py +303 -0
- aiecs/tools/apisource/providers/__init__.py +115 -0
- aiecs/tools/apisource/providers/base.py +664 -0
- aiecs/tools/apisource/providers/census.py +401 -0
- aiecs/tools/apisource/providers/fred.py +564 -0
- aiecs/tools/apisource/providers/newsapi.py +412 -0
- aiecs/tools/apisource/providers/worldbank.py +357 -0
- aiecs/tools/apisource/reliability/__init__.py +12 -0
- aiecs/tools/apisource/reliability/error_handler.py +375 -0
- aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
- aiecs/tools/apisource/tool.py +850 -0
- aiecs/tools/apisource/utils/__init__.py +9 -0
- aiecs/tools/apisource/utils/validators.py +338 -0
- aiecs/tools/base_tool.py +201 -0
- aiecs/tools/docs/__init__.py +121 -0
- aiecs/tools/docs/ai_document_orchestrator.py +599 -0
- aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
- aiecs/tools/docs/content_insertion_tool.py +1333 -0
- aiecs/tools/docs/document_creator_tool.py +1317 -0
- aiecs/tools/docs/document_layout_tool.py +1166 -0
- aiecs/tools/docs/document_parser_tool.py +994 -0
- aiecs/tools/docs/document_writer_tool.py +1818 -0
- aiecs/tools/knowledge_graph/__init__.py +17 -0
- aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
- aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
- aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
- aiecs/tools/langchain_adapter.py +542 -0
- aiecs/tools/schema_generator.py +275 -0
- aiecs/tools/search_tool/__init__.py +100 -0
- aiecs/tools/search_tool/analyzers.py +589 -0
- aiecs/tools/search_tool/cache.py +260 -0
- aiecs/tools/search_tool/constants.py +128 -0
- aiecs/tools/search_tool/context.py +216 -0
- aiecs/tools/search_tool/core.py +749 -0
- aiecs/tools/search_tool/deduplicator.py +123 -0
- aiecs/tools/search_tool/error_handler.py +271 -0
- aiecs/tools/search_tool/metrics.py +371 -0
- aiecs/tools/search_tool/rate_limiter.py +178 -0
- aiecs/tools/search_tool/schemas.py +277 -0
- aiecs/tools/statistics/__init__.py +80 -0
- aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
- aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
- aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
- aiecs/tools/statistics/data_loader_tool.py +564 -0
- aiecs/tools/statistics/data_profiler_tool.py +658 -0
- aiecs/tools/statistics/data_transformer_tool.py +573 -0
- aiecs/tools/statistics/data_visualizer_tool.py +495 -0
- aiecs/tools/statistics/model_trainer_tool.py +487 -0
- aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
- aiecs/tools/task_tools/__init__.py +86 -0
- aiecs/tools/task_tools/chart_tool.py +732 -0
- aiecs/tools/task_tools/classfire_tool.py +922 -0
- aiecs/tools/task_tools/image_tool.py +447 -0
- aiecs/tools/task_tools/office_tool.py +684 -0
- aiecs/tools/task_tools/pandas_tool.py +635 -0
- aiecs/tools/task_tools/report_tool.py +635 -0
- aiecs/tools/task_tools/research_tool.py +392 -0
- aiecs/tools/task_tools/scraper_tool.py +715 -0
- aiecs/tools/task_tools/stats_tool.py +688 -0
- aiecs/tools/temp_file_manager.py +130 -0
- aiecs/tools/tool_executor/__init__.py +37 -0
- aiecs/tools/tool_executor/tool_executor.py +881 -0
- aiecs/utils/LLM_output_structor.py +445 -0
- aiecs/utils/__init__.py +34 -0
- aiecs/utils/base_callback.py +47 -0
- aiecs/utils/cache_provider.py +695 -0
- aiecs/utils/execution_utils.py +184 -0
- aiecs/utils/logging.py +1 -0
- aiecs/utils/prompt_loader.py +14 -0
- aiecs/utils/token_usage_repository.py +323 -0
- aiecs/ws/__init__.py +0 -0
- aiecs/ws/socket_server.py +52 -0
- aiecs-1.5.1.dist-info/METADATA +608 -0
- aiecs-1.5.1.dist-info/RECORD +302 -0
- aiecs-1.5.1.dist-info/WHEEL +5 -0
- aiecs-1.5.1.dist-info/entry_points.txt +10 -0
- aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
- aiecs-1.5.1.dist-info/top_level.txt +1 -0
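To inspect the same file tree locally, the wheel can be installed directly, assuming it is published to PyPI under the same name: pip install aiecs==1.5.1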
@@ -0,0 +1,1818 @@
import os
import json
import uuid
import hashlib
import logging
import asyncio
import shutil
from typing import Dict, Any, List, Optional, Union, Tuple
from enum import Enum
from datetime import datetime
from pathlib import Path
import tempfile

from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings, SettingsConfigDict

from aiecs.tools.base_tool import BaseTool
from aiecs.tools import register_tool


class DocumentFormat(str, Enum):
    """Supported document formats for writing"""

    TXT = "txt"
    PLAIN_TEXT = "txt"  # Alias for TXT
    JSON = "json"
    CSV = "csv"
    XML = "xml"
    MARKDOWN = "md"
    HTML = "html"
    YAML = "yaml"
    PDF = "pdf"
    DOCX = "docx"
    XLSX = "xlsx"
    BINARY = "binary"


class WriteMode(str, Enum):
    """Document writing modes"""

    CREATE = "create"  # Create a new file; fail if it already exists
    OVERWRITE = "overwrite"  # Overwrite an existing file
    APPEND = "append"  # Append to an existing file
    UPDATE = "update"  # Update an existing file (smart merge)
    BACKUP_WRITE = "backup_write"  # Back up, then write
    VERSION_WRITE = "version_write"  # Versioned write
    INSERT = "insert"  # Insert content at a given position
    REPLACE = "replace"  # Replace the specified content
    DELETE = "delete"  # Delete the specified content


class EditOperation(str, Enum):
    """Advanced edit operations"""

    BOLD = "bold"  # Bold text
    ITALIC = "italic"  # Italic text
    UNDERLINE = "underline"  # Underlined text
    STRIKETHROUGH = "strikethrough"  # Strikethrough text
    HIGHLIGHT = "highlight"  # Highlighted text
    INSERT_TEXT = "insert_text"  # Insert text
    DELETE_TEXT = "delete_text"  # Delete text
    REPLACE_TEXT = "replace_text"  # Replace text
    COPY_TEXT = "copy_text"  # Copy text
    CUT_TEXT = "cut_text"  # Cut text
    PASTE_TEXT = "paste_text"  # Paste text
    FIND_REPLACE = "find_replace"  # Find and replace
    INSERT_LINE = "insert_line"  # Insert a line
    DELETE_LINE = "delete_line"  # Delete a line
    MOVE_LINE = "move_line"  # Move a line


class EncodingType(str, Enum):
    """Text encoding types"""

    UTF8 = "utf-8"
    UTF16 = "utf-16"
    ASCII = "ascii"
    GBK = "gbk"
    AUTO = "auto"


class ValidationLevel(str, Enum):
    """Content validation levels"""

    NONE = "none"  # No validation
    BASIC = "basic"  # Basic validation (format, size)
    STRICT = "strict"  # Strict validation (content, structure)
    ENTERPRISE = "enterprise"  # Enterprise-grade validation (security, compliance)
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class DocumentWriterError(Exception):
|
|
92
|
+
"""Base exception for document writer errors"""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class WriteError(DocumentWriterError):
|
|
96
|
+
"""Raised when write operations fail"""
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class ValidationError(DocumentWriterError):
|
|
100
|
+
"""Raised when validation fails"""
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class SecurityError(DocumentWriterError):
|
|
104
|
+
"""Raised when security validation fails"""
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class WritePermissionError(DocumentWriterError):
|
|
108
|
+
"""Raised when write permission is denied"""
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class ContentValidationError(DocumentWriterError):
|
|
112
|
+
"""Raised when content validation fails"""
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
class StorageError(DocumentWriterError):
|
|
116
|
+
"""Raised when storage operations fail"""
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
@register_tool("document_writer")
|
|
120
|
+
class DocumentWriterTool(BaseTool):
|
|
121
|
+
"""
|
|
122
|
+
Modern high-performance document writing component that can:
|
|
123
|
+
1. Handle multiple document formats and encodings
|
|
124
|
+
2. Provide production-grade write operations with validation
|
|
125
|
+
3. Support various write modes (create, overwrite, append, update)
|
|
126
|
+
4. Implement backup and versioning strategies
|
|
127
|
+
5. Ensure atomic operations and data integrity
|
|
128
|
+
6. Support both local and cloud storage
|
|
129
|
+
|
|
130
|
+
Production Features:
|
|
131
|
+
- Atomic writes (no partial writes)
|
|
132
|
+
- Content validation and security scanning
|
|
133
|
+
- Automatic backup and versioning
|
|
134
|
+
- Write permission and quota checks
|
|
135
|
+
- Transaction-like operations
|
|
136
|
+
- Audit logging
|
|
137
|
+
"""
|
|
138
|
+
|
|
139
|
+
# Configuration schema
|
|
140
|
+
class Config(BaseSettings):
|
|
141
|
+
"""Configuration for the document writer tool
|
|
142
|
+
|
|
143
|
+
Automatically reads from environment variables with DOC_WRITER_ prefix.
|
|
144
|
+
Example: DOC_WRITER_GCS_PROJECT_ID -> gcs_project_id
|
|
145
|
+
"""
|
|
146
|
+
|
|
147
|
+
model_config = SettingsConfigDict(env_prefix="DOC_WRITER_")
|
|
148
|
+
|
|
149
|
+
temp_dir: str = Field(
|
|
150
|
+
default=os.path.join(tempfile.gettempdir(), "document_writer"),
|
|
151
|
+
description="Temporary directory for document processing",
|
|
152
|
+
)
|
|
153
|
+
backup_dir: str = Field(
|
|
154
|
+
default=os.path.join(tempfile.gettempdir(), "document_backups"),
|
|
155
|
+
description="Directory for document backups",
|
|
156
|
+
)
|
|
157
|
+
output_dir: Optional[str] = Field(
|
|
158
|
+
default=None, description="Default output directory for documents"
|
|
159
|
+
)
|
|
160
|
+
max_file_size: int = Field(
|
|
161
|
+
default=100 * 1024 * 1024, description="Maximum file size in bytes"
|
|
162
|
+
)
|
|
163
|
+
max_backup_versions: int = Field(
|
|
164
|
+
default=10, description="Maximum number of backup versions to keep"
|
|
165
|
+
)
|
|
166
|
+
default_encoding: str = Field(
|
|
167
|
+
default="utf-8", description="Default text encoding for documents"
|
|
168
|
+
)
|
|
169
|
+
enable_backup: bool = Field(
|
|
170
|
+
default=True,
|
|
171
|
+
description="Whether to enable automatic backup functionality",
|
|
172
|
+
)
|
|
173
|
+
enable_versioning: bool = Field(
|
|
174
|
+
default=True, description="Whether to enable document versioning"
|
|
175
|
+
)
|
|
176
|
+
enable_content_validation: bool = Field(
|
|
177
|
+
default=True, description="Whether to enable content validation"
|
|
178
|
+
)
|
|
179
|
+
enable_security_scan: bool = Field(
|
|
180
|
+
default=True, description="Whether to enable security scanning"
|
|
181
|
+
)
|
|
182
|
+
atomic_write: bool = Field(
|
|
183
|
+
default=True, description="Whether to use atomic write operations"
|
|
184
|
+
)
|
|
185
|
+
validation_level: str = Field(default="basic", description="Content validation level")
|
|
186
|
+
timeout_seconds: int = Field(default=60, description="Operation timeout in seconds")
|
|
187
|
+
auto_backup: bool = Field(
|
|
188
|
+
default=True,
|
|
189
|
+
description="Whether to automatically backup before write operations",
|
|
190
|
+
)
|
|
191
|
+
atomic_writes: bool = Field(
|
|
192
|
+
default=True, description="Whether to use atomic write operations"
|
|
193
|
+
)
|
|
194
|
+
default_format: str = Field(default="md", description="Default document format")
|
|
195
|
+
version_control: bool = Field(default=True, description="Whether to enable version control")
|
|
196
|
+
security_scan: bool = Field(default=True, description="Whether to enable security scanning")
|
|
197
|
+
enable_cloud_storage: bool = Field(
|
|
198
|
+
default=True,
|
|
199
|
+
description="Whether to enable cloud storage integration",
|
|
200
|
+
)
|
|
201
|
+
gcs_bucket_name: str = Field(
|
|
202
|
+
default="aiecs-documents",
|
|
203
|
+
description="Google Cloud Storage bucket name",
|
|
204
|
+
)
|
|
205
|
+
gcs_project_id: Optional[str] = Field(
|
|
206
|
+
default=None, description="Google Cloud Storage project ID"
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
def __init__(self, config: Optional[Dict] = None):
|
|
210
|
+
"""Initialize DocumentWriterTool with settings"""
|
|
211
|
+
super().__init__(config)
|
|
212
|
+
|
|
213
|
+
# Parse configuration
|
|
214
|
+
self.config = self.Config(**(config or {}))
|
|
215
|
+
|
|
216
|
+
self.logger = logging.getLogger(__name__)
|
|
217
|
+
|
|
218
|
+
# Create necessary directories
|
|
219
|
+
os.makedirs(self.config.temp_dir, exist_ok=True)
|
|
220
|
+
os.makedirs(self.config.backup_dir, exist_ok=True)
|
|
221
|
+
|
|
222
|
+
# Initialize cloud storage
|
|
223
|
+
self._init_cloud_storage()
|
|
224
|
+
|
|
225
|
+
# Initialize content validators
|
|
226
|
+
self._init_validators()
|
|
227
|
+
|
|
228
|
+
def _init_cloud_storage(self):
|
|
229
|
+
"""Initialize cloud storage for document writing"""
|
|
230
|
+
self.file_storage = None
|
|
231
|
+
|
|
232
|
+
if self.config.enable_cloud_storage:
|
|
233
|
+
try:
|
|
234
|
+
from aiecs.infrastructure.persistence.file_storage import (
|
|
235
|
+
FileStorage,
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
storage_config = {
|
|
239
|
+
"gcs_bucket_name": self.config.gcs_bucket_name,
|
|
240
|
+
"gcs_project_id": self.config.gcs_project_id,
|
|
241
|
+
"enable_local_fallback": True,
|
|
242
|
+
"local_storage_path": self.config.temp_dir,
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
self.file_storage = FileStorage(storage_config)
|
|
246
|
+
asyncio.create_task(self._init_storage_async())
|
|
247
|
+
|
|
248
|
+
except ImportError:
|
|
249
|
+
self.logger.warning("FileStorage not available, cloud storage disabled")
|
|
250
|
+
except Exception as e:
|
|
251
|
+
self.logger.warning(f"Failed to initialize cloud storage: {e}")
|
|
252
|
+
|
|
253
|
+
async def _init_storage_async(self):
|
|
254
|
+
"""Async initialization of file storage"""
|
|
255
|
+
try:
|
|
256
|
+
if self.file_storage:
|
|
257
|
+
await self.file_storage.initialize()
|
|
258
|
+
self.logger.info("Cloud storage initialized successfully")
|
|
259
|
+
except Exception as e:
|
|
260
|
+
self.logger.warning(f"Cloud storage initialization failed: {e}")
|
|
261
|
+
self.file_storage = None
|
|
262
|
+
|
|
263
|
+
def _init_validators(self):
|
|
264
|
+
"""Initialize content validators"""
|
|
265
|
+
self.validators = {
|
|
266
|
+
DocumentFormat.JSON: self._validate_json_content,
|
|
267
|
+
DocumentFormat.XML: self._validate_xml_content,
|
|
268
|
+
DocumentFormat.CSV: self._validate_csv_content,
|
|
269
|
+
DocumentFormat.YAML: self._validate_yaml_content,
|
|
270
|
+
DocumentFormat.HTML: self._validate_html_content,
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
# Schema definitions
|
|
274
|
+
class WriteDocumentSchema(BaseModel):
|
|
275
|
+
"""Schema for write_document operation"""
|
|
276
|
+
|
|
277
|
+
target_path: str = Field(description="Target file path (local or cloud)")
|
|
278
|
+
content: Union[str, bytes, Dict, List] = Field(description="Content to write")
|
|
279
|
+
format: DocumentFormat = Field(description="Document format")
|
|
280
|
+
mode: WriteMode = Field(default=WriteMode.CREATE, description="Write mode")
|
|
281
|
+
encoding: EncodingType = Field(default=EncodingType.UTF8, description="Text encoding")
|
|
282
|
+
validation_level: ValidationLevel = Field(
|
|
283
|
+
default=ValidationLevel.BASIC, description="Validation level"
|
|
284
|
+
)
|
|
285
|
+
metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
|
|
286
|
+
backup_comment: Optional[str] = Field(default=None, description="Backup comment")
|
|
287
|
+
|
|
288
|
+
class BatchWriteSchema(BaseModel):
|
|
289
|
+
"""Schema for batch_write_documents operation"""
|
|
290
|
+
|
|
291
|
+
write_operations: List[Dict[str, Any]] = Field(description="List of write operations")
|
|
292
|
+
transaction_mode: bool = Field(default=True, description="Use transaction mode")
|
|
293
|
+
rollback_on_error: bool = Field(default=True, description="Rollback on any error")
|
|
294
|
+
|
|
295
|
+
class EditDocumentSchema(BaseModel):
|
|
296
|
+
"""Schema for edit_document operation"""
|
|
297
|
+
|
|
298
|
+
target_path: str = Field(description="Target file path")
|
|
299
|
+
operation: EditOperation = Field(description="Edit operation to perform")
|
|
300
|
+
content: Optional[str] = Field(default=None, description="Content for the operation")
|
|
301
|
+
position: Optional[Dict[str, Any]] = Field(
|
|
302
|
+
default=None, description="Position info (line, column, offset)"
|
|
303
|
+
)
|
|
304
|
+
selection: Optional[Dict[str, Any]] = Field(
|
|
305
|
+
default=None, description="Text selection range"
|
|
306
|
+
)
|
|
307
|
+
format_options: Optional[Dict[str, Any]] = Field(
|
|
308
|
+
default=None, description="Formatting options"
|
|
309
|
+
)
|
|
310
|
+
|
|
311
|
+
class FormatTextSchema(BaseModel):
|
|
312
|
+
"""Schema for format_text operation"""
|
|
313
|
+
|
|
314
|
+
target_path: str = Field(description="Target file path")
|
|
315
|
+
text_to_format: str = Field(description="Text to apply formatting to")
|
|
316
|
+
format_type: EditOperation = Field(description="Type of formatting")
|
|
317
|
+
format_options: Optional[Dict[str, Any]] = Field(
|
|
318
|
+
default=None, description="Additional format options"
|
|
319
|
+
)
|
|
320
|
+
|
|
321
|
+
class FindReplaceSchema(BaseModel):
|
|
322
|
+
"""Schema for find_replace operation"""
|
|
323
|
+
|
|
324
|
+
target_path: str = Field(description="Target file path")
|
|
325
|
+
find_text: str = Field(description="Text to find")
|
|
326
|
+
replace_text: str = Field(description="Text to replace with")
|
|
327
|
+
replace_all: bool = Field(default=False, description="Replace all occurrences")
|
|
328
|
+
case_sensitive: bool = Field(default=True, description="Case sensitive search")
|
|
329
|
+
regex_mode: bool = Field(default=False, description="Use regex for find/replace")
|
|
330
|
+
|
|
331
|
+
def write_document(
|
|
332
|
+
self,
|
|
333
|
+
target_path: str,
|
|
334
|
+
content: Union[str, bytes, Dict, List],
|
|
335
|
+
format: DocumentFormat,
|
|
336
|
+
mode: WriteMode = WriteMode.CREATE,
|
|
337
|
+
encoding: EncodingType = EncodingType.UTF8,
|
|
338
|
+
validation_level: ValidationLevel = ValidationLevel.BASIC,
|
|
339
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
340
|
+
backup_comment: Optional[str] = None,
|
|
341
|
+
) -> Dict[str, Any]:
|
|
342
|
+
"""
|
|
343
|
+
Write document with production-grade features
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
target_path: Target file path (local or cloud)
|
|
347
|
+
content: Content to write
|
|
348
|
+
format: Document format
|
|
349
|
+
mode: Write mode (create, overwrite, append, update, etc.)
|
|
350
|
+
encoding: Text encoding
|
|
351
|
+
validation_level: Content validation level
|
|
352
|
+
metadata: Additional metadata
|
|
353
|
+
backup_comment: Comment for backup
|
|
354
|
+
|
|
355
|
+
Returns:
|
|
356
|
+
Dict containing write results and metadata
|
|
357
|
+
"""
|
|
358
|
+
try:
|
|
359
|
+
start_time = datetime.now()
|
|
360
|
+
operation_id = str(uuid.uuid4())
|
|
361
|
+
|
|
362
|
+
self.logger.info(f"Starting write operation {operation_id}: {target_path}")
|
|
363
|
+
|
|
364
|
+
# Step 1: Validate inputs
|
|
365
|
+
self._validate_write_inputs(target_path, content, format, mode)
|
|
366
|
+
|
|
367
|
+
# Step 2: Prepare content
|
|
368
|
+
processed_content, content_metadata = self._prepare_content(
|
|
369
|
+
content, format, encoding, validation_level
|
|
370
|
+
)
|
|
371
|
+
|
|
372
|
+
# Step 3: Handle write mode logic
|
|
373
|
+
write_plan = self._plan_write_operation(target_path, mode, metadata)
|
|
374
|
+
|
|
375
|
+
# Step 4: Create backup if needed
|
|
376
|
+
backup_info = None
|
|
377
|
+
if self.config.enable_backup and mode in [
|
|
378
|
+
WriteMode.OVERWRITE,
|
|
379
|
+
WriteMode.UPDATE,
|
|
380
|
+
]:
|
|
381
|
+
backup_info = self._create_backup(target_path, backup_comment)
|
|
382
|
+
|
|
383
|
+
# Step 5: Execute atomic write
|
|
384
|
+
write_result = self._execute_atomic_write(
|
|
385
|
+
target_path, processed_content, format, encoding, write_plan
|
|
386
|
+
)
|
|
387
|
+
|
|
388
|
+
# Step 6: Update metadata and versioning
|
|
389
|
+
version_info = self._handle_versioning(target_path, content_metadata, metadata)
|
|
390
|
+
|
|
391
|
+
# Step 7: Audit logging
|
|
392
|
+
audit_info = self._log_write_operation(
|
|
393
|
+
operation_id, target_path, mode, write_result, backup_info
|
|
394
|
+
)
|
|
395
|
+
|
|
396
|
+
result = {
|
|
397
|
+
"operation_id": operation_id,
|
|
398
|
+
"target_path": target_path,
|
|
399
|
+
"write_mode": mode,
|
|
400
|
+
"format": format,
|
|
401
|
+
"encoding": encoding,
|
|
402
|
+
"content_metadata": content_metadata,
|
|
403
|
+
"write_result": write_result,
|
|
404
|
+
"backup_info": backup_info,
|
|
405
|
+
"version_info": version_info,
|
|
406
|
+
"audit_info": audit_info,
|
|
407
|
+
"processing_metadata": {
|
|
408
|
+
"start_time": start_time.isoformat(),
|
|
409
|
+
"end_time": datetime.now().isoformat(),
|
|
410
|
+
"duration": (datetime.now() - start_time).total_seconds(),
|
|
411
|
+
},
|
|
412
|
+
}
|
|
413
|
+
|
|
414
|
+
self.logger.info(f"Write operation {operation_id} completed successfully")
|
|
415
|
+
return result
|
|
416
|
+
|
|
417
|
+
except Exception as e:
|
|
418
|
+
self.logger.error(f"Write operation failed for {target_path}: {str(e)}")
|
|
419
|
+
# Rollback if needed
|
|
420
|
+
if "backup_info" in locals() and backup_info:
|
|
421
|
+
self._rollback_from_backup(target_path, backup_info)
|
|
422
|
+
raise DocumentWriterError(f"Document write failed: {str(e)}")
|
|
423
|
+
|
|
424
|
+
async def write_document_async(
|
|
425
|
+
self,
|
|
426
|
+
target_path: str,
|
|
427
|
+
content: Union[str, bytes, Dict, List],
|
|
428
|
+
format: DocumentFormat,
|
|
429
|
+
mode: WriteMode = WriteMode.CREATE,
|
|
430
|
+
encoding: EncodingType = EncodingType.UTF8,
|
|
431
|
+
validation_level: ValidationLevel = ValidationLevel.BASIC,
|
|
432
|
+
metadata: Optional[Dict[str, Any]] = None,
|
|
433
|
+
backup_comment: Optional[str] = None,
|
|
434
|
+
) -> Dict[str, Any]:
|
|
435
|
+
"""Async version of write_document"""
|
|
436
|
+
return await asyncio.to_thread(
|
|
437
|
+
self.write_document,
|
|
438
|
+
target_path=target_path,
|
|
439
|
+
content=content,
|
|
440
|
+
format=format,
|
|
441
|
+
mode=mode,
|
|
442
|
+
encoding=encoding,
|
|
443
|
+
validation_level=validation_level,
|
|
444
|
+
metadata=metadata,
|
|
445
|
+
backup_comment=backup_comment,
|
|
446
|
+
)
|
|
447
|
+
|
|
448
|
+
def batch_write_documents(
|
|
449
|
+
self,
|
|
450
|
+
write_operations: List[Dict[str, Any]],
|
|
451
|
+
transaction_mode: bool = True,
|
|
452
|
+
rollback_on_error: bool = True,
|
|
453
|
+
) -> Dict[str, Any]:
|
|
454
|
+
"""
|
|
455
|
+
Batch write multiple documents with transaction support
|
|
456
|
+
|
|
457
|
+
Args:
|
|
458
|
+
write_operations: List of write operation dictionaries
|
|
459
|
+
transaction_mode: Use transaction mode for atomicity
|
|
460
|
+
rollback_on_error: Rollback all operations on any error
|
|
461
|
+
|
|
462
|
+
Returns:
|
|
463
|
+
Dict containing batch write results
|
|
464
|
+
"""
|
|
465
|
+
try:
|
|
466
|
+
start_time = datetime.now()
|
|
467
|
+
batch_id = str(uuid.uuid4())
|
|
468
|
+
|
|
469
|
+
self.logger.info(
|
|
470
|
+
f"Starting batch write operation {batch_id}: {len(write_operations)} operations"
|
|
471
|
+
)
|
|
472
|
+
|
|
473
|
+
completed_operations = []
|
|
474
|
+
backup_operations = []
|
|
475
|
+
|
|
476
|
+
try:
|
|
477
|
+
for i, operation in enumerate(write_operations):
|
|
478
|
+
self.logger.info(f"Processing operation {i+1}/{len(write_operations)}")
|
|
479
|
+
|
|
480
|
+
# Execute individual write operation
|
|
481
|
+
result = self.write_document(**operation)
|
|
482
|
+
completed_operations.append(
|
|
483
|
+
{
|
|
484
|
+
"index": i,
|
|
485
|
+
"operation": operation,
|
|
486
|
+
"result": result,
|
|
487
|
+
"status": "success",
|
|
488
|
+
}
|
|
489
|
+
)
|
|
490
|
+
|
|
491
|
+
# Track backup info for potential rollback
|
|
492
|
+
if result.get("backup_info"):
|
|
493
|
+
backup_operations.append(result["backup_info"])
|
|
494
|
+
|
|
495
|
+
batch_result = {
|
|
496
|
+
"batch_id": batch_id,
|
|
497
|
+
"total_operations": len(write_operations),
|
|
498
|
+
"successful_operations": len(completed_operations),
|
|
499
|
+
"failed_operations": 0,
|
|
500
|
+
"operations": completed_operations,
|
|
501
|
+
"transaction_mode": transaction_mode,
|
|
502
|
+
"batch_metadata": {
|
|
503
|
+
"start_time": start_time.isoformat(),
|
|
504
|
+
"end_time": datetime.now().isoformat(),
|
|
505
|
+
"duration": (datetime.now() - start_time).total_seconds(),
|
|
506
|
+
},
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
self.logger.info(f"Batch write operation {batch_id} completed successfully")
|
|
510
|
+
return batch_result
|
|
511
|
+
|
|
512
|
+
except Exception as e:
|
|
513
|
+
self.logger.error(f"Batch write operation {batch_id} failed: {str(e)}")
|
|
514
|
+
|
|
515
|
+
if rollback_on_error and transaction_mode:
|
|
516
|
+
self.logger.info(f"Rolling back batch operation {batch_id}")
|
|
517
|
+
self._rollback_batch_operations(completed_operations, backup_operations)
|
|
518
|
+
|
|
519
|
+
# Create failure result
|
|
520
|
+
batch_result = {
|
|
521
|
+
"batch_id": batch_id,
|
|
522
|
+
"total_operations": len(write_operations),
|
|
523
|
+
"successful_operations": len(completed_operations),
|
|
524
|
+
"failed_operations": len(write_operations) - len(completed_operations),
|
|
525
|
+
"operations": completed_operations,
|
|
526
|
+
"error": str(e),
|
|
527
|
+
"transaction_mode": transaction_mode,
|
|
528
|
+
"rollback_performed": rollback_on_error and transaction_mode,
|
|
529
|
+
}
|
|
530
|
+
|
|
531
|
+
raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
|
|
532
|
+
|
|
533
|
+
except Exception as e:
|
|
534
|
+
raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
|
|
535
|
+
|
|
536
|
+
def _validate_write_inputs(
|
|
537
|
+
self,
|
|
538
|
+
target_path: str,
|
|
539
|
+
content: Any,
|
|
540
|
+
format: DocumentFormat,
|
|
541
|
+
mode: WriteMode,
|
|
542
|
+
):
|
|
543
|
+
"""Validate write operation inputs"""
|
|
544
|
+
# Path validation
|
|
545
|
+
if not target_path or not isinstance(target_path, str):
|
|
546
|
+
raise ValueError("Invalid target path")
|
|
547
|
+
|
|
548
|
+
# Content validation
|
|
549
|
+
if content is None:
|
|
550
|
+
raise ValueError("Content cannot be None")
|
|
551
|
+
|
|
552
|
+
# Size validation
|
|
553
|
+
content_size = self._calculate_content_size(content)
|
|
554
|
+
if content_size > self.config.max_file_size:
|
|
555
|
+
raise ValueError(
|
|
556
|
+
f"Content size {content_size} exceeds maximum {self.config.max_file_size}"
|
|
557
|
+
)
|
|
558
|
+
|
|
559
|
+
# Permission validation
|
|
560
|
+
if not self._check_write_permission(target_path, mode):
|
|
561
|
+
raise WritePermissionError(f"No write permission for {target_path}")
|
|
562
|
+
|
|
563
|
+
def _prepare_content(
|
|
564
|
+
self,
|
|
565
|
+
content: Any,
|
|
566
|
+
format: DocumentFormat,
|
|
567
|
+
encoding: EncodingType,
|
|
568
|
+
validation_level: ValidationLevel,
|
|
569
|
+
) -> Tuple[Union[str, bytes], Dict]:
|
|
570
|
+
"""Prepare and validate content for writing"""
|
|
571
|
+
|
|
572
|
+
# Content conversion based on format
|
|
573
|
+
if format == DocumentFormat.JSON:
|
|
574
|
+
if isinstance(content, (dict, list)):
|
|
575
|
+
processed_content = json.dumps(content, ensure_ascii=False, indent=2)
|
|
576
|
+
else:
|
|
577
|
+
processed_content = str(content)
|
|
578
|
+
elif format == DocumentFormat.CSV:
|
|
579
|
+
processed_content = self._convert_to_csv(content)
|
|
580
|
+
elif format == DocumentFormat.XML:
|
|
581
|
+
processed_content = self._convert_to_xml(content)
|
|
582
|
+
elif format == DocumentFormat.YAML:
|
|
583
|
+
processed_content = self._convert_to_yaml(content)
|
|
584
|
+
elif format == DocumentFormat.HTML:
|
|
585
|
+
processed_content = self._convert_to_html(content)
|
|
586
|
+
elif format == DocumentFormat.MARKDOWN:
|
|
587
|
+
processed_content = self._convert_to_markdown(content)
|
|
588
|
+
elif format == DocumentFormat.BINARY:
|
|
589
|
+
if isinstance(content, bytes):
|
|
590
|
+
processed_content = content
|
|
591
|
+
else:
|
|
592
|
+
processed_content = str(content).encode(encoding.value)
|
|
593
|
+
else:
|
|
594
|
+
processed_content = str(content)
|
|
595
|
+
|
|
596
|
+
# Content validation
|
|
597
|
+
if self.config.enable_content_validation:
|
|
598
|
+
self._validate_content(processed_content, format, validation_level)
|
|
599
|
+
|
|
600
|
+
# Calculate metadata
|
|
601
|
+
content_metadata = {
|
|
602
|
+
"original_type": type(content).__name__,
|
|
603
|
+
"processed_size": (
|
|
604
|
+
len(processed_content) if isinstance(processed_content, (str, bytes)) else 0
|
|
605
|
+
),
|
|
606
|
+
"format": format,
|
|
607
|
+
"encoding": encoding,
|
|
608
|
+
"checksum": self._calculate_checksum(processed_content),
|
|
609
|
+
"validation_level": validation_level,
|
|
610
|
+
"timestamp": datetime.now().isoformat(),
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
return processed_content, content_metadata
|
|
614
|
+
|
|
615
|
+
def _plan_write_operation(
|
|
616
|
+
self, target_path: str, mode: WriteMode, metadata: Optional[Dict]
|
|
617
|
+
) -> Dict:
|
|
618
|
+
"""Plan the write operation based on mode and target"""
|
|
619
|
+
|
|
620
|
+
plan = {
|
|
621
|
+
"target_path": target_path,
|
|
622
|
+
"mode": mode,
|
|
623
|
+
"file_exists": self._file_exists(target_path),
|
|
624
|
+
"is_cloud_path": self._is_cloud_storage_path(target_path),
|
|
625
|
+
"requires_backup": False,
|
|
626
|
+
"requires_versioning": False,
|
|
627
|
+
"atomic_operation": self.config.atomic_write,
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
if mode == WriteMode.CREATE and plan["file_exists"]:
|
|
631
|
+
raise DocumentWriterError(f"File already exists: {target_path}")
|
|
632
|
+
|
|
633
|
+
if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
|
|
634
|
+
plan["requires_backup"] = self.config.enable_backup
|
|
635
|
+
plan["requires_versioning"] = self.config.enable_versioning
|
|
636
|
+
|
|
637
|
+
if mode == WriteMode.APPEND and not plan["file_exists"]:
|
|
638
|
+
# Convert to CREATE mode
|
|
639
|
+
plan["mode"] = WriteMode.CREATE
|
|
640
|
+
|
|
641
|
+
return plan
|
|
642
|
+
|
|
643
|
+
def _create_backup(self, target_path: str, comment: Optional[str] = None) -> Dict:
|
|
644
|
+
"""Create backup of existing file"""
|
|
645
|
+
if not self._file_exists(target_path):
|
|
646
|
+
return None
|
|
647
|
+
|
|
648
|
+
try:
|
|
649
|
+
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
650
|
+
file_stem = Path(target_path).stem
|
|
651
|
+
file_suffix = Path(target_path).suffix
|
|
652
|
+
|
|
653
|
+
backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
|
|
654
|
+
backup_path = os.path.join(self.config.backup_dir, backup_filename)
|
|
655
|
+
|
|
656
|
+
# Copy file to backup location
|
|
657
|
+
if self._is_cloud_storage_path(target_path):
|
|
658
|
+
backup_path = self._backup_cloud_file(target_path, backup_path)
|
|
659
|
+
else:
|
|
660
|
+
shutil.copy2(target_path, backup_path)
|
|
661
|
+
|
|
662
|
+
backup_info = {
|
|
663
|
+
"original_path": target_path,
|
|
664
|
+
"backup_path": backup_path,
|
|
665
|
+
"timestamp": timestamp,
|
|
666
|
+
"comment": comment,
|
|
667
|
+
"checksum": self._calculate_file_checksum(target_path),
|
|
668
|
+
}
|
|
669
|
+
|
|
670
|
+
self.logger.info(f"Created backup: {backup_path}")
|
|
671
|
+
return backup_info
|
|
672
|
+
|
|
673
|
+
except Exception as e:
|
|
674
|
+
self.logger.error(f"Failed to create backup for {target_path}: {e}")
|
|
675
|
+
raise StorageError(f"Backup creation failed: {e}")
|
|
676
|
+
|
|
677
|
+
def _execute_atomic_write(
|
|
678
|
+
self,
|
|
679
|
+
target_path: str,
|
|
680
|
+
content: Union[str, bytes],
|
|
681
|
+
format: DocumentFormat,
|
|
682
|
+
encoding: EncodingType,
|
|
683
|
+
plan: Dict,
|
|
684
|
+
) -> Dict:
|
|
685
|
+
"""Execute atomic write operation"""
|
|
686
|
+
|
|
687
|
+
if plan["is_cloud_path"]:
|
|
688
|
+
return self._write_to_cloud_storage(target_path, content, format, encoding, plan)
|
|
689
|
+
else:
|
|
690
|
+
return self._write_to_local_file(target_path, content, format, encoding, plan)
|
|
691
|
+
|
|
692
|
+
def _write_to_local_file(
|
|
693
|
+
self,
|
|
694
|
+
target_path: str,
|
|
695
|
+
content: Union[str, bytes],
|
|
696
|
+
format: DocumentFormat,
|
|
697
|
+
encoding: EncodingType,
|
|
698
|
+
plan: Dict,
|
|
699
|
+
) -> Dict:
|
|
700
|
+
"""Write to local file system with atomic operation"""
|
|
701
|
+
|
|
702
|
+
try:
|
|
703
|
+
# Create parent directories
|
|
704
|
+
os.makedirs(os.path.dirname(target_path), exist_ok=True)
|
|
705
|
+
|
|
706
|
+
if plan["atomic_operation"]:
|
|
707
|
+
# Atomic write using temporary file
|
|
708
|
+
temp_path = f"{target_path}.tmp.{uuid.uuid4().hex}"
|
|
709
|
+
|
|
710
|
+
try:
|
|
711
|
+
if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
|
|
712
|
+
# Read existing content first
|
|
713
|
+
with open(target_path, "rb") as f:
|
|
714
|
+
existing_content = f.read()
|
|
715
|
+
|
|
716
|
+
if isinstance(content, str):
|
|
717
|
+
content = existing_content.decode(encoding.value) + content
|
|
718
|
+
else:
|
|
719
|
+
content = existing_content + content
|
|
720
|
+
|
|
721
|
+
# Write to temporary file
|
|
722
|
+
if isinstance(content, bytes):
|
|
723
|
+
with open(temp_path, "wb") as f:
|
|
724
|
+
f.write(content)
|
|
725
|
+
else:
|
|
726
|
+
# Handle both EncodingType enum and string
|
|
727
|
+
enc_value = encoding.value if hasattr(encoding, "value") else str(encoding)
|
|
728
|
+
with open(temp_path, "w", encoding=enc_value) as f:
|
|
729
|
+
f.write(content)
|
|
730
|
+
|
|
731
|
+
# Atomic move
|
|
732
|
+
shutil.move(temp_path, target_path)
|
|
733
|
+
|
|
734
|
+
finally:
|
|
735
|
+
# Cleanup temp file if it still exists
|
|
736
|
+
if os.path.exists(temp_path):
|
|
737
|
+
os.unlink(temp_path)
|
|
738
|
+
else:
|
|
739
|
+
# Direct write
|
|
740
|
+
mode_map = {
|
|
741
|
+
WriteMode.CREATE: "w",
|
|
742
|
+
WriteMode.OVERWRITE: "w",
|
|
743
|
+
WriteMode.APPEND: "a",
|
|
744
|
+
WriteMode.UPDATE: "w",
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
file_mode = mode_map.get(plan["mode"], "w")
|
|
748
|
+
if isinstance(content, bytes):
|
|
749
|
+
file_mode += "b"
|
|
750
|
+
|
|
751
|
+
# Handle both EncodingType enum and string
|
|
752
|
+
enc_value = (
|
|
753
|
+
None
|
|
754
|
+
if isinstance(content, bytes)
|
|
755
|
+
else (encoding.value if hasattr(encoding, "value") else str(encoding))
|
|
756
|
+
)
|
|
757
|
+
with open(target_path, file_mode, encoding=enc_value) as f:
|
|
758
|
+
f.write(content)
|
|
759
|
+
|
|
760
|
+
# Get file stats
|
|
761
|
+
stat = os.stat(target_path)
|
|
762
|
+
|
|
763
|
+
return {
|
|
764
|
+
"path": target_path,
|
|
765
|
+
"size": stat.st_size,
|
|
766
|
+
"checksum": self._calculate_file_checksum(target_path),
|
|
767
|
+
"modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
|
|
768
|
+
"atomic_write": plan["atomic_operation"],
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
except Exception as e:
|
|
772
|
+
raise StorageError(f"Local file write failed: {e}")
|
|
773
|
+
|
|
774
|
+
async def _write_to_cloud_storage(
|
|
775
|
+
self,
|
|
776
|
+
target_path: str,
|
|
777
|
+
content: Union[str, bytes],
|
|
778
|
+
format: DocumentFormat,
|
|
779
|
+
encoding: EncodingType,
|
|
780
|
+
plan: Dict,
|
|
781
|
+
) -> Dict:
|
|
782
|
+
"""Write to cloud storage"""
|
|
783
|
+
|
|
784
|
+
if not self.file_storage:
|
|
785
|
+
raise StorageError("Cloud storage not available")
|
|
786
|
+
|
|
787
|
+
try:
|
|
788
|
+
storage_path = self._parse_cloud_storage_path(target_path)
|
|
789
|
+
|
|
790
|
+
# Handle append mode for cloud storage
|
|
791
|
+
if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
|
|
792
|
+
existing_content = await self.file_storage.retrieve(storage_path)
|
|
793
|
+
if isinstance(content, str) and isinstance(existing_content, str):
|
|
794
|
+
content = existing_content + content
|
|
795
|
+
elif isinstance(content, bytes) and isinstance(existing_content, bytes):
|
|
796
|
+
content = existing_content + content
|
|
797
|
+
|
|
798
|
+
# Store in cloud storage
|
|
799
|
+
await self.file_storage.store(storage_path, content)
|
|
800
|
+
|
|
801
|
+
return {
|
|
802
|
+
"path": target_path,
|
|
803
|
+
"storage_path": storage_path,
|
|
804
|
+
"size": (len(content) if isinstance(content, (str, bytes)) else 0),
|
|
805
|
+
"checksum": self._calculate_checksum(content),
|
|
806
|
+
"cloud_storage": True,
|
|
807
|
+
}
|
|
808
|
+
|
|
809
|
+
except Exception as e:
|
|
810
|
+
raise StorageError(f"Cloud storage write failed: {e}")
|
|
811
|
+
|
|
812
|
+
def _handle_versioning(
|
|
813
|
+
self,
|
|
814
|
+
target_path: str,
|
|
815
|
+
content_metadata: Dict,
|
|
816
|
+
metadata: Optional[Dict],
|
|
817
|
+
) -> Optional[Dict]:
|
|
818
|
+
"""Handle document versioning"""
|
|
819
|
+
|
|
820
|
+
if not self.config.enable_versioning:
|
|
821
|
+
return None
|
|
822
|
+
|
|
823
|
+
try:
|
|
824
|
+
version_info = {
|
|
825
|
+
"path": target_path,
|
|
826
|
+
"version": self._get_next_version(target_path),
|
|
827
|
+
"timestamp": datetime.now().isoformat(),
|
|
828
|
+
"content_metadata": content_metadata,
|
|
829
|
+
"user_metadata": metadata or {},
|
|
830
|
+
}
|
|
831
|
+
|
|
832
|
+
# Store version info
|
|
833
|
+
version_file = f"{target_path}.versions.json"
|
|
834
|
+
versions = self._load_version_history(version_file)
|
|
835
|
+
versions.append(version_info)
|
|
836
|
+
|
|
837
|
+
# Keep only recent versions
|
|
838
|
+
if len(versions) > self.config.max_backup_versions:
|
|
839
|
+
versions = versions[-self.config.max_backup_versions :]
|
|
840
|
+
|
|
841
|
+
self._save_version_history(version_file, versions)
|
|
842
|
+
|
|
843
|
+
return version_info
|
|
844
|
+
|
|
845
|
+
except Exception as e:
|
|
846
|
+
self.logger.warning(f"Versioning failed for {target_path}: {e}")
|
|
847
|
+
return None
|
|
848
|
+
|
|
849
|
+
def _validate_content(
|
|
850
|
+
self,
|
|
851
|
+
content: Union[str, bytes],
|
|
852
|
+
format: DocumentFormat,
|
|
853
|
+
validation_level: ValidationLevel,
|
|
854
|
+
):
|
|
855
|
+
"""Validate content based on format and validation level"""
|
|
856
|
+
|
|
857
|
+
if validation_level == ValidationLevel.NONE:
|
|
858
|
+
return
|
|
859
|
+
|
|
860
|
+
try:
|
|
861
|
+
# Format-specific validation
|
|
862
|
+
if format in self.validators:
|
|
863
|
+
self.validators[format](content, validation_level)
|
|
864
|
+
|
|
865
|
+
# Security validation for enterprise level
|
|
866
|
+
if validation_level == ValidationLevel.ENTERPRISE:
|
|
867
|
+
self._security_scan_content(content)
|
|
868
|
+
|
|
869
|
+
except Exception as e:
|
|
870
|
+
raise ContentValidationError(f"Content validation failed: {e}")
|
|
871
|
+
|
|
872
|
+
def _validate_json_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
|
|
873
|
+
"""Validate JSON content"""
|
|
874
|
+
try:
|
|
875
|
+
if isinstance(content, bytes):
|
|
876
|
+
content = content.decode("utf-8")
|
|
877
|
+
json.loads(content)
|
|
878
|
+
except json.JSONDecodeError as e:
|
|
879
|
+
raise ContentValidationError(f"Invalid JSON: {e}")
|
|
880
|
+
|
|
881
|
+
def _validate_xml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
|
|
882
|
+
"""Validate XML content"""
|
|
883
|
+
try:
|
|
884
|
+
import xml.etree.ElementTree as ET
|
|
885
|
+
|
|
886
|
+
if isinstance(content, bytes):
|
|
887
|
+
content = content.decode("utf-8")
|
|
888
|
+
ET.fromstring(content)
|
|
889
|
+
except ET.ParseError as e:
|
|
890
|
+
raise ContentValidationError(f"Invalid XML: {e}")
|
|
891
|
+
|
|
892
|
+
def _validate_csv_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
|
|
893
|
+
"""Validate CSV content"""
|
|
894
|
+
try:
|
|
895
|
+
import csv
|
|
896
|
+
import io
|
|
897
|
+
|
|
898
|
+
if isinstance(content, bytes):
|
|
899
|
+
content = content.decode("utf-8")
|
|
900
|
+
csv.reader(io.StringIO(content))
|
|
901
|
+
except Exception as e:
|
|
902
|
+
raise ContentValidationError(f"Invalid CSV: {e}")
|
|
903
|
+
|
|
904
|
+
def _validate_yaml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
|
|
905
|
+
"""Validate YAML content"""
|
|
906
|
+
try:
|
|
907
|
+
import yaml
|
|
908
|
+
|
|
909
|
+
if isinstance(content, bytes):
|
|
910
|
+
content = content.decode("utf-8")
|
|
911
|
+
yaml.safe_load(content)
|
|
912
|
+
except yaml.YAMLError as e:
|
|
913
|
+
raise ContentValidationError(f"Invalid YAML: {e}")
|
|
914
|
+
|
|
915
|
+
def _validate_html_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
|
|
916
|
+
"""Validate HTML content"""
|
|
917
|
+
try:
|
|
918
|
+
from bs4 import BeautifulSoup
|
|
919
|
+
|
|
920
|
+
if isinstance(content, bytes):
|
|
921
|
+
content = content.decode("utf-8")
|
|
922
|
+
BeautifulSoup(content, "html.parser")
|
|
923
|
+
except Exception as e:
|
|
924
|
+
raise ContentValidationError(f"Invalid HTML: {e}")
|
|
925
|
+
|
|
926
|
+
def _security_scan_content(self, content: Union[str, bytes]):
|
|
927
|
+
"""Perform security scan on content"""
|
|
928
|
+
if isinstance(content, bytes):
|
|
929
|
+
content = content.decode("utf-8", errors="ignore")
|
|
930
|
+
|
|
931
|
+
# Check for suspicious patterns
|
|
932
|
+
suspicious_patterns = [
|
|
933
|
+
r"<script[^>]*>", # JavaScript
|
|
934
|
+
r"javascript:", # JavaScript URLs
|
|
935
|
+
r"vbscript:", # VBScript URLs
|
|
936
|
+
r"data:.*base64", # Base64 data URLs
|
|
937
|
+
r"eval\s*\(", # eval() calls
|
|
938
|
+
r"exec\s*\(", # exec() calls
|
|
939
|
+
]
|
|
940
|
+
|
|
941
|
+
import re
|
|
942
|
+
|
|
943
|
+
for pattern in suspicious_patterns:
|
|
944
|
+
if re.search(pattern, content, re.IGNORECASE):
|
|
945
|
+
raise ContentValidationError("Security scan failed: suspicious pattern detected")
|
|
946
|
+
|
|
947
|
+
# Helper methods
|
|
948
|
+
def _calculate_content_size(self, content: Any) -> int:
|
|
949
|
+
"""Calculate content size in bytes"""
|
|
950
|
+
if isinstance(content, bytes):
|
|
951
|
+
return len(content)
|
|
952
|
+
elif isinstance(content, str):
|
|
953
|
+
return len(content.encode("utf-8"))
|
|
954
|
+
else:
|
|
955
|
+
return len(str(content).encode("utf-8"))
|
|
956
|
+
|
|
957
|
+
def _calculate_checksum(self, content: Union[str, bytes]) -> str:
|
|
958
|
+
"""Calculate content checksum"""
|
|
959
|
+
if isinstance(content, str):
|
|
960
|
+
content = content.encode("utf-8")
|
|
961
|
+
return hashlib.sha256(content).hexdigest()
|
|
962
|
+
|
|
963
|
+
def _calculate_file_checksum(self, file_path: str) -> str:
|
|
964
|
+
"""Calculate file checksum"""
|
|
965
|
+
hash_sha256 = hashlib.sha256()
|
|
966
|
+
with open(file_path, "rb") as f:
|
|
967
|
+
for chunk in iter(lambda: f.read(4096), b""):
|
|
968
|
+
hash_sha256.update(chunk)
|
|
969
|
+
return hash_sha256.hexdigest()
|
|
970
|
+
|
|
971
|
+
def _check_write_permission(self, target_path: str, mode: WriteMode) -> bool:
|
|
972
|
+
"""Check write permission for target path"""
|
|
973
|
+
try:
|
|
974
|
+
if self._is_cloud_storage_path(target_path):
|
|
975
|
+
return self.file_storage is not None
|
|
976
|
+
|
|
977
|
+
parent_dir = os.path.dirname(target_path)
|
|
978
|
+
if not os.path.exists(parent_dir):
|
|
979
|
+
# Check if we can create the directory
|
|
980
|
+
return os.access(os.path.dirname(parent_dir), os.W_OK)
|
|
981
|
+
|
|
982
|
+
if os.path.exists(target_path):
|
|
983
|
+
return os.access(target_path, os.W_OK)
|
|
984
|
+
else:
|
|
985
|
+
return os.access(parent_dir, os.W_OK)
|
|
986
|
+
|
|
987
|
+
except Exception:
|
|
988
|
+
return False

    def _file_exists(self, file_path: str) -> bool:
        """Check if file exists (local or cloud)"""
        if self._is_cloud_storage_path(file_path):
            # For cloud storage, we'd need to implement exists check
            return False  # Simplified for now
        else:
            return os.path.exists(file_path)

    def _is_cloud_storage_path(self, source: str) -> bool:
        """Check if source is a cloud storage path"""
        cloud_schemes = ["gs", "s3", "azure", "cloud"]
        try:
            from urllib.parse import urlparse

            parsed = urlparse(source)
            return parsed.scheme in cloud_schemes
        except Exception:
            return False

    def _parse_cloud_storage_path(self, source: str) -> str:
        """Parse cloud storage path to get storage key"""
        try:
            from urllib.parse import urlparse

            parsed = urlparse(source)
            return parsed.path.lstrip("/")
        except Exception:
            return source
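
    # Worked example: for "gs://my-bucket/reports/q3.md", urlparse places
    # "my-bucket" in netloc and "/reports/q3.md" in path, so
    # _is_cloud_storage_path() is True and _parse_cloud_storage_path() returns
    # "reports/q3.md" -- the bucket name is dropped, presumably because the
    # configured file_storage backend is already bound to one.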

    # Content conversion methods
    def _convert_to_csv(self, content: Any) -> str:
        """Convert content to CSV format"""
        import csv
        import io

        output = io.StringIO()
        writer = csv.writer(output)

        if isinstance(content, list):
            for row in content:
                if isinstance(row, (list, tuple)):
                    writer.writerow(row)
                else:
                    writer.writerow([row])
        elif isinstance(content, dict):
            # Convert dict to CSV with headers
            if content:
                headers = list(content.keys())
                writer.writerow(headers)
                writer.writerow([content[h] for h in headers])
        else:
            writer.writerow([str(content)])

        return output.getvalue()
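
    # Illustrative mapping: {"name": "Ada", "role": "eng"} becomes the two
    # CSV lines "name,role" and "Ada,eng"; a flat list like ["a", "b"] is
    # written one value per row. Note a dict yields exactly one data row.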

    def _convert_to_xml(self, content: Any) -> str:
        """Convert content to XML format"""
        import xml.etree.ElementTree as ET

        if isinstance(content, dict):
            root = ET.Element("document")
            for key, value in content.items():
                elem = ET.SubElement(root, str(key))
                elem.text = str(value)
            return ET.tostring(root, encoding="unicode")
        else:
            root = ET.Element("document")
            root.text = str(content)
            return ET.tostring(root, encoding="unicode")
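
    # Illustrative output: _convert_to_xml({"title": "Q3", "status": "ok"})
    # yields "<document><title>Q3</title><status>ok</status></document>".
    # Dict keys are used verbatim as tag names, so keys with spaces or other
    # characters invalid in XML names would produce unparseable markup.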

    def _convert_to_yaml(self, content: Any) -> str:
        """Convert content to YAML format"""
        try:
            import yaml

            return yaml.dump(content, default_flow_style=False, allow_unicode=True)
        except ImportError:
            # Fallback to simple string representation
            return str(content)

    def _convert_to_html(self, content: Any) -> str:
        """Convert content to HTML format"""
        if isinstance(content, dict):
            html = "<html><body>\n"
            for key, value in content.items():
                html += f"<h3>{key}</h3>\n<p>{value}</p>\n"
            html += "</body></html>"
            return html
        else:
            return f"<html><body><pre>{str(content)}</pre></body></html>"
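
    # Caveat on the HTML converter above: values are interpolated without
    # escaping; if untrusted data can reach it, wrapping each value in the
    # stdlib's html.escape(str(value)) would be the conservative choice.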

    def _convert_to_markdown(self, content: Any) -> str:
        """Convert content to Markdown format"""
        if isinstance(content, dict):
            md = ""
            for key, value in content.items():
                md += f"## {key}\n\n{value}\n\n"
            return md
        else:
            return str(content)

    # Versioning methods
    def _get_next_version(self, file_path: str) -> int:
        """Get next version number for file"""
        version_file = f"{file_path}.versions.json"
        versions = self._load_version_history(version_file)
        return len(versions) + 1

    def _load_version_history(self, version_file: str) -> List[Dict]:
        """Load version history from file"""
        try:
            if os.path.exists(version_file):
                with open(version_file, "r") as f:
                    return json.load(f)
        except Exception:
            pass
        return []

    def _save_version_history(self, version_file: str, versions: List[Dict]):
        """Save version history to file"""
        try:
            with open(version_file, "w") as f:
                json.dump(versions, f, indent=2)
        except Exception as e:
            self.logger.warning(f"Failed to save version history: {e}")
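
    # Version history lives in a JSON sidecar next to the target file
    # ("report.md" -> "report.md.versions.json"); a missing or unreadable
    # sidecar reads as an empty history, so the first write gets version 1.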

    # Backup and rollback methods
    def _backup_cloud_file(self, source_path: str, backup_path: str) -> str:
        """Backup cloud file"""
        # Simplified implementation
        return backup_path

    def _rollback_from_backup(self, target_path: str, backup_info: Dict):
        """Rollback file from backup"""
        try:
            if backup_info and os.path.exists(backup_info["backup_path"]):
                shutil.copy2(backup_info["backup_path"], target_path)
                self.logger.info(f"Rolled back {target_path} from backup")
        except Exception as e:
            self.logger.error(f"Rollback failed: {e}")

    def _rollback_batch_operations(
        self, completed_operations: List[Dict], backup_operations: List[Dict]
    ):
        """Rollback batch operations"""
        for op in reversed(completed_operations):
            try:
                result = op.get("result", {})
                backup_info = result.get("backup_info")
                if backup_info:
                    self._rollback_from_backup(result["write_result"]["path"], backup_info)
            except Exception as e:
                self.logger.error(f"Batch rollback failed for operation: {e}")

    def _log_write_operation(
        self,
        operation_id: str,
        target_path: str,
        mode: WriteMode,
        write_result: Dict,
        backup_info: Optional[Dict],
    ) -> Dict:
        """Log write operation for audit"""
        audit_info = {
            "operation_id": operation_id,
            "timestamp": datetime.now().isoformat(),
            "target_path": target_path,
            "mode": getattr(mode, "value", mode),  # enum-safe for json.dumps below
            "success": True,
            "file_size": write_result.get("size", 0),
            "checksum": write_result.get("checksum"),
            "backup_created": backup_info is not None,
        }

        # Log to audit file
        try:
            audit_file = os.path.join(self.config.temp_dir, "write_audit.log")
            with open(audit_file, "a") as f:
                f.write(json.dumps(audit_info) + "\n")
        except Exception as e:
            self.logger.warning(f"Audit logging failed: {e}")

        return audit_info
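
    # Illustrative audit record (one JSON object per line of write_audit.log;
    # the field values here are made up):
    #   {"operation_id": "...", "timestamp": "2024-01-01T00:00:00",
    #    "target_path": "/data/report.md", "mode": "backup_write",
    #    "success": true, "file_size": 1024, "checksum": "ab12...",
    #    "backup_created": true}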

    def edit_document(
        self,
        target_path: str,
        operation: EditOperation,
        content: Optional[str] = None,
        position: Optional[Dict[str, Any]] = None,
        selection: Optional[Dict[str, Any]] = None,
        format_options: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Perform advanced editing operations on documents

        Args:
            target_path: Target file path
            operation: Edit operation to perform
            content: Content for the operation (if applicable)
            position: Position info (line, column, offset)
            selection: Text selection range
            format_options: Additional format options

        Returns:
            Dict containing edit results
        """
        try:
            start_time = datetime.now()
            operation_id = str(uuid.uuid4())

            self.logger.info(
                f"Starting edit operation {operation_id}: {operation} on {target_path}"
            )

            # Read current document content
            current_content = self._read_document_content(target_path)

            # Perform the specific edit operation
            if operation == EditOperation.INSERT_TEXT:
                edited_content = self._insert_text(current_content, content, position)
            elif operation == EditOperation.DELETE_TEXT:
                edited_content = self._delete_text(current_content, selection)
            elif operation == EditOperation.REPLACE_TEXT:
                edited_content = self._replace_text(current_content, selection, content)
            elif operation == EditOperation.BOLD:
                edited_content = self._format_text_bold(current_content, selection, format_options)
            elif operation == EditOperation.ITALIC:
                edited_content = self._format_text_italic(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.UNDERLINE:
                edited_content = self._format_text_underline(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.STRIKETHROUGH:
                edited_content = self._format_text_strikethrough(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.HIGHLIGHT:
                edited_content = self._format_text_highlight(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.INSERT_LINE:
                edited_content = self._insert_line(current_content, position, content)
            elif operation == EditOperation.DELETE_LINE:
                edited_content = self._delete_line(current_content, position)
            elif operation == EditOperation.MOVE_LINE:
                edited_content = self._move_line(current_content, position, format_options)
            elif operation == EditOperation.COPY_TEXT:
                return self._copy_text(current_content, selection)
            elif operation == EditOperation.CUT_TEXT:
                edited_content, cut_content = self._cut_text(current_content, selection)
                # Store cut content in clipboard
                self._store_clipboard_content(cut_content)
            elif operation == EditOperation.PASTE_TEXT:
                clipboard_content = self._get_clipboard_content()
                edited_content = self._paste_text(current_content, position, clipboard_content)
            else:
                raise ValueError(f"Unsupported edit operation: {operation}")

            # Write the edited content back to file
            file_format = self._detect_file_format(target_path)
            write_result = self.write_document(
                target_path=target_path,
                content=edited_content,
                format=file_format,
                mode="backup_write",  # Always backup before editing
                backup_comment=f"Edit operation: {operation}",
            )

            result = {
                "operation_id": operation_id,
                "target_path": target_path,
                "operation": operation,
                "edit_metadata": {
                    "original_size": len(current_content),
                    "edited_size": (len(edited_content) if isinstance(edited_content, str) else 0),
                    "position": position,
                    "selection": selection,
                },
                "write_result": write_result,
                "processing_metadata": {
                    "start_time": start_time.isoformat(),
                    "end_time": datetime.now().isoformat(),
                    "duration": (datetime.now() - start_time).total_seconds(),
                },
            }

            self.logger.info(f"Edit operation {operation_id} completed successfully")
            return result

        except Exception as e:
            raise DocumentWriterError(f"Edit operation failed: {str(e)}")
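
    # Hedged usage sketch ('writer' is a hypothetical configured instance):
    #
    #   writer.edit_document(
    #       target_path="notes.md",
    #       operation=EditOperation.INSERT_TEXT,
    #       content="## TODO\n",
    #       position={"line": 0, "column": 0},
    #   )
    #
    # Every mutating branch funnels through write_document with
    # mode="backup_write", so a bad edit can be restored from its backup.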

    def format_text(
        self,
        target_path: str,
        text_to_format: str,
        format_type: EditOperation,
        format_options: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Apply formatting to specific text in a document

        Args:
            target_path: Target file path
            text_to_format: Text to apply formatting to
            format_type: Type of formatting (bold, italic, etc.)
            format_options: Additional format options

        Returns:
            Dict containing formatting results
        """
        try:
            current_content = self._read_document_content(target_path)

            # Find all occurrences of the text
            formatted_content = self._apply_text_formatting(
                current_content, text_to_format, format_type, format_options
            )

            # Write back to file
            file_format = self._detect_file_format(target_path)
            write_result = self.write_document(
                target_path=target_path,
                content=formatted_content,
                format=file_format,
                mode="backup_write",
            )

            return {
                "target_path": target_path,
                "text_formatted": text_to_format,
                "format_type": format_type,
                "write_result": write_result,
            }

        except Exception as e:
            raise DocumentWriterError(f"Text formatting failed: {str(e)}")

    def find_replace(
        self,
        target_path: str,
        find_text: str,
        replace_text: str,
        replace_all: bool = False,
        case_sensitive: bool = True,
        regex_mode: bool = False,
    ) -> Dict[str, Any]:
        """
        Find and replace text in a document

        Args:
            target_path: Target file path
            find_text: Text to find
            replace_text: Text to replace with
            replace_all: Replace all occurrences
            case_sensitive: Case sensitive search
            regex_mode: Use regex for find/replace

        Returns:
            Dict containing find/replace results
        """
        try:
            current_content = self._read_document_content(target_path)

            # Perform find and replace
            new_content, replacements = self._perform_find_replace(
                current_content,
                find_text,
                replace_text,
                replace_all,
                case_sensitive,
                regex_mode,
            )

            if replacements > 0:
                # Write back to file
                file_format = self._detect_file_format(target_path)
                write_result = self.write_document(
                    target_path=target_path,
                    content=new_content,
                    format=file_format,
                    mode="backup_write",
                    backup_comment=f"Find/Replace: '{find_text}' -> '{replace_text}'",
                )

                return {
                    "target_path": target_path,
                    "find_text": find_text,
                    "replace_text": replace_text,
                    "replacements_made": replacements,
                    "write_result": write_result,
                }
            else:
                return {
                    "target_path": target_path,
                    "find_text": find_text,
                    "replace_text": replace_text,
                    "replacements_made": 0,
                    "message": "No matches found",
                }

        except Exception as e:
            raise DocumentWriterError(f"Find/replace operation failed: {str(e)}")
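
    # Hedged usage sketch: literal, case-insensitive, replace everywhere.
    #
    #   result = writer.find_replace(
    #       target_path="notes.md",
    #       find_text="colour",
    #       replace_text="color",
    #       replace_all=True,
    #       case_sensitive=False,
    #   )
    #   result["replacements_made"]  # 0 means the file was left untouched
    #
    # With regex_mode=True the pattern is passed to re.sub/re.subn as-is, so
    # metacharacters in find_text are significant.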

    # Helper methods for editing operations
    def _read_document_content(self, file_path: str) -> str:
        """Read document content for editing"""
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                return f.read()
        except UnicodeDecodeError:
            # Try with different encodings
            for encoding in ["gbk", "latin1", "cp1252"]:
                try:
                    with open(file_path, "r", encoding=encoding) as f:
                        return f.read()
                except Exception:
                    continue
            raise DocumentWriterError(f"Cannot decode file: {file_path}")
        except Exception as e:
            raise DocumentWriterError(f"Cannot read file {file_path}: {str(e)}")

    def _detect_file_format(self, file_path: str) -> str:
        """Detect file format from extension"""
        ext = os.path.splitext(file_path)[1].lower()
        format_map = {
            ".txt": "txt",
            ".json": "json",
            ".csv": "csv",
            ".xml": "xml",
            ".html": "html",
            ".htm": "html",
            ".md": "markdown",
            ".markdown": "markdown",
            ".yaml": "yaml",
            ".yml": "yaml",
        }
        return format_map.get(ext, "txt")

    def _insert_text(self, content: str, text: str, position: Optional[Dict[str, Any]]) -> str:
        """Insert text at specified position"""
        if not position:
            return content + text

        if "offset" in position:
            offset = position["offset"]
            return content[:offset] + text + content[offset:]
        elif "line" in position:
            lines = content.split("\n")
            line_num = position.get("line", 0)
            column = position.get("column", 0)

            if line_num < len(lines):
                line = lines[line_num]
                lines[line_num] = line[:column] + text + line[column:]
            else:
                lines.append(text)
            return "\n".join(lines)
        else:
            return content + text

    def _delete_text(self, content: str, selection: Optional[Dict[str, Any]]) -> str:
        """Delete text in specified selection"""
        if not selection:
            return content

        if "start_offset" in selection and "end_offset" in selection:
            start = selection["start_offset"]
            end = selection["end_offset"]
            return content[:start] + content[end:]
        elif "start_line" in selection and "end_line" in selection:
            lines = content.split("\n")
            start_line = selection["start_line"]
            end_line = selection["end_line"]
            start_col = selection.get("start_column", 0)
            end_col = selection.get(
                "end_column",
                len(lines[end_line]) if end_line < len(lines) else 0,
            )

            if start_line == end_line:
                # Same line deletion
                line = lines[start_line]
                lines[start_line] = line[:start_col] + line[end_col:]
            else:
                # Multi-line deletion
                lines[start_line] = lines[start_line][:start_col]
                if end_line < len(lines):
                    lines[start_line] += lines[end_line][end_col:]
                del lines[start_line + 1 : end_line + 1]

            return "\n".join(lines)

        return content
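
    # Worked example for offset selections: with content "abcdef",
    #   _delete_text(content, {"start_offset": 1, "end_offset": 4}) -> "aef"
    # Line/column selections are 0-based; a multi-line selection keeps the
    # head of the start line and the tail of the end line, joined into one.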

    def _replace_text(
        self,
        content: str,
        selection: Optional[Dict[str, Any]],
        replacement: str,
    ) -> str:
        """Replace text in specified selection"""
        if not selection:
            return content

        # First delete the selected text, then insert replacement
        content_after_delete = self._delete_text(content, selection)

        # Calculate new insertion position after deletion
        if "start_offset" in selection:
            insert_pos = {"offset": selection["start_offset"]}
        elif "start_line" in selection:
            insert_pos = {
                "line": selection["start_line"],
                "column": selection.get("start_column", 0),
            }
        else:
            insert_pos = None

        return self._insert_text(content_after_delete, replacement, insert_pos)

    def _format_text_bold(
        self,
        content: str,
        selection: Optional[Dict[str, Any]],
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Apply bold formatting to selected text"""
        if not selection:
            return content

        format_type = options.get("format_type", "markdown") if options else "markdown"

        if format_type == "markdown":
            return self._apply_markdown_formatting(content, selection, "**", "**")
        elif format_type == "html":
            return self._apply_html_formatting(content, selection, "<strong>", "</strong>")
        else:
            return content

    def _format_text_italic(
        self,
        content: str,
        selection: Optional[Dict[str, Any]],
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Apply italic formatting to selected text"""
        if not selection:
            return content

        format_type = options.get("format_type", "markdown") if options else "markdown"

        if format_type == "markdown":
            return self._apply_markdown_formatting(content, selection, "*", "*")
        elif format_type == "html":
            return self._apply_html_formatting(content, selection, "<em>", "</em>")
        else:
            return content

    def _format_text_underline(
        self,
        content: str,
        selection: Optional[Dict[str, Any]],
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Apply underline formatting to selected text"""
        if not selection:
            return content

        format_type = options.get("format_type", "html") if options else "html"

        if format_type == "html":
            return self._apply_html_formatting(content, selection, "<u>", "</u>")
        else:
            return content

    def _format_text_strikethrough(
        self,
        content: str,
        selection: Optional[Dict[str, Any]],
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Apply strikethrough formatting to selected text"""
        if not selection:
            return content

        format_type = options.get("format_type", "markdown") if options else "markdown"

        if format_type == "markdown":
            return self._apply_markdown_formatting(content, selection, "~~", "~~")
        elif format_type == "html":
            return self._apply_html_formatting(content, selection, "<del>", "</del>")
        else:
            return content

    def _format_text_highlight(
        self,
        content: str,
        selection: Optional[Dict[str, Any]],
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Apply highlight formatting to selected text"""
        if not selection:
            return content

        format_type = options.get("format_type", "html") if options else "html"
        color = options.get("color", "yellow") if options else "yellow"

        if format_type == "html":
            return self._apply_html_formatting(
                content,
                selection,
                f'<mark style="background-color: {color}">',
                "</mark>",
            )
        elif format_type == "markdown":
            return self._apply_markdown_formatting(content, selection, "==", "==")
        else:
            return content
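
    # Marker summary for the formatters above (markdown / html):
    #   bold           **text**    <strong>text</strong>
    #   italic         *text*      <em>text</em>
    #   underline      (html only) <u>text</u>
    #   strikethrough  ~~text~~    <del>text</del>
    #   highlight      ==text==    <mark style="background-color: ...">text</mark>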

    def _apply_markdown_formatting(
        self,
        content: str,
        selection: Dict[str, Any],
        start_marker: str,
        end_marker: str,
    ) -> str:
        """Apply markdown formatting to selected text"""
        selected_text = self._extract_selected_text(content, selection)
        formatted_text = start_marker + selected_text + end_marker
        return self._replace_text(content, selection, formatted_text)

    def _apply_html_formatting(
        self,
        content: str,
        selection: Dict[str, Any],
        start_tag: str,
        end_tag: str,
    ) -> str:
        """Apply HTML formatting to selected text"""
        selected_text = self._extract_selected_text(content, selection)
        formatted_text = start_tag + selected_text + end_tag
        return self._replace_text(content, selection, formatted_text)

    def _extract_selected_text(self, content: str, selection: Dict[str, Any]) -> str:
        """Extract text from selection"""
        if "start_offset" in selection and "end_offset" in selection:
            return content[selection["start_offset"] : selection["end_offset"]]
        elif "start_line" in selection and "end_line" in selection:
            lines = content.split("\n")
            start_line = selection["start_line"]
            end_line = selection["end_line"]
            start_col = selection.get("start_column", 0)
            end_col = selection.get(
                "end_column",
                len(lines[end_line]) if end_line < len(lines) else 0,
            )

            if start_line == end_line:
                return lines[start_line][start_col:end_col]
            else:
                result = [lines[start_line][start_col:]]
                result.extend(lines[start_line + 1 : end_line])
                if end_line < len(lines):
                    result.append(lines[end_line][:end_col])
                return "\n".join(result)
        return ""
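
    # Example: with content "one\ntwo\nthree",
    # _extract_selected_text(content, {"start_line": 0, "end_line": 1})
    # returns "one\ntwo" (columns default to 0 and end-of-line respectively).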

    def _insert_line(
        self,
        content: str,
        position: Optional[Dict[str, Any]],
        line_content: str,
    ) -> str:
        """Insert a new line at specified position"""
        lines = content.split("\n")
        line_num = position.get("line", len(lines)) if position else len(lines)

        lines.insert(line_num, line_content)
        return "\n".join(lines)

    def _delete_line(self, content: str, position: Optional[Dict[str, Any]]) -> str:
        """Delete line at specified position"""
        lines = content.split("\n")
        line_num = position.get("line", 0) if position else 0

        if 0 <= line_num < len(lines):
            del lines[line_num]

        return "\n".join(lines)

    def _move_line(
        self,
        content: str,
        position: Optional[Dict[str, Any]],
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Move line to different position"""
        lines = content.split("\n")
        from_line = position.get("line", 0) if position else 0
        to_line = options.get("to_line", 0) if options else 0

        if 0 <= from_line < len(lines) and 0 <= to_line < len(lines):
            line_content = lines.pop(from_line)
            lines.insert(to_line, line_content)

        return "\n".join(lines)

    def _copy_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Copy selected text to clipboard"""
        selected_text = self._extract_selected_text(content, selection) if selection else content
        self._store_clipboard_content(selected_text)

        return {
            "operation": "copy",
            "copied_text": selected_text,
            "copied_length": len(selected_text),
        }

    def _cut_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Tuple[str, str]:
        """Cut selected text (copy and delete)"""
        selected_text = self._extract_selected_text(content, selection) if selection else content
        new_content = self._delete_text(content, selection) if selection else ""

        return new_content, selected_text

    def _paste_text(
        self,
        content: str,
        position: Optional[Dict[str, Any]],
        clipboard_content: str,
    ) -> str:
        """Paste text from clipboard"""
        return self._insert_text(content, clipboard_content, position)

    def _store_clipboard_content(self, content: str):
        """Store content in clipboard (simplified implementation)"""
        clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
        try:
            with open(clipboard_file, "w", encoding="utf-8") as f:
                f.write(content)
        except Exception as e:
            self.logger.warning(f"Failed to store clipboard content: {e}")

    def _get_clipboard_content(self) -> str:
        """Get content from clipboard"""
        clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
        try:
            with open(clipboard_file, "r", encoding="utf-8") as f:
                return f.read()
        except Exception:
            return ""
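
    # Note: the "clipboard" is just a temp file under self.config.temp_dir,
    # so copy/cut/paste persist across instances on one machine but are not
    # the OS clipboard, and concurrent writers will overwrite each other.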

    def _apply_text_formatting(
        self,
        content: str,
        text_to_format: str,
        format_type: EditOperation,
        options: Optional[Dict[str, Any]],
    ) -> str:
        """Apply formatting to all occurrences of specific text"""
        if format_type == EditOperation.BOLD:
            replacement = f"**{text_to_format}**"
        elif format_type == EditOperation.ITALIC:
            replacement = f"*{text_to_format}*"
        elif format_type == EditOperation.UNDERLINE:
            replacement = f"<u>{text_to_format}</u>"
        elif format_type == EditOperation.STRIKETHROUGH:
            replacement = f"~~{text_to_format}~~"
        elif format_type == EditOperation.HIGHLIGHT:
            color = options.get("color", "yellow") if options else "yellow"
            replacement = f'<mark style="background-color: {color}">{text_to_format}</mark>'
        else:
            replacement = text_to_format

        return content.replace(text_to_format, replacement)

    def _perform_find_replace(
        self,
        content: str,
        find_text: str,
        replace_text: str,
        replace_all: bool,
        case_sensitive: bool,
        regex_mode: bool,
    ) -> Tuple[str, int]:
        """Perform find and replace operation"""
        import re

        replacements = 0

        if regex_mode:
            flags = 0 if case_sensitive else re.IGNORECASE
            if replace_all:
                new_content, replacements = re.subn(find_text, replace_text, content, flags=flags)
            else:
                new_content = re.sub(find_text, replace_text, content, count=1, flags=flags)
                replacements = 1 if new_content != content else 0
        else:
            if case_sensitive:
                if replace_all:
                    replacements = content.count(find_text)
                    new_content = content.replace(find_text, replace_text)
                else:
                    new_content = content.replace(find_text, replace_text, 1)
                    replacements = 1 if new_content != content else 0
            else:
                # Case insensitive replacement via an escaped literal pattern
                pattern = re.escape(find_text)
                if replace_all:
                    new_content, replacements = re.subn(
                        pattern, replace_text, content, flags=re.IGNORECASE
                    )
                else:
                    new_content = re.sub(
                        pattern,
                        replace_text,
                        content,
                        count=1,
                        flags=re.IGNORECASE,
                    )
                    replacements = 1 if new_content != content else 0

        return new_content, replacements
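
    # Behavior note for _perform_find_replace: the literal case-insensitive
    # branch escapes find_text (re.escape), so strings like "C++" match
    # verbatim; in regex_mode the pattern is used raw, so group references
    # such as r"\1" in replace_text are honored by re.sub/re.subn.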