aiecs 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (302) hide show
  1. aiecs/__init__.py +72 -0
  2. aiecs/__main__.py +41 -0
  3. aiecs/aiecs_client.py +469 -0
  4. aiecs/application/__init__.py +10 -0
  5. aiecs/application/executors/__init__.py +10 -0
  6. aiecs/application/executors/operation_executor.py +363 -0
  7. aiecs/application/knowledge_graph/__init__.py +7 -0
  8. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +375 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +356 -0
  11. aiecs/application/knowledge_graph/builder/schema_mapping.py +531 -0
  12. aiecs/application/knowledge_graph/builder/structured_pipeline.py +443 -0
  13. aiecs/application/knowledge_graph/builder/text_chunker.py +319 -0
  14. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  15. aiecs/application/knowledge_graph/extractors/base.py +100 -0
  16. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +327 -0
  17. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +349 -0
  18. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +244 -0
  19. aiecs/application/knowledge_graph/fusion/__init__.py +23 -0
  20. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +387 -0
  21. aiecs/application/knowledge_graph/fusion/entity_linker.py +343 -0
  22. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +580 -0
  23. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +189 -0
  24. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  25. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +344 -0
  26. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +378 -0
  27. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  28. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +199 -0
  29. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  30. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  31. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +347 -0
  32. aiecs/application/knowledge_graph/reasoning/inference_engine.py +504 -0
  33. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +167 -0
  34. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  35. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  36. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +630 -0
  37. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +654 -0
  38. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +477 -0
  39. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +390 -0
  40. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +217 -0
  41. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +169 -0
  42. aiecs/application/knowledge_graph/reasoning/query_planner.py +872 -0
  43. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +554 -0
  44. aiecs/application/knowledge_graph/retrieval/__init__.py +19 -0
  45. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +596 -0
  46. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  47. aiecs/application/knowledge_graph/search/hybrid_search.py +423 -0
  48. aiecs/application/knowledge_graph/search/reranker.py +295 -0
  49. aiecs/application/knowledge_graph/search/reranker_strategies.py +553 -0
  50. aiecs/application/knowledge_graph/search/text_similarity.py +398 -0
  51. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  52. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +329 -0
  53. aiecs/application/knowledge_graph/traversal/path_scorer.py +269 -0
  54. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  55. aiecs/application/knowledge_graph/validators/relation_validator.py +189 -0
  56. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  57. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +321 -0
  58. aiecs/common/__init__.py +9 -0
  59. aiecs/common/knowledge_graph/__init__.py +17 -0
  60. aiecs/common/knowledge_graph/runnable.py +484 -0
  61. aiecs/config/__init__.py +16 -0
  62. aiecs/config/config.py +498 -0
  63. aiecs/config/graph_config.py +137 -0
  64. aiecs/config/registry.py +23 -0
  65. aiecs/core/__init__.py +46 -0
  66. aiecs/core/interface/__init__.py +34 -0
  67. aiecs/core/interface/execution_interface.py +152 -0
  68. aiecs/core/interface/storage_interface.py +171 -0
  69. aiecs/domain/__init__.py +289 -0
  70. aiecs/domain/agent/__init__.py +189 -0
  71. aiecs/domain/agent/base_agent.py +697 -0
  72. aiecs/domain/agent/exceptions.py +103 -0
  73. aiecs/domain/agent/graph_aware_mixin.py +559 -0
  74. aiecs/domain/agent/hybrid_agent.py +490 -0
  75. aiecs/domain/agent/integration/__init__.py +26 -0
  76. aiecs/domain/agent/integration/context_compressor.py +222 -0
  77. aiecs/domain/agent/integration/context_engine_adapter.py +252 -0
  78. aiecs/domain/agent/integration/retry_policy.py +219 -0
  79. aiecs/domain/agent/integration/role_config.py +213 -0
  80. aiecs/domain/agent/knowledge_aware_agent.py +646 -0
  81. aiecs/domain/agent/lifecycle.py +296 -0
  82. aiecs/domain/agent/llm_agent.py +300 -0
  83. aiecs/domain/agent/memory/__init__.py +12 -0
  84. aiecs/domain/agent/memory/conversation.py +197 -0
  85. aiecs/domain/agent/migration/__init__.py +14 -0
  86. aiecs/domain/agent/migration/conversion.py +160 -0
  87. aiecs/domain/agent/migration/legacy_wrapper.py +90 -0
  88. aiecs/domain/agent/models.py +317 -0
  89. aiecs/domain/agent/observability.py +407 -0
  90. aiecs/domain/agent/persistence.py +289 -0
  91. aiecs/domain/agent/prompts/__init__.py +29 -0
  92. aiecs/domain/agent/prompts/builder.py +161 -0
  93. aiecs/domain/agent/prompts/formatters.py +189 -0
  94. aiecs/domain/agent/prompts/template.py +255 -0
  95. aiecs/domain/agent/registry.py +260 -0
  96. aiecs/domain/agent/tool_agent.py +257 -0
  97. aiecs/domain/agent/tools/__init__.py +12 -0
  98. aiecs/domain/agent/tools/schema_generator.py +221 -0
  99. aiecs/domain/community/__init__.py +155 -0
  100. aiecs/domain/community/agent_adapter.py +477 -0
  101. aiecs/domain/community/analytics.py +481 -0
  102. aiecs/domain/community/collaborative_workflow.py +642 -0
  103. aiecs/domain/community/communication_hub.py +645 -0
  104. aiecs/domain/community/community_builder.py +320 -0
  105. aiecs/domain/community/community_integration.py +800 -0
  106. aiecs/domain/community/community_manager.py +813 -0
  107. aiecs/domain/community/decision_engine.py +879 -0
  108. aiecs/domain/community/exceptions.py +225 -0
  109. aiecs/domain/community/models/__init__.py +33 -0
  110. aiecs/domain/community/models/community_models.py +268 -0
  111. aiecs/domain/community/resource_manager.py +457 -0
  112. aiecs/domain/community/shared_context_manager.py +603 -0
  113. aiecs/domain/context/__init__.py +58 -0
  114. aiecs/domain/context/context_engine.py +989 -0
  115. aiecs/domain/context/conversation_models.py +354 -0
  116. aiecs/domain/context/graph_memory.py +467 -0
  117. aiecs/domain/execution/__init__.py +12 -0
  118. aiecs/domain/execution/model.py +57 -0
  119. aiecs/domain/knowledge_graph/__init__.py +19 -0
  120. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  121. aiecs/domain/knowledge_graph/models/entity.py +130 -0
  122. aiecs/domain/knowledge_graph/models/evidence.py +194 -0
  123. aiecs/domain/knowledge_graph/models/inference_rule.py +186 -0
  124. aiecs/domain/knowledge_graph/models/path.py +179 -0
  125. aiecs/domain/knowledge_graph/models/path_pattern.py +173 -0
  126. aiecs/domain/knowledge_graph/models/query.py +272 -0
  127. aiecs/domain/knowledge_graph/models/query_plan.py +187 -0
  128. aiecs/domain/knowledge_graph/models/relation.py +136 -0
  129. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  130. aiecs/domain/knowledge_graph/schema/entity_type.py +135 -0
  131. aiecs/domain/knowledge_graph/schema/graph_schema.py +271 -0
  132. aiecs/domain/knowledge_graph/schema/property_schema.py +155 -0
  133. aiecs/domain/knowledge_graph/schema/relation_type.py +171 -0
  134. aiecs/domain/knowledge_graph/schema/schema_manager.py +496 -0
  135. aiecs/domain/knowledge_graph/schema/type_enums.py +205 -0
  136. aiecs/domain/task/__init__.py +13 -0
  137. aiecs/domain/task/dsl_processor.py +613 -0
  138. aiecs/domain/task/model.py +62 -0
  139. aiecs/domain/task/task_context.py +268 -0
  140. aiecs/infrastructure/__init__.py +24 -0
  141. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  142. aiecs/infrastructure/graph_storage/base.py +601 -0
  143. aiecs/infrastructure/graph_storage/batch_operations.py +449 -0
  144. aiecs/infrastructure/graph_storage/cache.py +429 -0
  145. aiecs/infrastructure/graph_storage/distributed.py +226 -0
  146. aiecs/infrastructure/graph_storage/error_handling.py +390 -0
  147. aiecs/infrastructure/graph_storage/graceful_degradation.py +306 -0
  148. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  149. aiecs/infrastructure/graph_storage/in_memory.py +514 -0
  150. aiecs/infrastructure/graph_storage/index_optimization.py +483 -0
  151. aiecs/infrastructure/graph_storage/lazy_loading.py +410 -0
  152. aiecs/infrastructure/graph_storage/metrics.py +357 -0
  153. aiecs/infrastructure/graph_storage/migration.py +413 -0
  154. aiecs/infrastructure/graph_storage/pagination.py +471 -0
  155. aiecs/infrastructure/graph_storage/performance_monitoring.py +466 -0
  156. aiecs/infrastructure/graph_storage/postgres.py +871 -0
  157. aiecs/infrastructure/graph_storage/query_optimizer.py +635 -0
  158. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  159. aiecs/infrastructure/graph_storage/sqlite.py +623 -0
  160. aiecs/infrastructure/graph_storage/streaming.py +495 -0
  161. aiecs/infrastructure/messaging/__init__.py +13 -0
  162. aiecs/infrastructure/messaging/celery_task_manager.py +383 -0
  163. aiecs/infrastructure/messaging/websocket_manager.py +298 -0
  164. aiecs/infrastructure/monitoring/__init__.py +34 -0
  165. aiecs/infrastructure/monitoring/executor_metrics.py +174 -0
  166. aiecs/infrastructure/monitoring/global_metrics_manager.py +213 -0
  167. aiecs/infrastructure/monitoring/structured_logger.py +48 -0
  168. aiecs/infrastructure/monitoring/tracing_manager.py +410 -0
  169. aiecs/infrastructure/persistence/__init__.py +24 -0
  170. aiecs/infrastructure/persistence/context_engine_client.py +187 -0
  171. aiecs/infrastructure/persistence/database_manager.py +333 -0
  172. aiecs/infrastructure/persistence/file_storage.py +754 -0
  173. aiecs/infrastructure/persistence/redis_client.py +220 -0
  174. aiecs/llm/__init__.py +86 -0
  175. aiecs/llm/callbacks/__init__.py +11 -0
  176. aiecs/llm/callbacks/custom_callbacks.py +264 -0
  177. aiecs/llm/client_factory.py +420 -0
  178. aiecs/llm/clients/__init__.py +33 -0
  179. aiecs/llm/clients/base_client.py +193 -0
  180. aiecs/llm/clients/googleai_client.py +181 -0
  181. aiecs/llm/clients/openai_client.py +131 -0
  182. aiecs/llm/clients/vertex_client.py +437 -0
  183. aiecs/llm/clients/xai_client.py +184 -0
  184. aiecs/llm/config/__init__.py +51 -0
  185. aiecs/llm/config/config_loader.py +275 -0
  186. aiecs/llm/config/config_validator.py +236 -0
  187. aiecs/llm/config/model_config.py +151 -0
  188. aiecs/llm/utils/__init__.py +10 -0
  189. aiecs/llm/utils/validate_config.py +91 -0
  190. aiecs/main.py +363 -0
  191. aiecs/scripts/__init__.py +3 -0
  192. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  193. aiecs/scripts/aid/__init__.py +19 -0
  194. aiecs/scripts/aid/version_manager.py +215 -0
  195. aiecs/scripts/dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md +242 -0
  196. aiecs/scripts/dependance_check/README_DEPENDENCY_CHECKER.md +310 -0
  197. aiecs/scripts/dependance_check/__init__.py +17 -0
  198. aiecs/scripts/dependance_check/dependency_checker.py +938 -0
  199. aiecs/scripts/dependance_check/dependency_fixer.py +391 -0
  200. aiecs/scripts/dependance_check/download_nlp_data.py +396 -0
  201. aiecs/scripts/dependance_check/quick_dependency_check.py +270 -0
  202. aiecs/scripts/dependance_check/setup_nlp_data.sh +217 -0
  203. aiecs/scripts/dependance_patch/__init__.py +7 -0
  204. aiecs/scripts/dependance_patch/fix_weasel/README_WEASEL_PATCH.md +126 -0
  205. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  206. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.py +128 -0
  207. aiecs/scripts/dependance_patch/fix_weasel/fix_weasel_validator.sh +82 -0
  208. aiecs/scripts/dependance_patch/fix_weasel/patch_weasel_library.sh +188 -0
  209. aiecs/scripts/dependance_patch/fix_weasel/run_weasel_patch.sh +41 -0
  210. aiecs/scripts/tools_develop/README.md +449 -0
  211. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  212. aiecs/scripts/tools_develop/__init__.py +21 -0
  213. aiecs/scripts/tools_develop/check_type_annotations.py +259 -0
  214. aiecs/scripts/tools_develop/validate_tool_schemas.py +422 -0
  215. aiecs/scripts/tools_develop/verify_tools.py +356 -0
  216. aiecs/tasks/__init__.py +1 -0
  217. aiecs/tasks/worker.py +172 -0
  218. aiecs/tools/__init__.py +299 -0
  219. aiecs/tools/apisource/__init__.py +99 -0
  220. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  221. aiecs/tools/apisource/intelligence/data_fusion.py +381 -0
  222. aiecs/tools/apisource/intelligence/query_analyzer.py +413 -0
  223. aiecs/tools/apisource/intelligence/search_enhancer.py +388 -0
  224. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  225. aiecs/tools/apisource/monitoring/metrics.py +303 -0
  226. aiecs/tools/apisource/providers/__init__.py +115 -0
  227. aiecs/tools/apisource/providers/base.py +664 -0
  228. aiecs/tools/apisource/providers/census.py +401 -0
  229. aiecs/tools/apisource/providers/fred.py +564 -0
  230. aiecs/tools/apisource/providers/newsapi.py +412 -0
  231. aiecs/tools/apisource/providers/worldbank.py +357 -0
  232. aiecs/tools/apisource/reliability/__init__.py +12 -0
  233. aiecs/tools/apisource/reliability/error_handler.py +375 -0
  234. aiecs/tools/apisource/reliability/fallback_strategy.py +391 -0
  235. aiecs/tools/apisource/tool.py +850 -0
  236. aiecs/tools/apisource/utils/__init__.py +9 -0
  237. aiecs/tools/apisource/utils/validators.py +338 -0
  238. aiecs/tools/base_tool.py +201 -0
  239. aiecs/tools/docs/__init__.py +121 -0
  240. aiecs/tools/docs/ai_document_orchestrator.py +599 -0
  241. aiecs/tools/docs/ai_document_writer_orchestrator.py +2403 -0
  242. aiecs/tools/docs/content_insertion_tool.py +1333 -0
  243. aiecs/tools/docs/document_creator_tool.py +1317 -0
  244. aiecs/tools/docs/document_layout_tool.py +1166 -0
  245. aiecs/tools/docs/document_parser_tool.py +994 -0
  246. aiecs/tools/docs/document_writer_tool.py +1818 -0
  247. aiecs/tools/knowledge_graph/__init__.py +17 -0
  248. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +734 -0
  249. aiecs/tools/knowledge_graph/graph_search_tool.py +923 -0
  250. aiecs/tools/knowledge_graph/kg_builder_tool.py +476 -0
  251. aiecs/tools/langchain_adapter.py +542 -0
  252. aiecs/tools/schema_generator.py +275 -0
  253. aiecs/tools/search_tool/__init__.py +100 -0
  254. aiecs/tools/search_tool/analyzers.py +589 -0
  255. aiecs/tools/search_tool/cache.py +260 -0
  256. aiecs/tools/search_tool/constants.py +128 -0
  257. aiecs/tools/search_tool/context.py +216 -0
  258. aiecs/tools/search_tool/core.py +749 -0
  259. aiecs/tools/search_tool/deduplicator.py +123 -0
  260. aiecs/tools/search_tool/error_handler.py +271 -0
  261. aiecs/tools/search_tool/metrics.py +371 -0
  262. aiecs/tools/search_tool/rate_limiter.py +178 -0
  263. aiecs/tools/search_tool/schemas.py +277 -0
  264. aiecs/tools/statistics/__init__.py +80 -0
  265. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +643 -0
  266. aiecs/tools/statistics/ai_insight_generator_tool.py +505 -0
  267. aiecs/tools/statistics/ai_report_orchestrator_tool.py +694 -0
  268. aiecs/tools/statistics/data_loader_tool.py +564 -0
  269. aiecs/tools/statistics/data_profiler_tool.py +658 -0
  270. aiecs/tools/statistics/data_transformer_tool.py +573 -0
  271. aiecs/tools/statistics/data_visualizer_tool.py +495 -0
  272. aiecs/tools/statistics/model_trainer_tool.py +487 -0
  273. aiecs/tools/statistics/statistical_analyzer_tool.py +459 -0
  274. aiecs/tools/task_tools/__init__.py +86 -0
  275. aiecs/tools/task_tools/chart_tool.py +732 -0
  276. aiecs/tools/task_tools/classfire_tool.py +922 -0
  277. aiecs/tools/task_tools/image_tool.py +447 -0
  278. aiecs/tools/task_tools/office_tool.py +684 -0
  279. aiecs/tools/task_tools/pandas_tool.py +635 -0
  280. aiecs/tools/task_tools/report_tool.py +635 -0
  281. aiecs/tools/task_tools/research_tool.py +392 -0
  282. aiecs/tools/task_tools/scraper_tool.py +715 -0
  283. aiecs/tools/task_tools/stats_tool.py +688 -0
  284. aiecs/tools/temp_file_manager.py +130 -0
  285. aiecs/tools/tool_executor/__init__.py +37 -0
  286. aiecs/tools/tool_executor/tool_executor.py +881 -0
  287. aiecs/utils/LLM_output_structor.py +445 -0
  288. aiecs/utils/__init__.py +34 -0
  289. aiecs/utils/base_callback.py +47 -0
  290. aiecs/utils/cache_provider.py +695 -0
  291. aiecs/utils/execution_utils.py +184 -0
  292. aiecs/utils/logging.py +1 -0
  293. aiecs/utils/prompt_loader.py +14 -0
  294. aiecs/utils/token_usage_repository.py +323 -0
  295. aiecs/ws/__init__.py +0 -0
  296. aiecs/ws/socket_server.py +52 -0
  297. aiecs-1.5.1.dist-info/METADATA +608 -0
  298. aiecs-1.5.1.dist-info/RECORD +302 -0
  299. aiecs-1.5.1.dist-info/WHEEL +5 -0
  300. aiecs-1.5.1.dist-info/entry_points.txt +10 -0
  301. aiecs-1.5.1.dist-info/licenses/LICENSE +225 -0
  302. aiecs-1.5.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1818 @@
1
+ import os
2
+ import json
3
+ import uuid
4
+ import hashlib
5
+ import logging
6
+ import asyncio
7
+ import shutil
8
+ from typing import Dict, Any, List, Optional, Union, Tuple
9
+ from enum import Enum
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ import tempfile
13
+
14
+ from pydantic import BaseModel, Field
15
+ from pydantic_settings import BaseSettings, SettingsConfigDict
16
+
17
+ from aiecs.tools.base_tool import BaseTool
18
+ from aiecs.tools import register_tool
19
+
20
+
21
class DocumentFormat(str, Enum):
    """Supported document formats for writing.

    Each member's value is the canonical file extension / format tag used
    when serializing content.
    """

    TXT = "txt"
    # Same value as TXT, so Python's Enum machinery makes this an alias:
    # DocumentFormat.PLAIN_TEXT is DocumentFormat.TXT.
    PLAIN_TEXT = "txt"  # Alias for TXT
    JSON = "json"
    CSV = "csv"
    XML = "xml"
    MARKDOWN = "md"
    HTML = "html"
    YAML = "yaml"
    PDF = "pdf"
    DOCX = "docx"
    XLSX = "xlsx"
    BINARY = "binary"
36
+
37
+
38
class WriteMode(str, Enum):
    """Document writing modes."""

    CREATE = "create"  # Create a new file; fail if it already exists
    OVERWRITE = "overwrite"  # Overwrite an existing file
    APPEND = "append"  # Append to an existing file
    UPDATE = "update"  # Update an existing file (smart merge)
    BACKUP_WRITE = "backup_write"  # Back up the current file, then write
    VERSION_WRITE = "version_write"  # Versioned write
    INSERT = "insert"  # Insert content at a specified position
    REPLACE = "replace"  # Replace specified content
    DELETE = "delete"  # Delete specified content
50
+
51
+
52
class EditOperation(str, Enum):
    """Advanced edit operations (text formatting and line-level editing)."""

    BOLD = "bold"  # Bold text
    ITALIC = "italic"  # Italic text
    UNDERLINE = "underline"  # Underlined text
    STRIKETHROUGH = "strikethrough"  # Strikethrough text
    HIGHLIGHT = "highlight"  # Highlighted text
    INSERT_TEXT = "insert_text"  # Insert text
    DELETE_TEXT = "delete_text"  # Delete text
    REPLACE_TEXT = "replace_text"  # Replace text
    COPY_TEXT = "copy_text"  # Copy text
    CUT_TEXT = "cut_text"  # Cut text
    PASTE_TEXT = "paste_text"  # Paste text
    FIND_REPLACE = "find_replace"  # Find and replace
    INSERT_LINE = "insert_line"  # Insert a line
    DELETE_LINE = "delete_line"  # Delete a line
    MOVE_LINE = "move_line"  # Move a line
70
+
71
+
72
class EncodingType(str, Enum):
    """Text encoding types accepted by write operations."""

    UTF8 = "utf-8"
    UTF16 = "utf-16"
    ASCII = "ascii"
    GBK = "gbk"
    # AUTO presumably triggers encoding detection elsewhere in the tool —
    # the detection logic is not part of this enum; confirm before relying on it.
    AUTO = "auto"
80
+
81
+
82
class ValidationLevel(str, Enum):
    """Content validation levels, from cheapest to most thorough."""

    NONE = "none"  # No validation
    BASIC = "basic"  # Basic validation (format, size)
    STRICT = "strict"  # Strict validation (content, structure)
    ENTERPRISE = "enterprise"  # Enterprise-grade validation (security, compliance)
89
+
90
+
91
class DocumentWriterError(Exception):
    """Base exception for document writer errors.

    All errors raised by this module derive from this class, so callers can
    catch the whole family with a single ``except DocumentWriterError``.
    """


class WriteError(DocumentWriterError):
    """Raised when write operations fail."""


class ValidationError(DocumentWriterError):
    """Raised when validation fails."""


class SecurityError(DocumentWriterError):
    """Raised when security validation fails."""


class WritePermissionError(DocumentWriterError):
    """Raised when write permission is denied."""


class ContentValidationError(DocumentWriterError):
    """Raised when content validation fails."""


class StorageError(DocumentWriterError):
    """Raised when storage operations fail."""
117
+
118
+
119
+ @register_tool("document_writer")
120
+ class DocumentWriterTool(BaseTool):
121
+ """
122
+ Modern high-performance document writing component that can:
123
+ 1. Handle multiple document formats and encodings
124
+ 2. Provide production-grade write operations with validation
125
+ 3. Support various write modes (create, overwrite, append, update)
126
+ 4. Implement backup and versioning strategies
127
+ 5. Ensure atomic operations and data integrity
128
+ 6. Support both local and cloud storage
129
+
130
+ Production Features:
131
+ - Atomic writes (no partial writes)
132
+ - Content validation and security scanning
133
+ - Automatic backup and versioning
134
+ - Write permission and quota checks
135
+ - Transaction-like operations
136
+ - Audit logging
137
+ """
138
+
139
    # Configuration schema
    class Config(BaseSettings):
        """Configuration for the document writer tool

        Automatically reads from environment variables with DOC_WRITER_ prefix.
        Example: DOC_WRITER_GCS_PROJECT_ID -> gcs_project_id
        """

        model_config = SettingsConfigDict(env_prefix="DOC_WRITER_")

        # Scratch directory for in-flight writes; defaults under the system temp dir.
        temp_dir: str = Field(
            default=os.path.join(tempfile.gettempdir(), "document_writer"),
            description="Temporary directory for document processing",
        )
        # Where pre-write backups are stored.
        backup_dir: str = Field(
            default=os.path.join(tempfile.gettempdir(), "document_backups"),
            description="Directory for document backups",
        )
        output_dir: Optional[str] = Field(
            default=None, description="Default output directory for documents"
        )
        # 100 MiB default cap.
        max_file_size: int = Field(
            default=100 * 1024 * 1024, description="Maximum file size in bytes"
        )
        max_backup_versions: int = Field(
            default=10, description="Maximum number of backup versions to keep"
        )
        default_encoding: str = Field(
            default="utf-8", description="Default text encoding for documents"
        )
        # NOTE(review): overlaps with auto_backup below — confirm which flag
        # callers actually consult before consolidating.
        enable_backup: bool = Field(
            default=True,
            description="Whether to enable automatic backup functionality",
        )
        # NOTE(review): overlaps with version_control below.
        enable_versioning: bool = Field(
            default=True, description="Whether to enable document versioning"
        )
        enable_content_validation: bool = Field(
            default=True, description="Whether to enable content validation"
        )
        # NOTE(review): overlaps with security_scan below.
        enable_security_scan: bool = Field(
            default=True, description="Whether to enable security scanning"
        )
        # NOTE(review): overlaps with atomic_writes below.
        atomic_write: bool = Field(
            default=True, description="Whether to use atomic write operations"
        )
        validation_level: str = Field(default="basic", description="Content validation level")
        timeout_seconds: int = Field(default=60, description="Operation timeout in seconds")
        auto_backup: bool = Field(
            default=True,
            description="Whether to automatically backup before write operations",
        )
        atomic_writes: bool = Field(
            default=True, description="Whether to use atomic write operations"
        )
        default_format: str = Field(default="md", description="Default document format")
        version_control: bool = Field(default=True, description="Whether to enable version control")
        security_scan: bool = Field(default=True, description="Whether to enable security scanning")
        # Cloud storage (GCS) integration; falls back to local storage when unavailable.
        enable_cloud_storage: bool = Field(
            default=True,
            description="Whether to enable cloud storage integration",
        )
        gcs_bucket_name: str = Field(
            default="aiecs-documents",
            description="Google Cloud Storage bucket name",
        )
        gcs_project_id: Optional[str] = Field(
            default=None, description="Google Cloud Storage project ID"
        )
209
+ def __init__(self, config: Optional[Dict] = None):
210
+ """Initialize DocumentWriterTool with settings"""
211
+ super().__init__(config)
212
+
213
+ # Parse configuration
214
+ self.config = self.Config(**(config or {}))
215
+
216
+ self.logger = logging.getLogger(__name__)
217
+
218
+ # Create necessary directories
219
+ os.makedirs(self.config.temp_dir, exist_ok=True)
220
+ os.makedirs(self.config.backup_dir, exist_ok=True)
221
+
222
+ # Initialize cloud storage
223
+ self._init_cloud_storage()
224
+
225
+ # Initialize content validators
226
+ self._init_validators()
227
+
228
+ def _init_cloud_storage(self):
229
+ """Initialize cloud storage for document writing"""
230
+ self.file_storage = None
231
+
232
+ if self.config.enable_cloud_storage:
233
+ try:
234
+ from aiecs.infrastructure.persistence.file_storage import (
235
+ FileStorage,
236
+ )
237
+
238
+ storage_config = {
239
+ "gcs_bucket_name": self.config.gcs_bucket_name,
240
+ "gcs_project_id": self.config.gcs_project_id,
241
+ "enable_local_fallback": True,
242
+ "local_storage_path": self.config.temp_dir,
243
+ }
244
+
245
+ self.file_storage = FileStorage(storage_config)
246
+ asyncio.create_task(self._init_storage_async())
247
+
248
+ except ImportError:
249
+ self.logger.warning("FileStorage not available, cloud storage disabled")
250
+ except Exception as e:
251
+ self.logger.warning(f"Failed to initialize cloud storage: {e}")
252
+
253
+ async def _init_storage_async(self):
254
+ """Async initialization of file storage"""
255
+ try:
256
+ if self.file_storage:
257
+ await self.file_storage.initialize()
258
+ self.logger.info("Cloud storage initialized successfully")
259
+ except Exception as e:
260
+ self.logger.warning(f"Cloud storage initialization failed: {e}")
261
+ self.file_storage = None
262
+
263
+ def _init_validators(self):
264
+ """Initialize content validators"""
265
+ self.validators = {
266
+ DocumentFormat.JSON: self._validate_json_content,
267
+ DocumentFormat.XML: self._validate_xml_content,
268
+ DocumentFormat.CSV: self._validate_csv_content,
269
+ DocumentFormat.YAML: self._validate_yaml_content,
270
+ DocumentFormat.HTML: self._validate_html_content,
271
+ }
272
+
273
    # Schema definitions
    # Pydantic models mirroring the public method signatures; used for input
    # validation when operations are invoked through the tool interface.
    class WriteDocumentSchema(BaseModel):
        """Schema for write_document operation"""

        target_path: str = Field(description="Target file path (local or cloud)")
        content: Union[str, bytes, Dict, List] = Field(description="Content to write")
        format: DocumentFormat = Field(description="Document format")
        mode: WriteMode = Field(default=WriteMode.CREATE, description="Write mode")
        encoding: EncodingType = Field(default=EncodingType.UTF8, description="Text encoding")
        validation_level: ValidationLevel = Field(
            default=ValidationLevel.BASIC, description="Validation level"
        )
        metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
        backup_comment: Optional[str] = Field(default=None, description="Backup comment")

    class BatchWriteSchema(BaseModel):
        """Schema for batch_write_documents operation"""

        # Each entry is expected to carry the per-file write parameters;
        # exact keys are validated by the batch operation itself.
        write_operations: List[Dict[str, Any]] = Field(description="List of write operations")
        transaction_mode: bool = Field(default=True, description="Use transaction mode")
        rollback_on_error: bool = Field(default=True, description="Rollback on any error")

    class EditDocumentSchema(BaseModel):
        """Schema for edit_document operation"""

        target_path: str = Field(description="Target file path")
        operation: EditOperation = Field(description="Edit operation to perform")
        content: Optional[str] = Field(default=None, description="Content for the operation")
        position: Optional[Dict[str, Any]] = Field(
            default=None, description="Position info (line, column, offset)"
        )
        selection: Optional[Dict[str, Any]] = Field(
            default=None, description="Text selection range"
        )
        format_options: Optional[Dict[str, Any]] = Field(
            default=None, description="Formatting options"
        )

    class FormatTextSchema(BaseModel):
        """Schema for format_text operation"""

        target_path: str = Field(description="Target file path")
        text_to_format: str = Field(description="Text to apply formatting to")
        format_type: EditOperation = Field(description="Type of formatting")
        format_options: Optional[Dict[str, Any]] = Field(
            default=None, description="Additional format options"
        )

    class FindReplaceSchema(BaseModel):
        """Schema for find_replace operation"""

        target_path: str = Field(description="Target file path")
        find_text: str = Field(description="Text to find")
        replace_text: str = Field(description="Text to replace with")
        replace_all: bool = Field(default=False, description="Replace all occurrences")
        case_sensitive: bool = Field(default=True, description="Case sensitive search")
        regex_mode: bool = Field(default=False, description="Use regex for find/replace")
330
+
331
    def write_document(
        self,
        target_path: str,
        content: Union[str, bytes, Dict, List],
        format: DocumentFormat,
        mode: WriteMode = WriteMode.CREATE,
        encoding: EncodingType = EncodingType.UTF8,
        validation_level: ValidationLevel = ValidationLevel.BASIC,
        metadata: Optional[Dict[str, Any]] = None,
        backup_comment: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Write a document with production-grade features (validation, backup,
        atomic write, versioning, audit logging).

        The write is a seven-step pipeline; on any failure the target is
        rolled back from the backup created in step 4 (when one was made)
        and the error is re-raised as DocumentWriterError.

        Args:
            target_path: Target file path (local or cloud URL)
            content: Content to write (str, bytes, dict or list)
            format: Document format used to serialize/validate `content`
            mode: Write mode (create, overwrite, append, update, etc.)
            encoding: Text encoding for str content
            validation_level: Content validation level
            metadata: Additional user metadata, stored with the version record
            backup_comment: Comment attached to the backup, if one is created

        Returns:
            Dict containing write results and metadata (operation id,
            content/backup/version/audit info, timing).

        Raises:
            DocumentWriterError: On any failure during the pipeline.
        """
        try:
            start_time = datetime.now()
            operation_id = str(uuid.uuid4())

            self.logger.info(f"Starting write operation {operation_id}: {target_path}")

            # Step 1: Validate inputs (path, non-None content, size, permission)
            self._validate_write_inputs(target_path, content, format, mode)

            # Step 2: Prepare content (serialize to the target format, validate,
            # compute checksum metadata)
            processed_content, content_metadata = self._prepare_content(
                content, format, encoding, validation_level
            )

            # Step 3: Handle write mode logic (existence checks, backup/version flags)
            write_plan = self._plan_write_operation(target_path, mode, metadata)

            # Step 4: Create backup if needed — only for modes that destroy
            # existing content
            backup_info = None
            if self.config.enable_backup and mode in [
                WriteMode.OVERWRITE,
                WriteMode.UPDATE,
            ]:
                backup_info = self._create_backup(target_path, backup_comment)

            # Step 5: Execute atomic write (local temp-file rename or cloud store)
            write_result = self._execute_atomic_write(
                target_path, processed_content, format, encoding, write_plan
            )

            # Step 6: Update metadata and versioning (best-effort; returns None
            # when versioning is disabled or fails)
            version_info = self._handle_versioning(target_path, content_metadata, metadata)

            # Step 7: Audit logging (best-effort append to the audit log file)
            audit_info = self._log_write_operation(
                operation_id, target_path, mode, write_result, backup_info
            )

            result = {
                "operation_id": operation_id,
                "target_path": target_path,
                "write_mode": mode,
                "format": format,
                "encoding": encoding,
                "content_metadata": content_metadata,
                "write_result": write_result,
                "backup_info": backup_info,
                "version_info": version_info,
                "audit_info": audit_info,
                "processing_metadata": {
                    "start_time": start_time.isoformat(),
                    "end_time": datetime.now().isoformat(),
                    "duration": (datetime.now() - start_time).total_seconds(),
                },
            }

            self.logger.info(f"Write operation {operation_id} completed successfully")
            return result

        except Exception as e:
            self.logger.error(f"Write operation failed for {target_path}: {str(e)}")
            # Rollback if needed — the locals() guard covers failures that
            # occur before backup_info is even bound (steps 1-3)
            if "backup_info" in locals() and backup_info:
                self._rollback_from_backup(target_path, backup_info)
            raise DocumentWriterError(f"Document write failed: {str(e)}")
423
+
424
+ async def write_document_async(
425
+ self,
426
+ target_path: str,
427
+ content: Union[str, bytes, Dict, List],
428
+ format: DocumentFormat,
429
+ mode: WriteMode = WriteMode.CREATE,
430
+ encoding: EncodingType = EncodingType.UTF8,
431
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
432
+ metadata: Optional[Dict[str, Any]] = None,
433
+ backup_comment: Optional[str] = None,
434
+ ) -> Dict[str, Any]:
435
+ """Async version of write_document"""
436
+ return await asyncio.to_thread(
437
+ self.write_document,
438
+ target_path=target_path,
439
+ content=content,
440
+ format=format,
441
+ mode=mode,
442
+ encoding=encoding,
443
+ validation_level=validation_level,
444
+ metadata=metadata,
445
+ backup_comment=backup_comment,
446
+ )
447
+
448
+ def batch_write_documents(
449
+ self,
450
+ write_operations: List[Dict[str, Any]],
451
+ transaction_mode: bool = True,
452
+ rollback_on_error: bool = True,
453
+ ) -> Dict[str, Any]:
454
+ """
455
+ Batch write multiple documents with transaction support
456
+
457
+ Args:
458
+ write_operations: List of write operation dictionaries
459
+ transaction_mode: Use transaction mode for atomicity
460
+ rollback_on_error: Rollback all operations on any error
461
+
462
+ Returns:
463
+ Dict containing batch write results
464
+ """
465
+ try:
466
+ start_time = datetime.now()
467
+ batch_id = str(uuid.uuid4())
468
+
469
+ self.logger.info(
470
+ f"Starting batch write operation {batch_id}: {len(write_operations)} operations"
471
+ )
472
+
473
+ completed_operations = []
474
+ backup_operations = []
475
+
476
+ try:
477
+ for i, operation in enumerate(write_operations):
478
+ self.logger.info(f"Processing operation {i+1}/{len(write_operations)}")
479
+
480
+ # Execute individual write operation
481
+ result = self.write_document(**operation)
482
+ completed_operations.append(
483
+ {
484
+ "index": i,
485
+ "operation": operation,
486
+ "result": result,
487
+ "status": "success",
488
+ }
489
+ )
490
+
491
+ # Track backup info for potential rollback
492
+ if result.get("backup_info"):
493
+ backup_operations.append(result["backup_info"])
494
+
495
+ batch_result = {
496
+ "batch_id": batch_id,
497
+ "total_operations": len(write_operations),
498
+ "successful_operations": len(completed_operations),
499
+ "failed_operations": 0,
500
+ "operations": completed_operations,
501
+ "transaction_mode": transaction_mode,
502
+ "batch_metadata": {
503
+ "start_time": start_time.isoformat(),
504
+ "end_time": datetime.now().isoformat(),
505
+ "duration": (datetime.now() - start_time).total_seconds(),
506
+ },
507
+ }
508
+
509
+ self.logger.info(f"Batch write operation {batch_id} completed successfully")
510
+ return batch_result
511
+
512
+ except Exception as e:
513
+ self.logger.error(f"Batch write operation {batch_id} failed: {str(e)}")
514
+
515
+ if rollback_on_error and transaction_mode:
516
+ self.logger.info(f"Rolling back batch operation {batch_id}")
517
+ self._rollback_batch_operations(completed_operations, backup_operations)
518
+
519
+ # Create failure result
520
+ batch_result = {
521
+ "batch_id": batch_id,
522
+ "total_operations": len(write_operations),
523
+ "successful_operations": len(completed_operations),
524
+ "failed_operations": len(write_operations) - len(completed_operations),
525
+ "operations": completed_operations,
526
+ "error": str(e),
527
+ "transaction_mode": transaction_mode,
528
+ "rollback_performed": rollback_on_error and transaction_mode,
529
+ }
530
+
531
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
532
+
533
+ except Exception as e:
534
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
535
+
536
+ def _validate_write_inputs(
537
+ self,
538
+ target_path: str,
539
+ content: Any,
540
+ format: DocumentFormat,
541
+ mode: WriteMode,
542
+ ):
543
+ """Validate write operation inputs"""
544
+ # Path validation
545
+ if not target_path or not isinstance(target_path, str):
546
+ raise ValueError("Invalid target path")
547
+
548
+ # Content validation
549
+ if content is None:
550
+ raise ValueError("Content cannot be None")
551
+
552
+ # Size validation
553
+ content_size = self._calculate_content_size(content)
554
+ if content_size > self.config.max_file_size:
555
+ raise ValueError(
556
+ f"Content size {content_size} exceeds maximum {self.config.max_file_size}"
557
+ )
558
+
559
+ # Permission validation
560
+ if not self._check_write_permission(target_path, mode):
561
+ raise WritePermissionError(f"No write permission for {target_path}")
562
+
563
+ def _prepare_content(
564
+ self,
565
+ content: Any,
566
+ format: DocumentFormat,
567
+ encoding: EncodingType,
568
+ validation_level: ValidationLevel,
569
+ ) -> Tuple[Union[str, bytes], Dict]:
570
+ """Prepare and validate content for writing"""
571
+
572
+ # Content conversion based on format
573
+ if format == DocumentFormat.JSON:
574
+ if isinstance(content, (dict, list)):
575
+ processed_content = json.dumps(content, ensure_ascii=False, indent=2)
576
+ else:
577
+ processed_content = str(content)
578
+ elif format == DocumentFormat.CSV:
579
+ processed_content = self._convert_to_csv(content)
580
+ elif format == DocumentFormat.XML:
581
+ processed_content = self._convert_to_xml(content)
582
+ elif format == DocumentFormat.YAML:
583
+ processed_content = self._convert_to_yaml(content)
584
+ elif format == DocumentFormat.HTML:
585
+ processed_content = self._convert_to_html(content)
586
+ elif format == DocumentFormat.MARKDOWN:
587
+ processed_content = self._convert_to_markdown(content)
588
+ elif format == DocumentFormat.BINARY:
589
+ if isinstance(content, bytes):
590
+ processed_content = content
591
+ else:
592
+ processed_content = str(content).encode(encoding.value)
593
+ else:
594
+ processed_content = str(content)
595
+
596
+ # Content validation
597
+ if self.config.enable_content_validation:
598
+ self._validate_content(processed_content, format, validation_level)
599
+
600
+ # Calculate metadata
601
+ content_metadata = {
602
+ "original_type": type(content).__name__,
603
+ "processed_size": (
604
+ len(processed_content) if isinstance(processed_content, (str, bytes)) else 0
605
+ ),
606
+ "format": format,
607
+ "encoding": encoding,
608
+ "checksum": self._calculate_checksum(processed_content),
609
+ "validation_level": validation_level,
610
+ "timestamp": datetime.now().isoformat(),
611
+ }
612
+
613
+ return processed_content, content_metadata
614
+
615
+ def _plan_write_operation(
616
+ self, target_path: str, mode: WriteMode, metadata: Optional[Dict]
617
+ ) -> Dict:
618
+ """Plan the write operation based on mode and target"""
619
+
620
+ plan = {
621
+ "target_path": target_path,
622
+ "mode": mode,
623
+ "file_exists": self._file_exists(target_path),
624
+ "is_cloud_path": self._is_cloud_storage_path(target_path),
625
+ "requires_backup": False,
626
+ "requires_versioning": False,
627
+ "atomic_operation": self.config.atomic_write,
628
+ }
629
+
630
+ if mode == WriteMode.CREATE and plan["file_exists"]:
631
+ raise DocumentWriterError(f"File already exists: {target_path}")
632
+
633
+ if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
634
+ plan["requires_backup"] = self.config.enable_backup
635
+ plan["requires_versioning"] = self.config.enable_versioning
636
+
637
+ if mode == WriteMode.APPEND and not plan["file_exists"]:
638
+ # Convert to CREATE mode
639
+ plan["mode"] = WriteMode.CREATE
640
+
641
+ return plan
642
+
643
+ def _create_backup(self, target_path: str, comment: Optional[str] = None) -> Dict:
644
+ """Create backup of existing file"""
645
+ if not self._file_exists(target_path):
646
+ return None
647
+
648
+ try:
649
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
650
+ file_stem = Path(target_path).stem
651
+ file_suffix = Path(target_path).suffix
652
+
653
+ backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
654
+ backup_path = os.path.join(self.config.backup_dir, backup_filename)
655
+
656
+ # Copy file to backup location
657
+ if self._is_cloud_storage_path(target_path):
658
+ backup_path = self._backup_cloud_file(target_path, backup_path)
659
+ else:
660
+ shutil.copy2(target_path, backup_path)
661
+
662
+ backup_info = {
663
+ "original_path": target_path,
664
+ "backup_path": backup_path,
665
+ "timestamp": timestamp,
666
+ "comment": comment,
667
+ "checksum": self._calculate_file_checksum(target_path),
668
+ }
669
+
670
+ self.logger.info(f"Created backup: {backup_path}")
671
+ return backup_info
672
+
673
+ except Exception as e:
674
+ self.logger.error(f"Failed to create backup for {target_path}: {e}")
675
+ raise StorageError(f"Backup creation failed: {e}")
676
+
677
+ def _execute_atomic_write(
678
+ self,
679
+ target_path: str,
680
+ content: Union[str, bytes],
681
+ format: DocumentFormat,
682
+ encoding: EncodingType,
683
+ plan: Dict,
684
+ ) -> Dict:
685
+ """Execute atomic write operation"""
686
+
687
+ if plan["is_cloud_path"]:
688
+ return self._write_to_cloud_storage(target_path, content, format, encoding, plan)
689
+ else:
690
+ return self._write_to_local_file(target_path, content, format, encoding, plan)
691
+
692
+ def _write_to_local_file(
693
+ self,
694
+ target_path: str,
695
+ content: Union[str, bytes],
696
+ format: DocumentFormat,
697
+ encoding: EncodingType,
698
+ plan: Dict,
699
+ ) -> Dict:
700
+ """Write to local file system with atomic operation"""
701
+
702
+ try:
703
+ # Create parent directories
704
+ os.makedirs(os.path.dirname(target_path), exist_ok=True)
705
+
706
+ if plan["atomic_operation"]:
707
+ # Atomic write using temporary file
708
+ temp_path = f"{target_path}.tmp.{uuid.uuid4().hex}"
709
+
710
+ try:
711
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
712
+ # Read existing content first
713
+ with open(target_path, "rb") as f:
714
+ existing_content = f.read()
715
+
716
+ if isinstance(content, str):
717
+ content = existing_content.decode(encoding.value) + content
718
+ else:
719
+ content = existing_content + content
720
+
721
+ # Write to temporary file
722
+ if isinstance(content, bytes):
723
+ with open(temp_path, "wb") as f:
724
+ f.write(content)
725
+ else:
726
+ # Handle both EncodingType enum and string
727
+ enc_value = encoding.value if hasattr(encoding, "value") else str(encoding)
728
+ with open(temp_path, "w", encoding=enc_value) as f:
729
+ f.write(content)
730
+
731
+ # Atomic move
732
+ shutil.move(temp_path, target_path)
733
+
734
+ finally:
735
+ # Cleanup temp file if it still exists
736
+ if os.path.exists(temp_path):
737
+ os.unlink(temp_path)
738
+ else:
739
+ # Direct write
740
+ mode_map = {
741
+ WriteMode.CREATE: "w",
742
+ WriteMode.OVERWRITE: "w",
743
+ WriteMode.APPEND: "a",
744
+ WriteMode.UPDATE: "w",
745
+ }
746
+
747
+ file_mode = mode_map.get(plan["mode"], "w")
748
+ if isinstance(content, bytes):
749
+ file_mode += "b"
750
+
751
+ # Handle both EncodingType enum and string
752
+ enc_value = (
753
+ None
754
+ if isinstance(content, bytes)
755
+ else (encoding.value if hasattr(encoding, "value") else str(encoding))
756
+ )
757
+ with open(target_path, file_mode, encoding=enc_value) as f:
758
+ f.write(content)
759
+
760
+ # Get file stats
761
+ stat = os.stat(target_path)
762
+
763
+ return {
764
+ "path": target_path,
765
+ "size": stat.st_size,
766
+ "checksum": self._calculate_file_checksum(target_path),
767
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
768
+ "atomic_write": plan["atomic_operation"],
769
+ }
770
+
771
+ except Exception as e:
772
+ raise StorageError(f"Local file write failed: {e}")
773
+
774
+ async def _write_to_cloud_storage(
775
+ self,
776
+ target_path: str,
777
+ content: Union[str, bytes],
778
+ format: DocumentFormat,
779
+ encoding: EncodingType,
780
+ plan: Dict,
781
+ ) -> Dict:
782
+ """Write to cloud storage"""
783
+
784
+ if not self.file_storage:
785
+ raise StorageError("Cloud storage not available")
786
+
787
+ try:
788
+ storage_path = self._parse_cloud_storage_path(target_path)
789
+
790
+ # Handle append mode for cloud storage
791
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
792
+ existing_content = await self.file_storage.retrieve(storage_path)
793
+ if isinstance(content, str) and isinstance(existing_content, str):
794
+ content = existing_content + content
795
+ elif isinstance(content, bytes) and isinstance(existing_content, bytes):
796
+ content = existing_content + content
797
+
798
+ # Store in cloud storage
799
+ await self.file_storage.store(storage_path, content)
800
+
801
+ return {
802
+ "path": target_path,
803
+ "storage_path": storage_path,
804
+ "size": (len(content) if isinstance(content, (str, bytes)) else 0),
805
+ "checksum": self._calculate_checksum(content),
806
+ "cloud_storage": True,
807
+ }
808
+
809
+ except Exception as e:
810
+ raise StorageError(f"Cloud storage write failed: {e}")
811
+
812
+ def _handle_versioning(
813
+ self,
814
+ target_path: str,
815
+ content_metadata: Dict,
816
+ metadata: Optional[Dict],
817
+ ) -> Optional[Dict]:
818
+ """Handle document versioning"""
819
+
820
+ if not self.config.enable_versioning:
821
+ return None
822
+
823
+ try:
824
+ version_info = {
825
+ "path": target_path,
826
+ "version": self._get_next_version(target_path),
827
+ "timestamp": datetime.now().isoformat(),
828
+ "content_metadata": content_metadata,
829
+ "user_metadata": metadata or {},
830
+ }
831
+
832
+ # Store version info
833
+ version_file = f"{target_path}.versions.json"
834
+ versions = self._load_version_history(version_file)
835
+ versions.append(version_info)
836
+
837
+ # Keep only recent versions
838
+ if len(versions) > self.config.max_backup_versions:
839
+ versions = versions[-self.config.max_backup_versions :]
840
+
841
+ self._save_version_history(version_file, versions)
842
+
843
+ return version_info
844
+
845
+ except Exception as e:
846
+ self.logger.warning(f"Versioning failed for {target_path}: {e}")
847
+ return None
848
+
849
+ def _validate_content(
850
+ self,
851
+ content: Union[str, bytes],
852
+ format: DocumentFormat,
853
+ validation_level: ValidationLevel,
854
+ ):
855
+ """Validate content based on format and validation level"""
856
+
857
+ if validation_level == ValidationLevel.NONE:
858
+ return
859
+
860
+ try:
861
+ # Format-specific validation
862
+ if format in self.validators:
863
+ self.validators[format](content, validation_level)
864
+
865
+ # Security validation for enterprise level
866
+ if validation_level == ValidationLevel.ENTERPRISE:
867
+ self._security_scan_content(content)
868
+
869
+ except Exception as e:
870
+ raise ContentValidationError(f"Content validation failed: {e}")
871
+
872
+ def _validate_json_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
873
+ """Validate JSON content"""
874
+ try:
875
+ if isinstance(content, bytes):
876
+ content = content.decode("utf-8")
877
+ json.loads(content)
878
+ except json.JSONDecodeError as e:
879
+ raise ContentValidationError(f"Invalid JSON: {e}")
880
+
881
+ def _validate_xml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
882
+ """Validate XML content"""
883
+ try:
884
+ import xml.etree.ElementTree as ET
885
+
886
+ if isinstance(content, bytes):
887
+ content = content.decode("utf-8")
888
+ ET.fromstring(content)
889
+ except ET.ParseError as e:
890
+ raise ContentValidationError(f"Invalid XML: {e}")
891
+
892
+ def _validate_csv_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
893
+ """Validate CSV content"""
894
+ try:
895
+ import csv
896
+ import io
897
+
898
+ if isinstance(content, bytes):
899
+ content = content.decode("utf-8")
900
+ csv.reader(io.StringIO(content))
901
+ except Exception as e:
902
+ raise ContentValidationError(f"Invalid CSV: {e}")
903
+
904
+ def _validate_yaml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
905
+ """Validate YAML content"""
906
+ try:
907
+ import yaml
908
+
909
+ if isinstance(content, bytes):
910
+ content = content.decode("utf-8")
911
+ yaml.safe_load(content)
912
+ except yaml.YAMLError as e:
913
+ raise ContentValidationError(f"Invalid YAML: {e}")
914
+
915
+ def _validate_html_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
916
+ """Validate HTML content"""
917
+ try:
918
+ from bs4 import BeautifulSoup
919
+
920
+ if isinstance(content, bytes):
921
+ content = content.decode("utf-8")
922
+ BeautifulSoup(content, "html.parser")
923
+ except Exception as e:
924
+ raise ContentValidationError(f"Invalid HTML: {e}")
925
+
926
+ def _security_scan_content(self, content: Union[str, bytes]):
927
+ """Perform security scan on content"""
928
+ if isinstance(content, bytes):
929
+ content = content.decode("utf-8", errors="ignore")
930
+
931
+ # Check for suspicious patterns
932
+ suspicious_patterns = [
933
+ r"<script[^>]*>", # JavaScript
934
+ r"javascript:", # JavaScript URLs
935
+ r"vbscript:", # VBScript URLs
936
+ r"data:.*base64", # Base64 data URLs
937
+ r"eval\s*\(", # eval() calls
938
+ r"exec\s*\(", # exec() calls
939
+ ]
940
+
941
+ import re
942
+
943
+ for pattern in suspicious_patterns:
944
+ if re.search(pattern, content, re.IGNORECASE):
945
+ raise ContentValidationError("Security scan failed: suspicious pattern detected")
946
+
947
+ # Helper methods
948
+ def _calculate_content_size(self, content: Any) -> int:
949
+ """Calculate content size in bytes"""
950
+ if isinstance(content, bytes):
951
+ return len(content)
952
+ elif isinstance(content, str):
953
+ return len(content.encode("utf-8"))
954
+ else:
955
+ return len(str(content).encode("utf-8"))
956
+
957
+ def _calculate_checksum(self, content: Union[str, bytes]) -> str:
958
+ """Calculate content checksum"""
959
+ if isinstance(content, str):
960
+ content = content.encode("utf-8")
961
+ return hashlib.sha256(content).hexdigest()
962
+
963
+ def _calculate_file_checksum(self, file_path: str) -> str:
964
+ """Calculate file checksum"""
965
+ hash_sha256 = hashlib.sha256()
966
+ with open(file_path, "rb") as f:
967
+ for chunk in iter(lambda: f.read(4096), b""):
968
+ hash_sha256.update(chunk)
969
+ return hash_sha256.hexdigest()
970
+
971
+ def _check_write_permission(self, target_path: str, mode: WriteMode) -> bool:
972
+ """Check write permission for target path"""
973
+ try:
974
+ if self._is_cloud_storage_path(target_path):
975
+ return self.file_storage is not None
976
+
977
+ parent_dir = os.path.dirname(target_path)
978
+ if not os.path.exists(parent_dir):
979
+ # Check if we can create the directory
980
+ return os.access(os.path.dirname(parent_dir), os.W_OK)
981
+
982
+ if os.path.exists(target_path):
983
+ return os.access(target_path, os.W_OK)
984
+ else:
985
+ return os.access(parent_dir, os.W_OK)
986
+
987
+ except Exception:
988
+ return False
989
+
990
+ def _file_exists(self, file_path: str) -> bool:
991
+ """Check if file exists (local or cloud)"""
992
+ if self._is_cloud_storage_path(file_path):
993
+ # For cloud storage, we'd need to implement exists check
994
+ return False # Simplified for now
995
+ else:
996
+ return os.path.exists(file_path)
997
+
998
+ def _is_cloud_storage_path(self, source: str) -> bool:
999
+ """Check if source is a cloud storage path"""
1000
+ cloud_schemes = ["gs", "s3", "azure", "cloud"]
1001
+ try:
1002
+ from urllib.parse import urlparse
1003
+
1004
+ parsed = urlparse(source)
1005
+ return parsed.scheme in cloud_schemes
1006
+ except Exception:
1007
+ return False
1008
+
1009
+ def _parse_cloud_storage_path(self, source: str) -> str:
1010
+ """Parse cloud storage path to get storage key"""
1011
+ try:
1012
+ from urllib.parse import urlparse
1013
+
1014
+ parsed = urlparse(source)
1015
+ return parsed.path.lstrip("/")
1016
+ except Exception:
1017
+ return source
1018
+
1019
+ # Content conversion methods
1020
+ def _convert_to_csv(self, content: Any) -> str:
1021
+ """Convert content to CSV format"""
1022
+ import csv
1023
+ import io
1024
+
1025
+ output = io.StringIO()
1026
+ writer = csv.writer(output)
1027
+
1028
+ if isinstance(content, list):
1029
+ for row in content:
1030
+ if isinstance(row, (list, tuple)):
1031
+ writer.writerow(row)
1032
+ else:
1033
+ writer.writerow([row])
1034
+ elif isinstance(content, dict):
1035
+ # Convert dict to CSV with headers
1036
+ if content:
1037
+ headers = list(content.keys())
1038
+ writer.writerow(headers)
1039
+ writer.writerow([content[h] for h in headers])
1040
+ else:
1041
+ writer.writerow([str(content)])
1042
+
1043
+ return output.getvalue()
1044
+
1045
+ def _convert_to_xml(self, content: Any) -> str:
1046
+ """Convert content to XML format"""
1047
+ import xml.etree.ElementTree as ET
1048
+
1049
+ if isinstance(content, dict):
1050
+ root = ET.Element("document")
1051
+ for key, value in content.items():
1052
+ elem = ET.SubElement(root, str(key))
1053
+ elem.text = str(value)
1054
+ return ET.tostring(root, encoding="unicode")
1055
+ else:
1056
+ root = ET.Element("document")
1057
+ root.text = str(content)
1058
+ return ET.tostring(root, encoding="unicode")
1059
+
1060
+ def _convert_to_yaml(self, content: Any) -> str:
1061
+ """Convert content to YAML format"""
1062
+ try:
1063
+ import yaml
1064
+
1065
+ return yaml.dump(content, default_flow_style=False, allow_unicode=True)
1066
+ except ImportError:
1067
+ # Fallback to simple string representation
1068
+ return str(content)
1069
+
1070
+ def _convert_to_html(self, content: Any) -> str:
1071
+ """Convert content to HTML format"""
1072
+ if isinstance(content, dict):
1073
+ html = "<html><body>\n"
1074
+ for key, value in content.items():
1075
+ html += f"<h3>{key}</h3>\n<p>{value}</p>\n"
1076
+ html += "</body></html>"
1077
+ return html
1078
+ else:
1079
+ return f"<html><body><pre>{str(content)}</pre></body></html>"
1080
+
1081
+ def _convert_to_markdown(self, content: Any) -> str:
1082
+ """Convert content to Markdown format"""
1083
+ if isinstance(content, dict):
1084
+ md = ""
1085
+ for key, value in content.items():
1086
+ md += f"## {key}\n\n{value}\n\n"
1087
+ return md
1088
+ else:
1089
+ return str(content)
1090
+
1091
+ # Versioning methods
1092
+ def _get_next_version(self, file_path: str) -> int:
1093
+ """Get next version number for file"""
1094
+ version_file = f"{file_path}.versions.json"
1095
+ versions = self._load_version_history(version_file)
1096
+ return len(versions) + 1
1097
+
1098
+ def _load_version_history(self, version_file: str) -> List[Dict]:
1099
+ """Load version history from file"""
1100
+ try:
1101
+ if os.path.exists(version_file):
1102
+ with open(version_file, "r") as f:
1103
+ return json.load(f)
1104
+ except Exception:
1105
+ pass
1106
+ return []
1107
+
1108
+ def _save_version_history(self, version_file: str, versions: List[Dict]):
1109
+ """Save version history to file"""
1110
+ try:
1111
+ with open(version_file, "w") as f:
1112
+ json.dump(versions, f, indent=2)
1113
+ except Exception as e:
1114
+ self.logger.warning(f"Failed to save version history: {e}")
1115
+
1116
+ # Backup and rollback methods
1117
+ def _backup_cloud_file(self, source_path: str, backup_path: str) -> str:
1118
+ """Backup cloud file"""
1119
+ # Simplified implementation
1120
+ return backup_path
1121
+
1122
+ def _rollback_from_backup(self, target_path: str, backup_info: Dict):
1123
+ """Rollback file from backup"""
1124
+ try:
1125
+ if backup_info and os.path.exists(backup_info["backup_path"]):
1126
+ shutil.copy2(backup_info["backup_path"], target_path)
1127
+ self.logger.info(f"Rolled back {target_path} from backup")
1128
+ except Exception as e:
1129
+ self.logger.error(f"Rollback failed: {e}")
1130
+
1131
+ def _rollback_batch_operations(
1132
+ self, completed_operations: List[Dict], backup_operations: List[Dict]
1133
+ ):
1134
+ """Rollback batch operations"""
1135
+ for op in reversed(completed_operations):
1136
+ try:
1137
+ result = op.get("result", {})
1138
+ backup_info = result.get("backup_info")
1139
+ if backup_info:
1140
+ self._rollback_from_backup(result["write_result"]["path"], backup_info)
1141
+ except Exception as e:
1142
+ self.logger.error(f"Batch rollback failed for operation: {e}")
1143
+
1144
+ def _log_write_operation(
1145
+ self,
1146
+ operation_id: str,
1147
+ target_path: str,
1148
+ mode: WriteMode,
1149
+ write_result: Dict,
1150
+ backup_info: Optional[Dict],
1151
+ ) -> Dict:
1152
+ """Log write operation for audit"""
1153
+ audit_info = {
1154
+ "operation_id": operation_id,
1155
+ "timestamp": datetime.now().isoformat(),
1156
+ "target_path": target_path,
1157
+ "mode": mode,
1158
+ "success": True,
1159
+ "file_size": write_result.get("size", 0),
1160
+ "checksum": write_result.get("checksum"),
1161
+ "backup_created": backup_info is not None,
1162
+ }
1163
+
1164
+ # Log to audit file
1165
+ try:
1166
+ audit_file = os.path.join(self.config.temp_dir, "write_audit.log")
1167
+ with open(audit_file, "a") as f:
1168
+ f.write(json.dumps(audit_info) + "\n")
1169
+ except Exception as e:
1170
+ self.logger.warning(f"Audit logging failed: {e}")
1171
+
1172
+ return audit_info
1173
+
1174
    def edit_document(
        self,
        target_path: str,
        operation: EditOperation,
        content: Optional[str] = None,
        position: Optional[Dict[str, Any]] = None,
        selection: Optional[Dict[str, Any]] = None,
        format_options: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Perform advanced editing operations on documents.

        Reads the full document, applies one EditOperation via the matching
        helper, and writes the result back with a backup. COPY_TEXT is the one
        read-only operation: it returns the helper's result directly without
        writing anything.

        Args:
            target_path: Target file path
            operation: Edit operation to perform
            content: Content for the operation (insert/replace-style ops)
            position: Position info (line, column, offset)
            selection: Text selection range
            format_options: Additional format options

        Returns:
            Dict containing edit results (operation id, edit metadata,
            write result, timing).

        Raises:
            DocumentWriterError: On any failure, wrapping the original error.
        """
        try:
            start_time = datetime.now()
            operation_id = str(uuid.uuid4())

            self.logger.info(
                f"Starting edit operation {operation_id}: {operation} on {target_path}"
            )

            # Read current document content
            current_content = self._read_document_content(target_path)

            # Perform the specific edit operation — each branch delegates to a
            # dedicated helper that returns the full edited document text
            if operation == EditOperation.INSERT_TEXT:
                edited_content = self._insert_text(current_content, content, position)
            elif operation == EditOperation.DELETE_TEXT:
                edited_content = self._delete_text(current_content, selection)
            elif operation == EditOperation.REPLACE_TEXT:
                edited_content = self._replace_text(current_content, selection, content)
            elif operation == EditOperation.BOLD:
                edited_content = self._format_text_bold(current_content, selection, format_options)
            elif operation == EditOperation.ITALIC:
                edited_content = self._format_text_italic(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.UNDERLINE:
                edited_content = self._format_text_underline(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.STRIKETHROUGH:
                edited_content = self._format_text_strikethrough(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.HIGHLIGHT:
                edited_content = self._format_text_highlight(
                    current_content, selection, format_options
                )
            elif operation == EditOperation.INSERT_LINE:
                edited_content = self._insert_line(current_content, position, content)
            elif operation == EditOperation.DELETE_LINE:
                edited_content = self._delete_line(current_content, position)
            elif operation == EditOperation.MOVE_LINE:
                edited_content = self._move_line(current_content, position, format_options)
            elif operation == EditOperation.COPY_TEXT:
                # Read-only: returns the copied text result, no write-back
                return self._copy_text(current_content, selection)
            elif operation == EditOperation.CUT_TEXT:
                edited_content, cut_content = self._cut_text(current_content, selection)
                # Store cut content in clipboard for a later PASTE_TEXT
                self._store_clipboard_content(cut_content)
            elif operation == EditOperation.PASTE_TEXT:
                clipboard_content = self._get_clipboard_content()
                edited_content = self._paste_text(current_content, position, clipboard_content)
            else:
                raise ValueError(f"Unsupported edit operation: {operation}")

            # Write the edited content back to file
            file_format = self._detect_file_format(target_path)
            # NOTE(review): mode is the raw string "backup_write" rather than a
            # WriteMode member — presumably WriteMode is a str-valued enum that
            # accepts it; confirm against the WriteMode definition.
            write_result = self.write_document(
                target_path=target_path,
                content=edited_content,
                format=file_format,
                mode="backup_write",  # Always backup before editing
                backup_comment=f"Edit operation: {operation}",
            )

            result = {
                "operation_id": operation_id,
                "target_path": target_path,
                "operation": operation,
                "edit_metadata": {
                    "original_size": len(current_content),
                    "edited_size": (len(edited_content) if isinstance(edited_content, str) else 0),
                    "position": position,
                    "selection": selection,
                },
                "write_result": write_result,
                "processing_metadata": {
                    "start_time": start_time.isoformat(),
                    "end_time": datetime.now().isoformat(),
                    "duration": (datetime.now() - start_time).total_seconds(),
                },
            }

            self.logger.info(f"Edit operation {operation_id} completed successfully")
            return result

        except Exception as e:
            raise DocumentWriterError(f"Edit operation failed: {str(e)}")
1284
+
1285
+ def format_text(
1286
+ self,
1287
+ target_path: str,
1288
+ text_to_format: str,
1289
+ format_type: EditOperation,
1290
+ format_options: Optional[Dict[str, Any]] = None,
1291
+ ) -> Dict[str, Any]:
1292
+ """
1293
+ Apply formatting to specific text in a document
1294
+
1295
+ Args:
1296
+ target_path: Target file path
1297
+ text_to_format: Text to apply formatting to
1298
+ format_type: Type of formatting (bold, italic, etc.)
1299
+ format_options: Additional format options
1300
+
1301
+ Returns:
1302
+ Dict containing formatting results
1303
+ """
1304
+ try:
1305
+ current_content = self._read_document_content(target_path)
1306
+
1307
+ # Find all occurrences of the text
1308
+ formatted_content = self._apply_text_formatting(
1309
+ current_content, text_to_format, format_type, format_options
1310
+ )
1311
+
1312
+ # Write back to file
1313
+ file_format = self._detect_file_format(target_path)
1314
+ write_result = self.write_document(
1315
+ target_path=target_path,
1316
+ content=formatted_content,
1317
+ format=file_format,
1318
+ mode="backup_write",
1319
+ )
1320
+
1321
+ return {
1322
+ "target_path": target_path,
1323
+ "text_formatted": text_to_format,
1324
+ "format_type": format_type,
1325
+ "write_result": write_result,
1326
+ }
1327
+
1328
+ except Exception as e:
1329
+ raise DocumentWriterError(f"Text formatting failed: {str(e)}")
1330
+
1331
+ def find_replace(
1332
+ self,
1333
+ target_path: str,
1334
+ find_text: str,
1335
+ replace_text: str,
1336
+ replace_all: bool = False,
1337
+ case_sensitive: bool = True,
1338
+ regex_mode: bool = False,
1339
+ ) -> Dict[str, Any]:
1340
+ """
1341
+ Find and replace text in a document
1342
+
1343
+ Args:
1344
+ target_path: Target file path
1345
+ find_text: Text to find
1346
+ replace_text: Text to replace with
1347
+ replace_all: Replace all occurrences
1348
+ case_sensitive: Case sensitive search
1349
+ regex_mode: Use regex for find/replace
1350
+
1351
+ Returns:
1352
+ Dict containing find/replace results
1353
+ """
1354
+ try:
1355
+ current_content = self._read_document_content(target_path)
1356
+
1357
+ # Perform find and replace
1358
+ new_content, replacements = self._perform_find_replace(
1359
+ current_content,
1360
+ find_text,
1361
+ replace_text,
1362
+ replace_all,
1363
+ case_sensitive,
1364
+ regex_mode,
1365
+ )
1366
+
1367
+ if replacements > 0:
1368
+ # Write back to file
1369
+ file_format = self._detect_file_format(target_path)
1370
+ write_result = self.write_document(
1371
+ target_path=target_path,
1372
+ content=new_content,
1373
+ format=file_format,
1374
+ mode="backup_write",
1375
+ backup_comment=f"Find/Replace: '{find_text}' -> '{replace_text}'",
1376
+ )
1377
+
1378
+ return {
1379
+ "target_path": target_path,
1380
+ "find_text": find_text,
1381
+ "replace_text": replace_text,
1382
+ "replacements_made": replacements,
1383
+ "write_result": write_result,
1384
+ }
1385
+ else:
1386
+ return {
1387
+ "target_path": target_path,
1388
+ "find_text": find_text,
1389
+ "replace_text": replace_text,
1390
+ "replacements_made": 0,
1391
+ "message": "No matches found",
1392
+ }
1393
+
1394
+ except Exception as e:
1395
+ raise DocumentWriterError(f"Find/replace operation failed: {str(e)}")
1396
+
1397
+ # Helper methods for editing operations
1398
+ def _read_document_content(self, file_path: str) -> str:
1399
+ """Read document content for editing"""
1400
+ try:
1401
+ with open(file_path, "r", encoding="utf-8") as f:
1402
+ return f.read()
1403
+ except UnicodeDecodeError:
1404
+ # Try with different encodings
1405
+ for encoding in ["gbk", "latin1", "cp1252"]:
1406
+ try:
1407
+ with open(file_path, "r", encoding=encoding) as f:
1408
+ return f.read()
1409
+ except Exception:
1410
+ continue
1411
+ raise DocumentWriterError(f"Cannot decode file: {file_path}")
1412
+ except Exception as e:
1413
+ raise DocumentWriterError(f"Cannot read file {file_path}: {str(e)}")
1414
+
1415
+ def _detect_file_format(self, file_path: str) -> str:
1416
+ """Detect file format from extension"""
1417
+ ext = os.path.splitext(file_path)[1].lower()
1418
+ format_map = {
1419
+ ".txt": "txt",
1420
+ ".json": "json",
1421
+ ".csv": "csv",
1422
+ ".xml": "xml",
1423
+ ".html": "html",
1424
+ ".htm": "html",
1425
+ ".md": "markdown",
1426
+ ".markdown": "markdown",
1427
+ ".yaml": "yaml",
1428
+ ".yml": "yaml",
1429
+ }
1430
+ return format_map.get(ext, "txt")
1431
+
1432
+ def _insert_text(self, content: str, text: str, position: Optional[Dict[str, Any]]) -> str:
1433
+ """Insert text at specified position"""
1434
+ if not position:
1435
+ return content + text
1436
+
1437
+ if "offset" in position:
1438
+ offset = position["offset"]
1439
+ return content[:offset] + text + content[offset:]
1440
+ elif "line" in position:
1441
+ lines = content.split("\n")
1442
+ line_num = position.get("line", 0)
1443
+ column = position.get("column", 0)
1444
+
1445
+ if line_num < len(lines):
1446
+ line = lines[line_num]
1447
+ lines[line_num] = line[:column] + text + line[column:]
1448
+ else:
1449
+ lines.append(text)
1450
+ return "\n".join(lines)
1451
+ else:
1452
+ return content + text
1453
+
1454
+ def _delete_text(self, content: str, selection: Optional[Dict[str, Any]]) -> str:
1455
+ """Delete text in specified selection"""
1456
+ if not selection:
1457
+ return content
1458
+
1459
+ if "start_offset" in selection and "end_offset" in selection:
1460
+ start = selection["start_offset"]
1461
+ end = selection["end_offset"]
1462
+ return content[:start] + content[end:]
1463
+ elif "start_line" in selection and "end_line" in selection:
1464
+ lines = content.split("\n")
1465
+ start_line = selection["start_line"]
1466
+ end_line = selection["end_line"]
1467
+ start_col = selection.get("start_column", 0)
1468
+ end_col = selection.get(
1469
+ "end_column",
1470
+ len(lines[end_line]) if end_line < len(lines) else 0,
1471
+ )
1472
+
1473
+ if start_line == end_line:
1474
+ # Same line deletion
1475
+ line = lines[start_line]
1476
+ lines[start_line] = line[:start_col] + line[end_col:]
1477
+ else:
1478
+ # Multi-line deletion
1479
+ lines[start_line] = lines[start_line][:start_col]
1480
+ if end_line < len(lines):
1481
+ lines[start_line] += lines[end_line][end_col:]
1482
+ del lines[start_line + 1 : end_line + 1]
1483
+
1484
+ return "\n".join(lines)
1485
+
1486
+ return content
1487
+
1488
+ def _replace_text(
1489
+ self,
1490
+ content: str,
1491
+ selection: Optional[Dict[str, Any]],
1492
+ replacement: str,
1493
+ ) -> str:
1494
+ """Replace text in specified selection"""
1495
+ if not selection:
1496
+ return content
1497
+
1498
+ # First delete the selected text, then insert replacement
1499
+ content_after_delete = self._delete_text(content, selection)
1500
+
1501
+ # Calculate new insertion position after deletion
1502
+ if "start_offset" in selection:
1503
+ insert_pos = {"offset": selection["start_offset"]}
1504
+ elif "start_line" in selection:
1505
+ insert_pos = {
1506
+ "line": selection["start_line"],
1507
+ "column": selection.get("start_column", 0),
1508
+ }
1509
+ else:
1510
+ insert_pos = None
1511
+
1512
+ return self._insert_text(content_after_delete, replacement, insert_pos)
1513
+
1514
+ def _format_text_bold(
1515
+ self,
1516
+ content: str,
1517
+ selection: Optional[Dict[str, Any]],
1518
+ options: Optional[Dict[str, Any]],
1519
+ ) -> str:
1520
+ """Apply bold formatting to selected text"""
1521
+ if not selection:
1522
+ return content
1523
+
1524
+ format_type = options.get("format_type", "markdown") if options else "markdown"
1525
+
1526
+ if format_type == "markdown":
1527
+ return self._apply_markdown_formatting(content, selection, "**", "**")
1528
+ elif format_type == "html":
1529
+ return self._apply_html_formatting(content, selection, "<strong>", "</strong>")
1530
+ else:
1531
+ return content
1532
+
1533
+ def _format_text_italic(
1534
+ self,
1535
+ content: str,
1536
+ selection: Optional[Dict[str, Any]],
1537
+ options: Optional[Dict[str, Any]],
1538
+ ) -> str:
1539
+ """Apply italic formatting to selected text"""
1540
+ if not selection:
1541
+ return content
1542
+
1543
+ format_type = options.get("format_type", "markdown") if options else "markdown"
1544
+
1545
+ if format_type == "markdown":
1546
+ return self._apply_markdown_formatting(content, selection, "*", "*")
1547
+ elif format_type == "html":
1548
+ return self._apply_html_formatting(content, selection, "<em>", "</em>")
1549
+ else:
1550
+ return content
1551
+
1552
+ def _format_text_underline(
1553
+ self,
1554
+ content: str,
1555
+ selection: Optional[Dict[str, Any]],
1556
+ options: Optional[Dict[str, Any]],
1557
+ ) -> str:
1558
+ """Apply underline formatting to selected text"""
1559
+ if not selection:
1560
+ return content
1561
+
1562
+ format_type = options.get("format_type", "html") if options else "html"
1563
+
1564
+ if format_type == "html":
1565
+ return self._apply_html_formatting(content, selection, "<u>", "</u>")
1566
+ else:
1567
+ return content
1568
+
1569
+ def _format_text_strikethrough(
1570
+ self,
1571
+ content: str,
1572
+ selection: Optional[Dict[str, Any]],
1573
+ options: Optional[Dict[str, Any]],
1574
+ ) -> str:
1575
+ """Apply strikethrough formatting to selected text"""
1576
+ if not selection:
1577
+ return content
1578
+
1579
+ format_type = options.get("format_type", "markdown") if options else "markdown"
1580
+
1581
+ if format_type == "markdown":
1582
+ return self._apply_markdown_formatting(content, selection, "~~", "~~")
1583
+ elif format_type == "html":
1584
+ return self._apply_html_formatting(content, selection, "<del>", "</del>")
1585
+ else:
1586
+ return content
1587
+
1588
+ def _format_text_highlight(
1589
+ self,
1590
+ content: str,
1591
+ selection: Optional[Dict[str, Any]],
1592
+ options: Optional[Dict[str, Any]],
1593
+ ) -> str:
1594
+ """Apply highlight formatting to selected text"""
1595
+ if not selection:
1596
+ return content
1597
+
1598
+ format_type = options.get("format_type", "html") if options else "html"
1599
+ color = options.get("color", "yellow") if options else "yellow"
1600
+
1601
+ if format_type == "html":
1602
+ return self._apply_html_formatting(
1603
+ content,
1604
+ selection,
1605
+ f'<mark style="background-color: {color}">',
1606
+ "</mark>",
1607
+ )
1608
+ elif format_type == "markdown":
1609
+ return self._apply_markdown_formatting(content, selection, "==", "==")
1610
+ else:
1611
+ return content
1612
+
1613
+ def _apply_markdown_formatting(
1614
+ self,
1615
+ content: str,
1616
+ selection: Dict[str, Any],
1617
+ start_marker: str,
1618
+ end_marker: str,
1619
+ ) -> str:
1620
+ """Apply markdown formatting to selected text"""
1621
+ selected_text = self._extract_selected_text(content, selection)
1622
+ formatted_text = start_marker + selected_text + end_marker
1623
+ return self._replace_text(content, selection, formatted_text)
1624
+
1625
+ def _apply_html_formatting(
1626
+ self,
1627
+ content: str,
1628
+ selection: Dict[str, Any],
1629
+ start_tag: str,
1630
+ end_tag: str,
1631
+ ) -> str:
1632
+ """Apply HTML formatting to selected text"""
1633
+ selected_text = self._extract_selected_text(content, selection)
1634
+ formatted_text = start_tag + selected_text + end_tag
1635
+ return self._replace_text(content, selection, formatted_text)
1636
+
1637
+ def _extract_selected_text(self, content: str, selection: Dict[str, Any]) -> str:
1638
+ """Extract text from selection"""
1639
+ if "start_offset" in selection and "end_offset" in selection:
1640
+ return content[selection["start_offset"] : selection["end_offset"]]
1641
+ elif "start_line" in selection and "end_line" in selection:
1642
+ lines = content.split("\n")
1643
+ start_line = selection["start_line"]
1644
+ end_line = selection["end_line"]
1645
+ start_col = selection.get("start_column", 0)
1646
+ end_col = selection.get(
1647
+ "end_column",
1648
+ len(lines[end_line]) if end_line < len(lines) else 0,
1649
+ )
1650
+
1651
+ if start_line == end_line:
1652
+ return lines[start_line][start_col:end_col]
1653
+ else:
1654
+ result = [lines[start_line][start_col:]]
1655
+ result.extend(lines[start_line + 1 : end_line])
1656
+ if end_line < len(lines):
1657
+ result.append(lines[end_line][:end_col])
1658
+ return "\n".join(result)
1659
+ return ""
1660
+
1661
+ def _insert_line(
1662
+ self,
1663
+ content: str,
1664
+ position: Optional[Dict[str, Any]],
1665
+ line_content: str,
1666
+ ) -> str:
1667
+ """Insert a new line at specified position"""
1668
+ lines = content.split("\n")
1669
+ line_num = position.get("line", len(lines)) if position else len(lines)
1670
+
1671
+ lines.insert(line_num, line_content)
1672
+ return "\n".join(lines)
1673
+
1674
+ def _delete_line(self, content: str, position: Optional[Dict[str, Any]]) -> str:
1675
+ """Delete line at specified position"""
1676
+ lines = content.split("\n")
1677
+ line_num = position.get("line", 0) if position else 0
1678
+
1679
+ if 0 <= line_num < len(lines):
1680
+ del lines[line_num]
1681
+
1682
+ return "\n".join(lines)
1683
+
1684
+ def _move_line(
1685
+ self,
1686
+ content: str,
1687
+ position: Optional[Dict[str, Any]],
1688
+ options: Optional[Dict[str, Any]],
1689
+ ) -> str:
1690
+ """Move line to different position"""
1691
+ lines = content.split("\n")
1692
+ from_line = position.get("line", 0) if position else 0
1693
+ to_line = options.get("to_line", 0) if options else 0
1694
+
1695
+ if 0 <= from_line < len(lines) and 0 <= to_line < len(lines):
1696
+ line_content = lines.pop(from_line)
1697
+ lines.insert(to_line, line_content)
1698
+
1699
+ return "\n".join(lines)
1700
+
1701
+ def _copy_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Dict[str, Any]:
1702
+ """Copy selected text to clipboard"""
1703
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1704
+ self._store_clipboard_content(selected_text)
1705
+
1706
+ return {
1707
+ "operation": "copy",
1708
+ "copied_text": selected_text,
1709
+ "copied_length": len(selected_text),
1710
+ }
1711
+
1712
+ def _cut_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Tuple[str, str]:
1713
+ """Cut selected text (copy and delete)"""
1714
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1715
+ new_content = self._delete_text(content, selection) if selection else ""
1716
+
1717
+ return new_content, selected_text
1718
+
1719
+ def _paste_text(
1720
+ self,
1721
+ content: str,
1722
+ position: Optional[Dict[str, Any]],
1723
+ clipboard_content: str,
1724
+ ) -> str:
1725
+ """Paste text from clipboard"""
1726
+ return self._insert_text(content, clipboard_content, position)
1727
+
1728
+ def _store_clipboard_content(self, content: str):
1729
+ """Store content in clipboard (simplified implementation)"""
1730
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1731
+ try:
1732
+ with open(clipboard_file, "w", encoding="utf-8") as f:
1733
+ f.write(content)
1734
+ except Exception as e:
1735
+ self.logger.warning(f"Failed to store clipboard content: {e}")
1736
+
1737
+ def _get_clipboard_content(self) -> str:
1738
+ """Get content from clipboard"""
1739
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1740
+ try:
1741
+ with open(clipboard_file, "r", encoding="utf-8") as f:
1742
+ return f.read()
1743
+ except Exception:
1744
+ return ""
1745
+
1746
+ def _apply_text_formatting(
1747
+ self,
1748
+ content: str,
1749
+ text_to_format: str,
1750
+ format_type: EditOperation,
1751
+ options: Optional[Dict[str, Any]],
1752
+ ) -> str:
1753
+ """Apply formatting to all occurrences of specific text"""
1754
+ if format_type == EditOperation.BOLD:
1755
+ replacement = f"**{text_to_format}**"
1756
+ elif format_type == EditOperation.ITALIC:
1757
+ replacement = f"*{text_to_format}*"
1758
+ elif format_type == EditOperation.UNDERLINE:
1759
+ replacement = f"<u>{text_to_format}</u>"
1760
+ elif format_type == EditOperation.STRIKETHROUGH:
1761
+ replacement = f"~~{text_to_format}~~"
1762
+ elif format_type == EditOperation.HIGHLIGHT:
1763
+ color = options.get("color", "yellow") if options else "yellow"
1764
+ replacement = f'<mark style="background-color: {color}">{text_to_format}</mark>'
1765
+ else:
1766
+ replacement = text_to_format
1767
+
1768
+ return content.replace(text_to_format, replacement)
1769
+
1770
+ def _perform_find_replace(
1771
+ self,
1772
+ content: str,
1773
+ find_text: str,
1774
+ replace_text: str,
1775
+ replace_all: bool,
1776
+ case_sensitive: bool,
1777
+ regex_mode: bool,
1778
+ ) -> Tuple[str, int]:
1779
+ """Perform find and replace operation"""
1780
+ import re
1781
+
1782
+ replacements = 0
1783
+
1784
+ if regex_mode:
1785
+ flags = 0 if case_sensitive else re.IGNORECASE
1786
+ if replace_all:
1787
+ new_content, replacements = re.subn(find_text, replace_text, content, flags=flags)
1788
+ else:
1789
+ new_content = re.sub(find_text, replace_text, content, count=1, flags=flags)
1790
+ replacements = 1 if new_content != content else 0
1791
+ else:
1792
+ if case_sensitive:
1793
+ if replace_all:
1794
+ replacements = content.count(find_text)
1795
+ new_content = content.replace(find_text, replace_text)
1796
+ else:
1797
+ new_content = content.replace(find_text, replace_text, 1)
1798
+ replacements = 1 if new_content != content else 0
1799
+ else:
1800
+ # Case insensitive replacement
1801
+ import re
1802
+
1803
+ pattern = re.escape(find_text)
1804
+ if replace_all:
1805
+ new_content, replacements = re.subn(
1806
+ pattern, replace_text, content, flags=re.IGNORECASE
1807
+ )
1808
+ else:
1809
+ new_content = re.sub(
1810
+ pattern,
1811
+ replace_text,
1812
+ content,
1813
+ count=1,
1814
+ flags=re.IGNORECASE,
1815
+ )
1816
+ replacements = 1 if new_content != content else 0
1817
+
1818
+ return new_content, replacements