aiecs 1.0.1__py3-none-any.whl → 1.7.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic; see the registry's advisory page for more details.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +435 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3949 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1731 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +894 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +377 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +230 -37
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +328 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +415 -0
  199. aiecs/llm/clients/googleai_client.py +314 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +1186 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1464 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1016 -0
  271. aiecs/tools/docs/document_writer_tool.py +2008 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +220 -141
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/METADATA +52 -15
  321. aiecs-1.7.17.dist-info/RECORD +337 -0
  322. aiecs-1.7.17.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,2008 @@
1
+ import os
2
+ import json
3
+ import uuid
4
+ import hashlib
5
+ import logging
6
+ import asyncio
7
+ import shutil
8
+ from typing import Dict, Any, List, Optional, Union, Tuple
9
+ from enum import Enum
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ import tempfile
13
+
14
+ from pydantic import BaseModel, Field
15
+ from pydantic_settings import BaseSettings, SettingsConfigDict
16
+
17
+ from aiecs.tools.base_tool import BaseTool
18
+ from aiecs.tools import register_tool
19
+
20
+
21
class DocumentFormat(str, Enum):
    """Supported document formats for writing."""

    TXT = "txt"
    PLAIN_TEXT = "txt"  # Same value as TXT, so this member is an enum alias for TXT
    JSON = "json"
    CSV = "csv"
    XML = "xml"
    MARKDOWN = "md"
    HTML = "html"
    YAML = "yaml"
    PDF = "pdf"
    DOCX = "docx"
    XLSX = "xlsx"
    PPTX = "pptx"
    PPT = "ppt"
    BINARY = "binary"
38
+
39
+
40
class WriteMode(str, Enum):
    """Document writing modes."""

    CREATE = "create"  # Create a new file; fail if it already exists
    OVERWRITE = "overwrite"  # Overwrite an existing file
    APPEND = "append"  # Append to an existing file
    UPDATE = "update"  # Update an existing file (smart merge)
    BACKUP_WRITE = "backup_write"  # Back up the existing file, then write
    VERSION_WRITE = "version_write"  # Versioned write
    INSERT = "insert"  # Insert content at a specified position
    REPLACE = "replace"  # Replace specified content
    DELETE = "delete"  # Delete specified content
52
+
53
+
54
class EditOperation(str, Enum):
    """Advanced edit operations."""

    BOLD = "bold"  # Bold text
    ITALIC = "italic"  # Italicize text
    UNDERLINE = "underline"  # Underline text
    STRIKETHROUGH = "strikethrough"  # Strike through text
    HIGHLIGHT = "highlight"  # Highlight text
    INSERT_TEXT = "insert_text"  # Insert text
    DELETE_TEXT = "delete_text"  # Delete text
    REPLACE_TEXT = "replace_text"  # Replace text
    COPY_TEXT = "copy_text"  # Copy text
    CUT_TEXT = "cut_text"  # Cut text
    PASTE_TEXT = "paste_text"  # Paste text
    FIND_REPLACE = "find_replace"  # Find and replace
    INSERT_LINE = "insert_line"  # Insert a line
    DELETE_LINE = "delete_line"  # Delete a line
    MOVE_LINE = "move_line"  # Move a line
72
+
73
+
74
class EncodingType(str, Enum):
    """Text encoding types."""

    UTF8 = "utf-8"
    UTF16 = "utf-16"
    ASCII = "ascii"
    GBK = "gbk"  # Simplified-Chinese codec
    AUTO = "auto"  # Sentinel for automatic detection; the detection logic lives in the writer — TODO confirm
82
+
83
+
84
class ValidationLevel(str, Enum):
    """Content validation levels."""

    NONE = "none"  # No validation
    BASIC = "basic"  # Basic validation (format, size)
    STRICT = "strict"  # Strict validation (content, structure)
    ENTERPRISE = "enterprise"  # Enterprise-grade validation (security, compliance)
91
+
92
+
93
class DocumentWriterError(Exception):
    """Root of the document-writer exception hierarchy; all tool-specific errors derive from it."""
95
+
96
+
97
class WriteError(DocumentWriterError):
    """Raised when a write operation fails."""
99
+
100
+
101
class ValidationError(DocumentWriterError):
    """Raised when validation fails.

    NOTE: this is the tool's own error type, distinct from
    ``pydantic.ValidationError`` (pydantic is also used in this module).
    """
103
+
104
+
105
class SecurityError(DocumentWriterError):
    """Raised when security validation fails."""
107
+
108
+
109
class WritePermissionError(DocumentWriterError):
    """Raised when write permission is denied."""
111
+
112
+
113
class ContentValidationError(DocumentWriterError):
    """Raised when content validation fails."""
115
+
116
+
117
class StorageError(DocumentWriterError):
    """Raised when storage operations fail."""
119
+
120
+
121
+ @register_tool("document_writer")
122
+ class DocumentWriterTool(BaseTool):
123
+ """
124
+ Modern high-performance document writing component that can:
125
+ 1. Handle multiple document formats and encodings
126
+ 2. Provide production-grade write operations with validation
127
+ 3. Support various write modes (create, overwrite, append, update)
128
+ 4. Implement backup and versioning strategies
129
+ 5. Ensure atomic operations and data integrity
130
+ 6. Support both local and cloud storage
131
+
132
+ Production Features:
133
+ - Atomic writes (no partial writes)
134
+ - Content validation and security scanning
135
+ - Automatic backup and versioning
136
+ - Write permission and quota checks
137
+ - Transaction-like operations
138
+ - Audit logging
139
+ """
140
+
141
+ # Configuration schema
142
class Config(BaseSettings):
    """Configuration for the document writer tool.

    Automatically reads from environment variables with the DOC_WRITER_ prefix.
    Example: DOC_WRITER_GCS_PROJECT_ID -> gcs_project_id
    """

    model_config = SettingsConfigDict(env_prefix="DOC_WRITER_")

    # Working directories default to OS temp so the tool runs without setup.
    temp_dir: str = Field(
        default=os.path.join(tempfile.gettempdir(), "document_writer"),
        description="Temporary directory for document processing",
    )
    backup_dir: str = Field(
        default=os.path.join(tempfile.gettempdir(), "document_backups"),
        description="Directory for document backups",
    )
    output_dir: Optional[str] = Field(default=None, description="Default output directory for documents")
    max_file_size: int = Field(default=100 * 1024 * 1024, description="Maximum file size in bytes")
    max_backup_versions: int = Field(default=10, description="Maximum number of backup versions to keep")
    default_encoding: str = Field(default="utf-8", description="Default text encoding for documents")
    enable_backup: bool = Field(
        default=True,
        description="Whether to enable automatic backup functionality",
    )
    enable_versioning: bool = Field(default=True, description="Whether to enable document versioning")
    enable_content_validation: bool = Field(default=True, description="Whether to enable content validation")
    enable_security_scan: bool = Field(default=True, description="Whether to enable security scanning")
    atomic_write: bool = Field(default=True, description="Whether to use atomic write operations")
    validation_level: str = Field(default="basic", description="Content validation level")
    timeout_seconds: int = Field(default=60, description="Operation timeout in seconds")
    # NOTE(review): 'auto_backup' overlaps with 'enable_backup' above — confirm
    # which flag the write path actually reads before consolidating.
    auto_backup: bool = Field(
        default=True,
        description="Whether to automatically backup before write operations",
    )
    # NOTE(review): 'atomic_writes' duplicates 'atomic_write' — confirm usage.
    atomic_writes: bool = Field(default=True, description="Whether to use atomic write operations")
    default_format: str = Field(default="md", description="Default document format")
    # NOTE(review): 'version_control' duplicates 'enable_versioning', and
    # 'security_scan' duplicates 'enable_security_scan' — confirm usage.
    version_control: bool = Field(default=True, description="Whether to enable version control")
    security_scan: bool = Field(default=True, description="Whether to enable security scanning")
    enable_cloud_storage: bool = Field(
        default=True,
        description="Whether to enable cloud storage integration",
    )
    gcs_bucket_name: str = Field(
        default="aiecs-documents",
        description="Google Cloud Storage bucket name",
    )
    gcs_project_id: Optional[str] = Field(default=None, description="Google Cloud Storage project ID")
190
+
191
def __init__(self, config: Optional[Dict] = None, **kwargs):
    """Initialize DocumentWriterTool with settings.

    Configuration is automatically loaded by BaseTool from:
    1. Explicit config dict (highest priority)
    2. YAML config files (config/tools/document_writer_tool.yaml)
    3. Environment variables (via dotenv from .env files)
    4. Tool defaults (lowest priority)

    Args:
        config: Optional configuration overrides.
        **kwargs: Additional arguments passed to BaseTool (e.g., tool_name).
    """
    super().__init__(config, **kwargs)

    # BaseTool materializes the settings into self._config_obj; fall back
    # to the declared defaults when nothing was loaded.
    self.config = self._config_obj or self.Config()
    self.logger = logging.getLogger(__name__)

    # Ensure working directories exist before any write/backup operation.
    for required_dir in (self.config.temp_dir, self.config.backup_dir):
        os.makedirs(required_dir, exist_ok=True)

    # Wire up the optional subsystems in order: cloud storage, office
    # formats (PPTX/DOCX), then the per-format content validators.
    self._init_cloud_storage()
    self._init_office_tool()
    self._init_validators()
224
+
225
def _init_cloud_storage(self):
    """Initialize cloud storage for document writing.

    Sets ``self.file_storage`` (``None`` when disabled or unavailable) and
    ``self._storage_init_task`` (the pending async init task, if any).
    Failures are logged and degrade gracefully: the tool keeps working
    with local storage only.
    """
    self.file_storage = None
    self._storage_init_task = None

    if not self.config.enable_cloud_storage:
        return

    try:
        from aiecs.infrastructure.persistence.file_storage import (
            FileStorage,
        )

        storage_config = {
            "gcs_bucket_name": self.config.gcs_bucket_name,
            "gcs_project_id": self.config.gcs_project_id,
            "enable_local_fallback": True,
            "local_storage_path": self.config.temp_dir,
        }

        self.file_storage = FileStorage(storage_config)
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # Not in an async context: initialization happens lazily on the
            # first async operation, or explicitly via write_document_async.
            pass
        else:
            # In an async context: schedule initialization now.  Keep a
            # reference to the task — asyncio only holds a weak reference,
            # so an unreferenced task can be garbage-collected mid-run.
            self._storage_init_task = asyncio.create_task(self._init_storage_async())

    except ImportError:
        self.logger.warning("FileStorage not available, cloud storage disabled")
    except Exception as e:
        self.logger.warning(f"Failed to initialize cloud storage: {e}")
257
+
258
async def _init_storage_async(self):
    """Asynchronously initialize the file storage backend.

    On failure the backend is discarded (``self.file_storage = None``) so
    later operations fall back to local behavior instead of crashing.
    """
    if not self.file_storage:
        return
    try:
        await self.file_storage.initialize()
        self.logger.info("Cloud storage initialized successfully")
    except Exception as exc:
        self.logger.warning(f"Cloud storage initialization failed: {exc}")
        self.file_storage = None
267
+
268
def _init_office_tool(self):
    """Create the optional OfficeTool used for PPTX/DOCX writing.

    Sets ``self.office_tool`` to ``None`` when the dependency is missing;
    PPTX/DOCX support is then limited but the tool stays usable.
    """
    try:
        from aiecs.tools.task_tools.office_tool import OfficeTool

        self.office_tool = OfficeTool()
    except ImportError:
        self.office_tool = None
        self.logger.warning("OfficeTool not available, PPTX/DOCX writing will be limited")
    else:
        self.logger.info("OfficeTool initialized successfully for PPTX/DOCX support")
278
+
279
def _init_validators(self):
    """Register the per-format content validators.

    Builds ``self.validators``, mapping each structured DocumentFormat to
    the bound method that checks content of that format.
    """
    format_checks = (
        (DocumentFormat.JSON, self._validate_json_content),
        (DocumentFormat.XML, self._validate_xml_content),
        (DocumentFormat.CSV, self._validate_csv_content),
        (DocumentFormat.YAML, self._validate_yaml_content),
        (DocumentFormat.HTML, self._validate_html_content),
    )
    self.validators = dict(format_checks)
288
+
289
def _run_async_safely(self, coro):
    """Safely run an async coroutine from a synchronous context.

    Handles both cases:
    1. No event loop running in this thread: use ``asyncio.run()``.
    2. An event loop is already running: execute the coroutine on a fresh
       event loop in a worker thread (``asyncio.run`` would raise here),
       then relay its result or exception back to the caller.

    Args:
        coro: Coroutine to run.

    Returns:
        Result of the coroutine.

    Raises:
        Whatever exception the coroutine itself raises.
    """
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No running event loop: safe to create one directly.
        return asyncio.run(coro)

    # A loop is running in this thread.  Run the coroutine on its own loop
    # in a separate thread.  (The probe above is kept outside this section
    # so an exception raised BY the coroutine — e.g. a RuntimeError — is
    # re-raised as-is instead of being mistaken for "no loop running" and
    # triggering a second run of an already-consumed coroutine.)
    import threading

    result = None
    exception = None

    def run_in_thread():
        nonlocal result, exception
        new_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(new_loop)
        try:
            result = new_loop.run_until_complete(coro)
        except Exception as e:
            exception = e
        finally:
            # Always release loop resources, even when the coroutine fails.
            asyncio.set_event_loop(None)
            new_loop.close()

    thread = threading.Thread(target=run_in_thread)
    thread.start()
    thread.join()

    if exception:
        raise exception
    return result
333
+
334
+ # Schema definitions
335
+ class Write_documentSchema(BaseModel):
336
+ """Schema for write_document operation"""
337
+
338
+ target_path: str = Field(description="Target file path (local or cloud)")
339
+ content: Union[str, bytes, Dict, List] = Field(description="Content to write")
340
+ format: DocumentFormat = Field(description="Document format")
341
+ mode: WriteMode = Field(default=WriteMode.CREATE, description="Write mode")
342
+ encoding: EncodingType = Field(default=EncodingType.UTF8, description="Text encoding")
343
+ validation_level: ValidationLevel = Field(default=ValidationLevel.BASIC, description="Validation level")
344
+ metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
345
+ backup_comment: Optional[str] = Field(default=None, description="Backup comment")
346
+
347
+ class Batch_write_documentsSchema(BaseModel):
348
+ """Schema for batch_write_documents operation"""
349
+
350
+ write_operations: List[Dict[str, Any]] = Field(description="List of write operations")
351
+ transaction_mode: bool = Field(default=True, description="Use transaction mode")
352
+ rollback_on_error: bool = Field(default=True, description="Rollback on any error")
353
+
354
+ class Edit_documentSchema(BaseModel):
355
+ """Schema for edit_document operation"""
356
+
357
+ target_path: str = Field(description="Target file path")
358
+ operation: EditOperation = Field(description="Edit operation to perform")
359
+ content: Optional[str] = Field(default=None, description="Content for the operation")
360
+ position: Optional[Dict[str, Any]] = Field(default=None, description="Position info (line, column, offset)")
361
+ selection: Optional[Dict[str, Any]] = Field(default=None, description="Text selection range")
362
+ format_options: Optional[Dict[str, Any]] = Field(default=None, description="Formatting options")
363
+
364
+ class Format_textSchema(BaseModel):
365
+ """Schema for format_text operation"""
366
+
367
+ target_path: str = Field(description="Target file path")
368
+ text_to_format: str = Field(description="Text to apply formatting to")
369
+ format_type: EditOperation = Field(description="Type of formatting")
370
+ format_options: Optional[Dict[str, Any]] = Field(default=None, description="Additional format options")
371
+
372
+ class Find_replaceSchema(BaseModel):
373
+ """Schema for find_replace operation"""
374
+
375
+ target_path: str = Field(description="Target file path")
376
+ find_text: str = Field(description="Text to find")
377
+ replace_text: str = Field(description="Text to replace with")
378
+ replace_all: bool = Field(default=False, description="Replace all occurrences")
379
+ case_sensitive: bool = Field(default=True, description="Case sensitive search")
380
+ regex_mode: bool = Field(default=False, description="Use regex for find/replace")
381
+
382
+ def write_document(
383
+ self,
384
+ target_path: str,
385
+ content: Union[str, bytes, Dict, List],
386
+ format: DocumentFormat,
387
+ mode: WriteMode = WriteMode.CREATE,
388
+ encoding: EncodingType = EncodingType.UTF8,
389
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
390
+ metadata: Optional[Dict[str, Any]] = None,
391
+ backup_comment: Optional[str] = None,
392
+ ) -> Dict[str, Any]:
393
+ """
394
+ Write document with production-grade features
395
+
396
+ Args:
397
+ target_path: Target file path (local or cloud)
398
+ content: Content to write
399
+ format: Document format
400
+ mode: Write mode (create, overwrite, append, update, etc.)
401
+ encoding: Text encoding
402
+ validation_level: Content validation level
403
+ metadata: Additional metadata
404
+ backup_comment: Comment for backup
405
+
406
+ Returns:
407
+ Dict containing write results and metadata
408
+ """
409
+ try:
410
+ start_time = datetime.now()
411
+ operation_id = str(uuid.uuid4())
412
+
413
+ self.logger.info(f"Starting write operation {operation_id}: {target_path}")
414
+
415
+ # Step 1: Validate inputs
416
+ self._validate_write_inputs(target_path, content, format, mode)
417
+
418
+ # Step 2: Prepare content
419
+ processed_content, content_metadata = self._prepare_content(content, format, encoding, validation_level)
420
+
421
+ # Step 3: Handle write mode logic
422
+ write_plan = self._plan_write_operation(target_path, mode, metadata)
423
+
424
+ # Step 4: Create backup if needed
425
+ backup_info = None
426
+ if self.config.enable_backup and mode in [
427
+ WriteMode.OVERWRITE,
428
+ WriteMode.UPDATE,
429
+ ]:
430
+ backup_info = self._create_backup(target_path, backup_comment)
431
+
432
+ # Step 5: Execute atomic write
433
+ write_result = self._run_async_safely(self._execute_atomic_write(target_path, processed_content, format, encoding, write_plan))
434
+
435
+ # Step 6: Update metadata and versioning
436
+ version_info = self._handle_versioning(target_path, content_metadata, metadata)
437
+
438
+ # Step 7: Audit logging
439
+ audit_info = self._log_write_operation(operation_id, target_path, mode, write_result, backup_info)
440
+
441
+ result = {
442
+ "operation_id": operation_id,
443
+ "target_path": target_path,
444
+ "write_mode": mode,
445
+ "format": format,
446
+ "encoding": encoding,
447
+ "content_metadata": content_metadata,
448
+ "write_result": write_result,
449
+ "backup_info": backup_info,
450
+ "version_info": version_info,
451
+ "audit_info": audit_info,
452
+ "processing_metadata": {
453
+ "start_time": start_time.isoformat(),
454
+ "end_time": datetime.now().isoformat(),
455
+ "duration": (datetime.now() - start_time).total_seconds(),
456
+ },
457
+ }
458
+
459
+ self.logger.info(f"Write operation {operation_id} completed successfully")
460
+ return result
461
+
462
+ except Exception as e:
463
+ self.logger.error(f"Write operation failed for {target_path}: {str(e)}")
464
+ # Rollback if needed
465
+ if "backup_info" in locals() and backup_info:
466
+ self._rollback_from_backup(target_path, backup_info)
467
+ raise DocumentWriterError(f"Document write failed: {str(e)}")
468
+
469
+ async def write_document_async(
470
+ self,
471
+ target_path: str,
472
+ content: Union[str, bytes, Dict, List],
473
+ format: DocumentFormat,
474
+ mode: WriteMode = WriteMode.CREATE,
475
+ encoding: EncodingType = EncodingType.UTF8,
476
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
477
+ metadata: Optional[Dict[str, Any]] = None,
478
+ backup_comment: Optional[str] = None,
479
+ ) -> Dict[str, Any]:
480
+ """Async version of write_document"""
481
+ return await asyncio.to_thread(
482
+ self.write_document,
483
+ target_path=target_path,
484
+ content=content,
485
+ format=format,
486
+ mode=mode,
487
+ encoding=encoding,
488
+ validation_level=validation_level,
489
+ metadata=metadata,
490
+ backup_comment=backup_comment,
491
+ )
492
+
493
+ def batch_write_documents(
494
+ self,
495
+ write_operations: List[Dict[str, Any]],
496
+ transaction_mode: bool = True,
497
+ rollback_on_error: bool = True,
498
+ ) -> Dict[str, Any]:
499
+ """
500
+ Batch write multiple documents with transaction support
501
+
502
+ Args:
503
+ write_operations: List of write operation dictionaries
504
+ transaction_mode: Use transaction mode for atomicity
505
+ rollback_on_error: Rollback all operations on any error
506
+
507
+ Returns:
508
+ Dict containing batch write results
509
+ """
510
+ try:
511
+ start_time = datetime.now()
512
+ batch_id = str(uuid.uuid4())
513
+
514
+ self.logger.info(f"Starting batch write operation {batch_id}: {len(write_operations)} operations")
515
+
516
+ completed_operations = []
517
+ backup_operations = []
518
+
519
+ try:
520
+ for i, operation in enumerate(write_operations):
521
+ self.logger.info(f"Processing operation {i+1}/{len(write_operations)}")
522
+
523
+ # Execute individual write operation
524
+ result = self.write_document(**operation)
525
+ completed_operations.append(
526
+ {
527
+ "index": i,
528
+ "operation": operation,
529
+ "result": result,
530
+ "status": "success",
531
+ }
532
+ )
533
+
534
+ # Track backup info for potential rollback
535
+ if result.get("backup_info"):
536
+ backup_operations.append(result["backup_info"])
537
+
538
+ batch_result = {
539
+ "batch_id": batch_id,
540
+ "total_operations": len(write_operations),
541
+ "successful_operations": len(completed_operations),
542
+ "failed_operations": 0,
543
+ "operations": completed_operations,
544
+ "transaction_mode": transaction_mode,
545
+ "batch_metadata": {
546
+ "start_time": start_time.isoformat(),
547
+ "end_time": datetime.now().isoformat(),
548
+ "duration": (datetime.now() - start_time).total_seconds(),
549
+ },
550
+ }
551
+
552
+ self.logger.info(f"Batch write operation {batch_id} completed successfully")
553
+ return batch_result
554
+
555
+ except Exception as e:
556
+ self.logger.error(f"Batch write operation {batch_id} failed: {str(e)}")
557
+
558
+ if rollback_on_error and transaction_mode:
559
+ self.logger.info(f"Rolling back batch operation {batch_id}")
560
+ self._rollback_batch_operations(completed_operations, backup_operations)
561
+
562
+ # Create failure result
563
+ batch_result = {
564
+ "batch_id": batch_id,
565
+ "total_operations": len(write_operations),
566
+ "successful_operations": len(completed_operations),
567
+ "failed_operations": len(write_operations) - len(completed_operations),
568
+ "operations": completed_operations,
569
+ "error": str(e),
570
+ "transaction_mode": transaction_mode,
571
+ "rollback_performed": rollback_on_error and transaction_mode,
572
+ }
573
+
574
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
575
+
576
+ except Exception as e:
577
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
578
+
579
+ def _validate_write_inputs(
580
+ self,
581
+ target_path: str,
582
+ content: Any,
583
+ format: DocumentFormat,
584
+ mode: WriteMode,
585
+ ):
586
+ """Validate write operation inputs"""
587
+ # Path validation
588
+ if not target_path or not isinstance(target_path, str):
589
+ raise ValueError("Invalid target path")
590
+
591
+ # Content validation
592
+ if content is None:
593
+ raise ValueError("Content cannot be None")
594
+
595
+ # Size validation
596
+ content_size = self._calculate_content_size(content)
597
+ if content_size > self.config.max_file_size:
598
+ raise ValueError(f"Content size {content_size} exceeds maximum {self.config.max_file_size}")
599
+
600
+ # Permission validation
601
+ if not self._check_write_permission(target_path, mode):
602
+ raise WritePermissionError(f"No write permission for {target_path}")
603
+
604
+ def _prepare_content(
605
+ self,
606
+ content: Any,
607
+ format: DocumentFormat,
608
+ encoding: EncodingType,
609
+ validation_level: ValidationLevel,
610
+ ) -> Tuple[Union[str, bytes], Dict]:
611
+ """Prepare and validate content for writing"""
612
+
613
+ # Content conversion based on format
614
+ processed_content: Union[str, bytes]
615
+ if format == DocumentFormat.JSON:
616
+ if isinstance(content, (dict, list)):
617
+ processed_content = json.dumps(content, ensure_ascii=False, indent=2)
618
+ else:
619
+ processed_content = str(content)
620
+ elif format == DocumentFormat.CSV:
621
+ processed_content = self._convert_to_csv(content)
622
+ elif format == DocumentFormat.XML:
623
+ processed_content = self._convert_to_xml(content)
624
+ elif format == DocumentFormat.YAML:
625
+ processed_content = self._convert_to_yaml(content)
626
+ elif format == DocumentFormat.HTML:
627
+ processed_content = self._convert_to_html(content)
628
+ elif format == DocumentFormat.MARKDOWN:
629
+ processed_content = self._convert_to_markdown(content)
630
+ elif format == DocumentFormat.BINARY:
631
+ if isinstance(content, bytes):
632
+ processed_content = content
633
+ else:
634
+ processed_content = str(content).encode(encoding.value)
635
+ else:
636
+ processed_content = str(content)
637
+
638
+ # Content validation
639
+ if self.config.enable_content_validation:
640
+ self._validate_content(processed_content, format, validation_level)
641
+
642
+ # Calculate metadata
643
+ content_metadata = {
644
+ "original_type": type(content).__name__,
645
+ "processed_size": (len(processed_content) if isinstance(processed_content, (str, bytes)) else 0),
646
+ "format": format,
647
+ "encoding": encoding,
648
+ "checksum": self._calculate_checksum(processed_content),
649
+ "validation_level": validation_level,
650
+ "timestamp": datetime.now().isoformat(),
651
+ }
652
+
653
+ return processed_content, content_metadata
654
+
655
+ def _plan_write_operation(self, target_path: str, mode: WriteMode, metadata: Optional[Dict]) -> Dict:
656
+ """Plan the write operation based on mode and target"""
657
+
658
+ plan = {
659
+ "target_path": target_path,
660
+ "mode": mode,
661
+ "file_exists": self._file_exists(target_path),
662
+ "is_cloud_path": self._is_cloud_storage_path(target_path),
663
+ "requires_backup": False,
664
+ "requires_versioning": False,
665
+ "atomic_operation": self.config.atomic_write,
666
+ }
667
+
668
+ if mode == WriteMode.CREATE and plan["file_exists"]:
669
+ raise DocumentWriterError(f"File already exists: {target_path}")
670
+
671
+ if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
672
+ plan["requires_backup"] = self.config.enable_backup
673
+ plan["requires_versioning"] = self.config.enable_versioning
674
+
675
+ if mode == WriteMode.APPEND and not plan["file_exists"]:
676
+ # Convert to CREATE mode
677
+ plan["mode"] = WriteMode.CREATE
678
+
679
+ return plan
680
+
681
+ def _create_backup(self, target_path: str, comment: Optional[str] = None) -> Dict:
682
+ """Create backup of existing file"""
683
+ if not self._file_exists(target_path):
684
+ return {}
685
+
686
+ try:
687
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
688
+ file_stem = Path(target_path).stem
689
+ file_suffix = Path(target_path).suffix
690
+
691
+ backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
692
+ backup_path = os.path.join(self.config.backup_dir, backup_filename)
693
+
694
+ # Copy file to backup location
695
+ if self._is_cloud_storage_path(target_path):
696
+ backup_path = self._backup_cloud_file(target_path, backup_path)
697
+ else:
698
+ shutil.copy2(target_path, backup_path)
699
+
700
+ backup_info = {
701
+ "original_path": target_path,
702
+ "backup_path": backup_path,
703
+ "timestamp": timestamp,
704
+ "comment": comment,
705
+ "checksum": self._calculate_file_checksum(target_path),
706
+ }
707
+
708
+ self.logger.info(f"Created backup: {backup_path}")
709
+ return backup_info
710
+
711
+ except Exception as e:
712
+ self.logger.error(f"Failed to create backup for {target_path}: {e}")
713
+ raise StorageError(f"Backup creation failed: {e}")
714
+
715
+ async def _execute_atomic_write(
716
+ self,
717
+ target_path: str,
718
+ content: Union[str, bytes],
719
+ format: DocumentFormat,
720
+ encoding: EncodingType,
721
+ plan: Dict,
722
+ ) -> Dict:
723
+ """Execute atomic write operation"""
724
+
725
+ if plan["is_cloud_path"]:
726
+ return await self._write_to_cloud_storage(target_path, content, format, encoding, plan)
727
+ else:
728
+ return self._write_to_local_file(target_path, content, format, encoding, plan)
729
+
730
+ def _write_to_local_file(
731
+ self,
732
+ target_path: str,
733
+ content: Union[str, bytes],
734
+ format: DocumentFormat,
735
+ encoding: EncodingType,
736
+ plan: Dict,
737
+ ) -> Dict:
738
+ """Write to local file system with atomic operation"""
739
+
740
+ try:
741
+ # Handle PPTX format using office_tool
742
+ if format in [DocumentFormat.PPTX, DocumentFormat.PPT]:
743
+ return self._write_pptx_file(target_path, content, plan)
744
+
745
+ # Handle DOCX format using office_tool
746
+ if format == DocumentFormat.DOCX:
747
+ return self._write_docx_file(target_path, content, plan)
748
+
749
+ # Create parent directories
750
+ os.makedirs(os.path.dirname(target_path), exist_ok=True)
751
+
752
+ if plan["atomic_operation"]:
753
+ # Atomic write using temporary file
754
+ temp_path = f"{target_path}.tmp.{uuid.uuid4().hex}"
755
+
756
+ try:
757
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
758
+ # Read existing content first
759
+ with open(target_path, "rb") as f:
760
+ existing_content = f.read()
761
+
762
+ if isinstance(content, str):
763
+ content = existing_content.decode(encoding.value) + content
764
+ else:
765
+ content = existing_content + content
766
+
767
+ # Write to temporary file
768
+ if isinstance(content, bytes):
769
+ with open(temp_path, "wb") as f:
770
+ f.write(content)
771
+ else:
772
+ # Handle both EncodingType enum and string
773
+ enc_value = encoding.value if hasattr(encoding, "value") else str(encoding)
774
+ with open(temp_path, "w", encoding=enc_value) as f:
775
+ f.write(content)
776
+
777
+ # Atomic move
778
+ shutil.move(temp_path, target_path)
779
+
780
+ finally:
781
+ # Cleanup temp file if it still exists
782
+ if os.path.exists(temp_path):
783
+ os.unlink(temp_path)
784
+ else:
785
+ # Direct write
786
+ mode_map = {
787
+ WriteMode.CREATE: "w",
788
+ WriteMode.OVERWRITE: "w",
789
+ WriteMode.APPEND: "a",
790
+ WriteMode.UPDATE: "w",
791
+ }
792
+
793
+ file_mode = mode_map.get(plan["mode"], "w")
794
+ if isinstance(content, bytes):
795
+ file_mode += "b"
796
+
797
+ # Handle both EncodingType enum and string
798
+ file_enc_value: Optional[str] = None if isinstance(content, bytes) else (encoding.value if hasattr(encoding, "value") else str(encoding))
799
+ with open(target_path, file_mode, encoding=file_enc_value) as f:
800
+ f.write(content)
801
+
802
+ # Get file stats
803
+ stat = os.stat(target_path)
804
+
805
+ return {
806
+ "path": target_path,
807
+ "size": stat.st_size,
808
+ "checksum": self._calculate_file_checksum(target_path),
809
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
810
+ "atomic_write": plan["atomic_operation"],
811
+ }
812
+
813
+ except Exception as e:
814
+ raise StorageError(f"Local file write failed: {e}")
815
+
816
+ def _write_pptx_file(self, target_path: str, content: Union[str, bytes], plan: Dict) -> Dict:
817
+ """Write content to PPTX file using office_tool"""
818
+ if not self.office_tool:
819
+ raise StorageError("OfficeTool not available. Cannot write PPTX files.")
820
+
821
+ try:
822
+ # Convert bytes to string if needed
823
+ if isinstance(content, bytes):
824
+ content_str = content.decode("utf-8")
825
+ else:
826
+ content_str = str(content)
827
+
828
+ # Parse content to extract slides
829
+ slides = self._parse_content_to_slides(content_str)
830
+
831
+ # Handle append mode
832
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
833
+ # Read existing slides
834
+ existing_slides = self.office_tool.read_pptx(target_path)
835
+ slides = existing_slides + slides
836
+
837
+ # Use office_tool to write PPTX
838
+ result = self.office_tool.write_pptx(
839
+ slides=slides,
840
+ output_path=target_path,
841
+ image_path=None,
842
+ )
843
+
844
+ if not result.get("success"):
845
+ raise StorageError(f"Failed to write PPTX file: {result}")
846
+
847
+ # Get file stats
848
+ stat = os.stat(target_path)
849
+
850
+ return {
851
+ "path": target_path,
852
+ "size": stat.st_size,
853
+ "checksum": self._calculate_file_checksum(target_path),
854
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
855
+ "atomic_write": False, # Office tool handles its own atomicity
856
+ }
857
+
858
+ except Exception as e:
859
+ raise StorageError(f"PPTX file write failed: {e}")
860
+
861
+ def _write_docx_file(self, target_path: str, content: Union[str, bytes], plan: Dict) -> Dict:
862
+ """Write content to DOCX file using office_tool"""
863
+ if not self.office_tool:
864
+ raise StorageError("OfficeTool not available. Cannot write DOCX files.")
865
+
866
+ try:
867
+ # Convert bytes to string if needed
868
+ if isinstance(content, bytes):
869
+ content_str = content.decode("utf-8")
870
+ else:
871
+ content_str = str(content)
872
+
873
+ # Handle append mode
874
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
875
+ # Read existing content
876
+ existing_doc = self.office_tool.read_docx(target_path)
877
+ existing_text = "\n".join(existing_doc.get("paragraphs", []))
878
+ content_str = existing_text + "\n" + content_str
879
+
880
+ # Use office_tool to write DOCX
881
+ result = self.office_tool.write_docx(
882
+ text=content_str,
883
+ output_path=target_path,
884
+ table_data=None,
885
+ )
886
+
887
+ if not result.get("success"):
888
+ raise StorageError(f"Failed to write DOCX file: {result}")
889
+
890
+ # Get file stats
891
+ stat = os.stat(target_path)
892
+
893
+ return {
894
+ "path": target_path,
895
+ "size": stat.st_size,
896
+ "checksum": self._calculate_file_checksum(target_path),
897
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
898
+ "atomic_write": False, # Office tool handles its own atomicity
899
+ }
900
+
901
+ except Exception as e:
902
+ raise StorageError(f"DOCX file write failed: {e}")
903
+
904
+ def _parse_content_to_slides(self, content: str) -> List[str]:
905
+ """Parse content string into list of slide contents
906
+
907
+ Supports multiple slide separation formats:
908
+ - "---" separator (markdown style)
909
+ - "## Slide X:" headers
910
+ - Empty lines between slides
911
+ """
912
+ slides = []
913
+
914
+ # Split by "---" separator (common in markdown presentations)
915
+ if "---" in content:
916
+ parts = content.split("---")
917
+ for part in parts:
918
+ part = part.strip()
919
+ if part:
920
+ # Remove slide headers like "## Slide X: Title"
921
+ lines = part.split("\n")
922
+ cleaned_lines = []
923
+ for line in lines:
924
+ # Skip slide headers
925
+ if line.strip().startswith("## Slide") and ":" in line:
926
+ continue
927
+ cleaned_lines.append(line)
928
+ slide_content = "\n".join(cleaned_lines).strip()
929
+ if slide_content:
930
+ slides.append(slide_content)
931
+ else:
932
+ # Try to split by "## Slide" headers
933
+ if "## Slide" in content:
934
+ parts = content.split("## Slide")
935
+ for i, part in enumerate(parts):
936
+ if i == 0:
937
+ # First part might be title slide
938
+ part = part.strip()
939
+ if part:
940
+ slides.append(part)
941
+ else:
942
+ # Extract content after "Slide X: Title"
943
+ lines = part.split("\n", 1)
944
+ if len(lines) > 1:
945
+ slide_content = lines[1].strip()
946
+ if slide_content:
947
+ slides.append(slide_content)
948
+ else:
949
+ # Fallback: split by double newlines (paragraph breaks)
950
+ parts = content.split("\n\n")
951
+ current_slide = []
952
+ for part in parts:
953
+ part = part.strip()
954
+ if part:
955
+ # If it's a header, start a new slide
956
+ if part.startswith("#"):
957
+ if current_slide:
958
+ slides.append("\n".join(current_slide))
959
+ current_slide = []
960
+ current_slide.append(part)
961
+
962
+ if current_slide:
963
+ slides.append("\n".join(current_slide))
964
+
965
+ # If no slides found, create a single slide with all content
966
+ if not slides:
967
+ slides = [content.strip()] if content.strip() else [""]
968
+
969
+ return slides
970
+
971
+ async def _write_to_cloud_storage(
972
+ self,
973
+ target_path: str,
974
+ content: Union[str, bytes],
975
+ format: DocumentFormat,
976
+ encoding: EncodingType,
977
+ plan: Dict,
978
+ ) -> Dict:
979
+ """Write to cloud storage"""
980
+
981
+ if not self.file_storage:
982
+ raise StorageError("Cloud storage not available")
983
+
984
+ try:
985
+ storage_path = self._parse_cloud_storage_path(target_path)
986
+
987
+ # Handle append mode for cloud storage
988
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
989
+ existing_content = await self.file_storage.retrieve(storage_path)
990
+ if isinstance(content, str) and isinstance(existing_content, str):
991
+ content = existing_content + content
992
+ elif isinstance(content, bytes) and isinstance(existing_content, bytes):
993
+ content = existing_content + content
994
+
995
+ # Store in cloud storage
996
+ await self.file_storage.store(storage_path, content)
997
+
998
+ return {
999
+ "path": target_path,
1000
+ "storage_path": storage_path,
1001
+ "size": (len(content) if isinstance(content, (str, bytes)) else 0),
1002
+ "checksum": self._calculate_checksum(content),
1003
+ "cloud_storage": True,
1004
+ }
1005
+
1006
+ except Exception as e:
1007
+ raise StorageError(f"Cloud storage write failed: {e}")
1008
+
1009
+ def _handle_versioning(
1010
+ self,
1011
+ target_path: str,
1012
+ content_metadata: Dict,
1013
+ metadata: Optional[Dict],
1014
+ ) -> Optional[Dict]:
1015
+ """Handle document versioning"""
1016
+
1017
+ if not self.config.enable_versioning:
1018
+ return None
1019
+
1020
+ try:
1021
+ version_info = {
1022
+ "path": target_path,
1023
+ "version": self._get_next_version(target_path),
1024
+ "timestamp": datetime.now().isoformat(),
1025
+ "content_metadata": content_metadata,
1026
+ "user_metadata": metadata or {},
1027
+ }
1028
+
1029
+ # Store version info
1030
+ version_file = f"{target_path}.versions.json"
1031
+ versions = self._load_version_history(version_file)
1032
+ versions.append(version_info)
1033
+
1034
+ # Keep only recent versions
1035
+ if len(versions) > self.config.max_backup_versions:
1036
+ versions = versions[-self.config.max_backup_versions :]
1037
+
1038
+ self._save_version_history(version_file, versions)
1039
+
1040
+ return version_info
1041
+
1042
+ except Exception as e:
1043
+ self.logger.warning(f"Versioning failed for {target_path}: {e}")
1044
+ return None
1045
+
1046
+ def _validate_content(
1047
+ self,
1048
+ content: Union[str, bytes],
1049
+ format: DocumentFormat,
1050
+ validation_level: ValidationLevel,
1051
+ ):
1052
+ """Validate content based on format and validation level"""
1053
+
1054
+ if validation_level == ValidationLevel.NONE:
1055
+ return
1056
+
1057
+ try:
1058
+ # Format-specific validation
1059
+ if format in self.validators:
1060
+ self.validators[format](content, validation_level)
1061
+
1062
+ # Security validation for enterprise level
1063
+ if validation_level == ValidationLevel.ENTERPRISE:
1064
+ self._security_scan_content(content)
1065
+
1066
+ except Exception as e:
1067
+ raise ContentValidationError(f"Content validation failed: {e}")
1068
+
1069
+ def _validate_json_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
1070
+ """Validate JSON content"""
1071
+ try:
1072
+ if isinstance(content, bytes):
1073
+ content = content.decode("utf-8")
1074
+ json.loads(content)
1075
+ except json.JSONDecodeError as e:
1076
+ raise ContentValidationError(f"Invalid JSON: {e}")
1077
+
1078
+ def _validate_xml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
1079
+ """Validate XML content"""
1080
+ try:
1081
+ import xml.etree.ElementTree as ET
1082
+
1083
+ if isinstance(content, bytes):
1084
+ content = content.decode("utf-8")
1085
+ ET.fromstring(content)
1086
+ except ET.ParseError as e:
1087
+ raise ContentValidationError(f"Invalid XML: {e}")
1088
+
1089
+ def _validate_csv_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
1090
+ """Validate CSV content"""
1091
+ try:
1092
+ import csv
1093
+ import io
1094
+
1095
+ if isinstance(content, bytes):
1096
+ content = content.decode("utf-8")
1097
+ csv.reader(io.StringIO(content))
1098
+ except Exception as e:
1099
+ raise ContentValidationError(f"Invalid CSV: {e}")
1100
+
1101
+ def _validate_yaml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
1102
+ """Validate YAML content"""
1103
+ try:
1104
+ import yaml
1105
+
1106
+ if isinstance(content, bytes):
1107
+ content = content.decode("utf-8")
1108
+ yaml.safe_load(content)
1109
+ except yaml.YAMLError as e:
1110
+ raise ContentValidationError(f"Invalid YAML: {e}")
1111
+
1112
+ def _validate_html_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
1113
+ """Validate HTML content"""
1114
+ try:
1115
+ from bs4 import BeautifulSoup
1116
+
1117
+ if isinstance(content, bytes):
1118
+ content = content.decode("utf-8")
1119
+ BeautifulSoup(content, "html.parser")
1120
+ except Exception as e:
1121
+ raise ContentValidationError(f"Invalid HTML: {e}")
1122
+
1123
+ def _security_scan_content(self, content: Union[str, bytes]):
1124
+ """Perform security scan on content"""
1125
+ if isinstance(content, bytes):
1126
+ content = content.decode("utf-8", errors="ignore")
1127
+
1128
+ # Check for suspicious patterns
1129
+ suspicious_patterns = [
1130
+ r"<script[^>]*>", # JavaScript
1131
+ r"javascript:", # JavaScript URLs
1132
+ r"vbscript:", # VBScript URLs
1133
+ r"data:.*base64", # Base64 data URLs
1134
+ r"eval\s*\(", # eval() calls
1135
+ r"exec\s*\(", # exec() calls
1136
+ ]
1137
+
1138
+ import re
1139
+
1140
+ for pattern in suspicious_patterns:
1141
+ if re.search(pattern, content, re.IGNORECASE):
1142
+ raise ContentValidationError("Security scan failed: suspicious pattern detected")
1143
+
1144
+ # Helper methods
1145
+ def _calculate_content_size(self, content: Any) -> int:
1146
+ """Calculate content size in bytes"""
1147
+ if isinstance(content, bytes):
1148
+ return len(content)
1149
+ elif isinstance(content, str):
1150
+ return len(content.encode("utf-8"))
1151
+ else:
1152
+ return len(str(content).encode("utf-8"))
1153
+
1154
+ def _calculate_checksum(self, content: Union[str, bytes]) -> str:
1155
+ """Calculate content checksum"""
1156
+ if isinstance(content, str):
1157
+ content = content.encode("utf-8")
1158
+ return hashlib.sha256(content).hexdigest()
1159
+
1160
+ def _calculate_file_checksum(self, file_path: str) -> str:
1161
+ """Calculate file checksum"""
1162
+ hash_sha256 = hashlib.sha256()
1163
+ with open(file_path, "rb") as f:
1164
+ for chunk in iter(lambda: f.read(4096), b""):
1165
+ hash_sha256.update(chunk)
1166
+ return hash_sha256.hexdigest()
1167
+
1168
+ def _check_write_permission(self, target_path: str, mode: WriteMode) -> bool:
1169
+ """Check write permission for target path"""
1170
+ try:
1171
+ if self._is_cloud_storage_path(target_path):
1172
+ return self.file_storage is not None
1173
+
1174
+ parent_dir = os.path.dirname(target_path)
1175
+ if not os.path.exists(parent_dir):
1176
+ # Check if we can create the directory
1177
+ return os.access(os.path.dirname(parent_dir), os.W_OK)
1178
+
1179
+ if os.path.exists(target_path):
1180
+ return os.access(target_path, os.W_OK)
1181
+ else:
1182
+ return os.access(parent_dir, os.W_OK)
1183
+
1184
+ except Exception:
1185
+ return False
1186
+
1187
+ def _file_exists(self, file_path: str) -> bool:
1188
+ """Check if file exists (local or cloud)"""
1189
+ if self._is_cloud_storage_path(file_path):
1190
+ # For cloud storage, we'd need to implement exists check
1191
+ return False # Simplified for now
1192
+ else:
1193
+ return os.path.exists(file_path)
1194
+
1195
+ def _is_cloud_storage_path(self, source: str) -> bool:
1196
+ """Check if source is a cloud storage path"""
1197
+ cloud_schemes = ["gs", "s3", "azure", "cloud"]
1198
+ try:
1199
+ from urllib.parse import urlparse
1200
+
1201
+ parsed = urlparse(source)
1202
+ return parsed.scheme in cloud_schemes
1203
+ except Exception:
1204
+ return False
1205
+
1206
+ def _parse_cloud_storage_path(self, source: str) -> str:
1207
+ """Parse cloud storage path to get storage key"""
1208
+ try:
1209
+ from urllib.parse import urlparse
1210
+
1211
+ parsed = urlparse(source)
1212
+ return parsed.path.lstrip("/")
1213
+ except Exception:
1214
+ return source
1215
+
1216
+ # Content conversion methods
1217
+ def _convert_to_csv(self, content: Any) -> str:
1218
+ """Convert content to CSV format"""
1219
+ import csv
1220
+ import io
1221
+
1222
+ output = io.StringIO()
1223
+ writer = csv.writer(output)
1224
+
1225
+ if isinstance(content, list):
1226
+ for row in content:
1227
+ if isinstance(row, (list, tuple)):
1228
+ writer.writerow(row)
1229
+ else:
1230
+ writer.writerow([row])
1231
+ elif isinstance(content, dict):
1232
+ # Convert dict to CSV with headers
1233
+ if content:
1234
+ headers = list(content.keys())
1235
+ writer.writerow(headers)
1236
+ writer.writerow([content[h] for h in headers])
1237
+ else:
1238
+ writer.writerow([str(content)])
1239
+
1240
+ return output.getvalue()
1241
+
1242
+ def _convert_to_xml(self, content: Any) -> str:
1243
+ """Convert content to XML format"""
1244
+ import xml.etree.ElementTree as ET
1245
+
1246
+ if isinstance(content, dict):
1247
+ root = ET.Element("document")
1248
+ for key, value in content.items():
1249
+ elem = ET.SubElement(root, str(key))
1250
+ elem.text = str(value)
1251
+ return ET.tostring(root, encoding="unicode")
1252
+ else:
1253
+ root = ET.Element("document")
1254
+ root.text = str(content)
1255
+ return ET.tostring(root, encoding="unicode")
1256
+
1257
+ def _convert_to_yaml(self, content: Any) -> str:
1258
+ """Convert content to YAML format"""
1259
+ try:
1260
+ import yaml
1261
+
1262
+ return yaml.dump(content, default_flow_style=False, allow_unicode=True)
1263
+ except ImportError:
1264
+ # Fallback to simple string representation
1265
+ return str(content)
1266
+
1267
+ def _convert_to_html(self, content: Any) -> str:
1268
+ """Convert content to HTML format"""
1269
+ if isinstance(content, dict):
1270
+ html = "<html><body>\n"
1271
+ for key, value in content.items():
1272
+ html += f"<h3>{key}</h3>\n<p>{value}</p>\n"
1273
+ html += "</body></html>"
1274
+ return html
1275
+ else:
1276
+ return f"<html><body><pre>{str(content)}</pre></body></html>"
1277
+
1278
+ def _convert_to_markdown(self, content: Any) -> str:
1279
+ """Convert content to Markdown format"""
1280
+ if isinstance(content, dict):
1281
+ md = ""
1282
+ for key, value in content.items():
1283
+ md += f"## {key}\n\n{value}\n\n"
1284
+ return md
1285
+ else:
1286
+ return str(content)
1287
+
1288
+ # Versioning methods
1289
+ def _get_next_version(self, file_path: str) -> int:
1290
+ """Get next version number for file"""
1291
+ version_file = f"{file_path}.versions.json"
1292
+ versions = self._load_version_history(version_file)
1293
+ return len(versions) + 1
1294
+
1295
+ def _load_version_history(self, version_file: str) -> List[Dict]:
1296
+ """Load version history from file"""
1297
+ try:
1298
+ if os.path.exists(version_file):
1299
+ with open(version_file, "r") as f:
1300
+ return json.load(f)
1301
+ except Exception:
1302
+ pass
1303
+ return []
1304
+
1305
+ def _save_version_history(self, version_file: str, versions: List[Dict]):
1306
+ """Save version history to file"""
1307
+ try:
1308
+ with open(version_file, "w") as f:
1309
+ json.dump(versions, f, indent=2)
1310
+ except Exception as e:
1311
+ self.logger.warning(f"Failed to save version history: {e}")
1312
+
1313
+ # Backup and rollback methods
1314
+ def _backup_cloud_file(self, source_path: str, backup_path: str) -> str:
1315
+ """Backup cloud file"""
1316
+ # Simplified implementation
1317
+ return backup_path
1318
+
1319
+ def _rollback_from_backup(self, target_path: str, backup_info: Dict):
1320
+ """Rollback file from backup"""
1321
+ try:
1322
+ if backup_info and os.path.exists(backup_info["backup_path"]):
1323
+ shutil.copy2(backup_info["backup_path"], target_path)
1324
+ self.logger.info(f"Rolled back {target_path} from backup")
1325
+ except Exception as e:
1326
+ self.logger.error(f"Rollback failed: {e}")
1327
+
1328
def _rollback_batch_operations(self, completed_operations: List[Dict], backup_operations: List[Dict]):
    """Undo already-completed batch writes, newest first, using their backups.

    Operations without backup info are skipped; individual rollback failures
    are logged and do not stop the remaining rollbacks.
    """
    for operation in reversed(completed_operations):
        try:
            result = operation.get("result", {})
            backup_info = result.get("backup_info")
            if backup_info:
                self._rollback_from_backup(result["write_result"]["path"], backup_info)
        except Exception as exc:
            self.logger.error(f"Batch rollback failed for operation: {exc}")
1338
+
1339
+ def _log_write_operation(
1340
+ self,
1341
+ operation_id: str,
1342
+ target_path: str,
1343
+ mode: WriteMode,
1344
+ write_result: Dict,
1345
+ backup_info: Optional[Dict],
1346
+ ) -> Dict:
1347
+ """Log write operation for audit"""
1348
+ audit_info = {
1349
+ "operation_id": operation_id,
1350
+ "timestamp": datetime.now().isoformat(),
1351
+ "target_path": target_path,
1352
+ "mode": mode,
1353
+ "success": True,
1354
+ "file_size": write_result.get("size", 0),
1355
+ "checksum": write_result.get("checksum"),
1356
+ "backup_created": backup_info is not None,
1357
+ }
1358
+
1359
+ # Log to audit file
1360
+ try:
1361
+ audit_file = os.path.join(self.config.temp_dir, "write_audit.log")
1362
+ with open(audit_file, "a") as f:
1363
+ f.write(json.dumps(audit_info) + "\n")
1364
+ except Exception as e:
1365
+ self.logger.warning(f"Audit logging failed: {e}")
1366
+
1367
+ return audit_info
1368
+
1369
def edit_document(
    self,
    target_path: str,
    operation: EditOperation,
    content: Optional[str] = None,
    position: Optional[Dict[str, Any]] = None,
    selection: Optional[Dict[str, Any]] = None,
    format_options: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Perform advanced editing operations on documents

    Reads the whole document into memory, applies the requested edit, then
    writes the result back via ``write_document`` in BACKUP_WRITE mode.
    COPY_TEXT is the exception: it returns clipboard info without writing.

    Args:
        target_path: Target file path
        operation: Edit operation to perform
        content: Content for the operation (required for INSERT_TEXT,
            REPLACE_TEXT and INSERT_LINE; unused otherwise)
        position: Position info (line, column, offset)
        selection: Text selection range
        format_options: Additional format options

    Returns:
        Dict containing edit results (operation id, edit metadata, nested
        write result, and processing timings)

    Raises:
        DocumentWriterError: if any step fails (ValueError from missing
        arguments is also wrapped by the broad handler below).
    """
    try:
        start_time = datetime.now()
        operation_id = str(uuid.uuid4())

        self.logger.info(f"Starting edit operation {operation_id}: {operation} on {target_path}")

        # Read current document content
        current_content = self._read_document_content(target_path)

        # Perform the specific edit operation
        if operation == EditOperation.INSERT_TEXT:
            if content is None:
                raise ValueError("content is required for INSERT_TEXT operation")
            edited_content = self._insert_text(current_content, content, position)
        elif operation == EditOperation.DELETE_TEXT:
            edited_content = self._delete_text(current_content, selection)
        elif operation == EditOperation.REPLACE_TEXT:
            if content is None:
                raise ValueError("content is required for REPLACE_TEXT operation")
            edited_content = self._replace_text(current_content, selection, content)
        elif operation == EditOperation.BOLD:
            edited_content = self._format_text_bold(current_content, selection, format_options)
        elif operation == EditOperation.ITALIC:
            edited_content = self._format_text_italic(current_content, selection, format_options)
        elif operation == EditOperation.UNDERLINE:
            edited_content = self._format_text_underline(current_content, selection, format_options)
        elif operation == EditOperation.STRIKETHROUGH:
            edited_content = self._format_text_strikethrough(current_content, selection, format_options)
        elif operation == EditOperation.HIGHLIGHT:
            edited_content = self._format_text_highlight(current_content, selection, format_options)
        elif operation == EditOperation.INSERT_LINE:
            if content is None:
                raise ValueError("content is required for INSERT_LINE operation")
            edited_content = self._insert_line(current_content, position, content)
        elif operation == EditOperation.DELETE_LINE:
            edited_content = self._delete_line(current_content, position)
        elif operation == EditOperation.MOVE_LINE:
            edited_content = self._move_line(current_content, position, format_options)
        elif operation == EditOperation.COPY_TEXT:
            # COPY_TEXT does not modify the document: return immediately
            # and skip the write path below entirely.
            return self._copy_text(current_content, selection)
        elif operation == EditOperation.CUT_TEXT:
            edited_content, cut_content = self._cut_text(current_content, selection)
            # Store cut content in clipboard
            self._store_clipboard_content(cut_content)
        elif operation == EditOperation.PASTE_TEXT:
            clipboard_content = self._get_clipboard_content()
            edited_content = self._paste_text(current_content, position, clipboard_content)
        else:
            raise ValueError(f"Unsupported edit operation: {operation}")

        # Write the edited content back to file
        file_format_str = self._detect_file_format(target_path)
        # Fall back to TXT when the detected extension is not a known DocumentFormat value.
        file_format = DocumentFormat(file_format_str) if file_format_str in [f.value for f in DocumentFormat] else DocumentFormat.TXT
        write_result = self.write_document(
            target_path=target_path,
            content=edited_content,
            format=file_format,
            mode=WriteMode.BACKUP_WRITE,  # Always backup before editing
            backup_comment=f"Edit operation: {operation}",
        )

        result = {
            "operation_id": operation_id,
            "target_path": target_path,
            "operation": operation,
            "edit_metadata": {
                "original_size": len(current_content),
                # edited_size is 0 for non-str results (defensive; all edit
                # helpers above return str).
                "edited_size": (len(edited_content) if isinstance(edited_content, str) else 0),
                "position": position,
                "selection": selection,
            },
            "write_result": write_result,
            "processing_metadata": {
                "start_time": start_time.isoformat(),
                "end_time": datetime.now().isoformat(),
                "duration": (datetime.now() - start_time).total_seconds(),
            },
        }

        self.logger.info(f"Edit operation {operation_id} completed successfully")
        return result

    except Exception as e:
        raise DocumentWriterError(f"Edit operation failed: {str(e)}")
1476
+
1477
def format_text(
    self,
    target_path: str,
    text_to_format: str,
    format_type: EditOperation,
    format_options: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """
    Apply formatting to every occurrence of *text_to_format* in a document.

    The document is read, all occurrences are wrapped with the markers for
    *format_type*, and the result is written back in BACKUP_WRITE mode.

    Args:
        target_path: Target file path
        text_to_format: Text to apply formatting to
        format_type: Type of formatting (bold, italic, etc.)
        format_options: Additional format options

    Returns:
        Dict containing formatting results

    Raises:
        DocumentWriterError: if reading, formatting or writing fails.
    """
    try:
        original = self._read_document_content(target_path)
        formatted = self._apply_text_formatting(original, text_to_format, format_type, format_options)

        detected = self._detect_file_format(target_path)
        known_values = [fmt.value for fmt in DocumentFormat]
        doc_format = DocumentFormat(detected) if detected in known_values else DocumentFormat.TXT
        write_result = self.write_document(
            target_path=target_path,
            content=formatted,
            format=doc_format,
            mode=WriteMode.BACKUP_WRITE,
        )

        return {
            "target_path": target_path,
            "text_formatted": text_to_format,
            "format_type": format_type,
            "write_result": write_result,
        }

    except Exception as e:
        raise DocumentWriterError(f"Text formatting failed: {str(e)}")
1521
+
1522
def find_replace(
    self,
    target_path: str,
    find_text: str,
    replace_text: str,
    replace_all: bool = False,
    case_sensitive: bool = True,
    regex_mode: bool = False,
) -> Dict[str, Any]:
    """
    Find and replace text in a document.

    When at least one replacement is made the document is rewritten in
    BACKUP_WRITE mode; with no matches the file is left untouched.

    Args:
        target_path: Target file path
        find_text: Text to find
        replace_text: Text to replace with
        replace_all: Replace all occurrences
        case_sensitive: Case sensitive search
        regex_mode: Use regex for find/replace

    Returns:
        Dict containing find/replace results

    Raises:
        DocumentWriterError: if reading, replacing or writing fails.
    """
    try:
        original = self._read_document_content(target_path)

        new_content, count = self._perform_find_replace(
            original,
            find_text,
            replace_text,
            replace_all,
            case_sensitive,
            regex_mode,
        )

        result = {
            "target_path": target_path,
            "find_text": find_text,
            "replace_text": replace_text,
            "replacements_made": count,
        }

        if count == 0:
            # Nothing matched: report without touching the file.
            result["message"] = "No matches found"
            return result

        detected = self._detect_file_format(target_path)
        known_values = [fmt.value for fmt in DocumentFormat]
        doc_format = DocumentFormat(detected) if detected in known_values else DocumentFormat.TXT
        result["write_result"] = self.write_document(
            target_path=target_path,
            content=new_content,
            format=doc_format,
            mode=WriteMode.BACKUP_WRITE,
            backup_comment=f"Find/Replace: '{find_text}' -> '{replace_text}'",
        )
        return result

    except Exception as e:
        raise DocumentWriterError(f"Find/replace operation failed: {str(e)}")
1588
+
1589
+ # Helper methods for editing operations
1590
+ def _read_document_content(self, file_path: str) -> str:
1591
+ """Read document content for editing"""
1592
+ try:
1593
+ with open(file_path, "r", encoding="utf-8") as f:
1594
+ return f.read()
1595
+ except UnicodeDecodeError:
1596
+ # Try with different encodings
1597
+ for encoding in ["gbk", "latin1", "cp1252"]:
1598
+ try:
1599
+ with open(file_path, "r", encoding=encoding) as f:
1600
+ return f.read()
1601
+ except Exception:
1602
+ continue
1603
+ raise DocumentWriterError(f"Cannot decode file: {file_path}")
1604
+ except Exception as e:
1605
+ raise DocumentWriterError(f"Cannot read file {file_path}: {str(e)}")
1606
+
1607
+ def _detect_file_format(self, file_path: str) -> str:
1608
+ """Detect file format from extension"""
1609
+ ext = os.path.splitext(file_path)[1].lower()
1610
+ format_map = {
1611
+ ".txt": "txt",
1612
+ ".json": "json",
1613
+ ".csv": "csv",
1614
+ ".xml": "xml",
1615
+ ".html": "html",
1616
+ ".htm": "html",
1617
+ ".md": "markdown",
1618
+ ".markdown": "markdown",
1619
+ ".yaml": "yaml",
1620
+ ".yml": "yaml",
1621
+ }
1622
+ return format_map.get(ext, "txt")
1623
+
1624
+ def _insert_text(self, content: str, text: str, position: Optional[Dict[str, Any]]) -> str:
1625
+ """Insert text at specified position"""
1626
+ if not position:
1627
+ return content + text
1628
+
1629
+ if "offset" in position:
1630
+ offset = position["offset"]
1631
+ return content[:offset] + text + content[offset:]
1632
+ elif "line" in position:
1633
+ lines = content.split("\n")
1634
+ line_num = position.get("line", 0)
1635
+ column = position.get("column", 0)
1636
+
1637
+ if line_num < len(lines):
1638
+ line = lines[line_num]
1639
+ lines[line_num] = line[:column] + text + line[column:]
1640
+ else:
1641
+ lines.append(text)
1642
+ return "\n".join(lines)
1643
+ else:
1644
+ return content + text
1645
+
1646
+ def _delete_text(self, content: str, selection: Optional[Dict[str, Any]]) -> str:
1647
+ """Delete text in specified selection"""
1648
+ if not selection:
1649
+ return content
1650
+
1651
+ if "start_offset" in selection and "end_offset" in selection:
1652
+ start = selection["start_offset"]
1653
+ end = selection["end_offset"]
1654
+ return content[:start] + content[end:]
1655
+ elif "start_line" in selection and "end_line" in selection:
1656
+ lines = content.split("\n")
1657
+ start_line = selection["start_line"]
1658
+ end_line = selection["end_line"]
1659
+ start_col = selection.get("start_column", 0)
1660
+ end_col = selection.get(
1661
+ "end_column",
1662
+ len(lines[end_line]) if end_line < len(lines) else 0,
1663
+ )
1664
+
1665
+ if start_line == end_line:
1666
+ # Same line deletion
1667
+ line = lines[start_line]
1668
+ lines[start_line] = line[:start_col] + line[end_col:]
1669
+ else:
1670
+ # Multi-line deletion
1671
+ lines[start_line] = lines[start_line][:start_col]
1672
+ if end_line < len(lines):
1673
+ lines[start_line] += lines[end_line][end_col:]
1674
+ del lines[start_line + 1 : end_line + 1]
1675
+
1676
+ return "\n".join(lines)
1677
+
1678
+ return content
1679
+
1680
def _replace_text(
    self,
    content: str,
    selection: Optional[Dict[str, Any]],
    replacement: str,
) -> str:
    """Replace the selected span with *replacement* (delete, then re-insert).

    No selection means no change.
    """
    if not selection:
        return content

    remaining = self._delete_text(content, selection)

    # The insertion anchor is where the deleted span started.
    if "start_offset" in selection:
        anchor = {"offset": selection["start_offset"]}
    elif "start_line" in selection:
        anchor = {
            "line": selection["start_line"],
            "column": selection.get("start_column", 0),
        }
    else:
        anchor = None

    return self._insert_text(remaining, replacement, anchor)
1705
+
1706
def _format_text_bold(
    self,
    content: str,
    selection: Optional[Dict[str, Any]],
    options: Optional[Dict[str, Any]],
) -> str:
    """Wrap the selection in bold markers: markdown '**' (default) or HTML <strong>."""
    if not selection:
        return content

    style = (options or {}).get("format_type", "markdown")
    if style == "markdown":
        return self._apply_markdown_formatting(content, selection, "**", "**")
    if style == "html":
        return self._apply_html_formatting(content, selection, "<strong>", "</strong>")
    return content
1724
+
1725
def _format_text_italic(
    self,
    content: str,
    selection: Optional[Dict[str, Any]],
    options: Optional[Dict[str, Any]],
) -> str:
    """Wrap the selection in italic markers: markdown '*' (default) or HTML <em>."""
    if not selection:
        return content

    style = (options or {}).get("format_type", "markdown")
    if style == "markdown":
        return self._apply_markdown_formatting(content, selection, "*", "*")
    if style == "html":
        return self._apply_html_formatting(content, selection, "<em>", "</em>")
    return content
1743
+
1744
def _format_text_underline(
    self,
    content: str,
    selection: Optional[Dict[str, Any]],
    options: Optional[Dict[str, Any]],
) -> str:
    """Wrap the selection in HTML <u> tags (markdown has no underline syntax)."""
    if not selection:
        return content

    style = (options or {}).get("format_type", "html")
    if style == "html":
        return self._apply_html_formatting(content, selection, "<u>", "</u>")
    return content
1760
+
1761
def _format_text_strikethrough(
    self,
    content: str,
    selection: Optional[Dict[str, Any]],
    options: Optional[Dict[str, Any]],
) -> str:
    """Wrap the selection in strikethrough markers: markdown '~~' (default) or HTML <del>."""
    if not selection:
        return content

    style = (options or {}).get("format_type", "markdown")
    if style == "markdown":
        return self._apply_markdown_formatting(content, selection, "~~", "~~")
    if style == "html":
        return self._apply_html_formatting(content, selection, "<del>", "</del>")
    return content
1779
+
1780
def _format_text_highlight(
    self,
    content: str,
    selection: Optional[Dict[str, Any]],
    options: Optional[Dict[str, Any]],
) -> str:
    """Wrap the selection in a highlight: HTML <mark> (default, with a color
    option defaulting to yellow) or markdown '=='."""
    if not selection:
        return content

    opts = options or {}
    style = opts.get("format_type", "html")
    color = opts.get("color", "yellow")

    if style == "html":
        open_tag = f'<mark style="background-color: {color}">'
        return self._apply_html_formatting(content, selection, open_tag, "</mark>")
    if style == "markdown":
        return self._apply_markdown_formatting(content, selection, "==", "==")
    return content
1804
+
1805
def _apply_markdown_formatting(
    self,
    content: str,
    selection: Dict[str, Any],
    start_marker: str,
    end_marker: str,
) -> str:
    """Surround the selected text with the given markdown markers."""
    selected = self._extract_selected_text(content, selection)
    return self._replace_text(content, selection, f"{start_marker}{selected}{end_marker}")
1816
+
1817
def _apply_html_formatting(
    self,
    content: str,
    selection: Dict[str, Any],
    start_tag: str,
    end_tag: str,
) -> str:
    """Surround the selected text with the given HTML tag pair."""
    selected = self._extract_selected_text(content, selection)
    return self._replace_text(content, selection, f"{start_tag}{selected}{end_tag}")
1828
+
1829
+ def _extract_selected_text(self, content: str, selection: Dict[str, Any]) -> str:
1830
+ """Extract text from selection"""
1831
+ if "start_offset" in selection and "end_offset" in selection:
1832
+ return content[selection["start_offset"] : selection["end_offset"]]
1833
+ elif "start_line" in selection and "end_line" in selection:
1834
+ lines = content.split("\n")
1835
+ start_line = selection["start_line"]
1836
+ end_line = selection["end_line"]
1837
+ start_col = selection.get("start_column", 0)
1838
+ end_col = selection.get(
1839
+ "end_column",
1840
+ len(lines[end_line]) if end_line < len(lines) else 0,
1841
+ )
1842
+
1843
+ if start_line == end_line:
1844
+ return lines[start_line][start_col:end_col]
1845
+ else:
1846
+ result = [lines[start_line][start_col:]]
1847
+ result.extend(lines[start_line + 1 : end_line])
1848
+ if end_line < len(lines):
1849
+ result.append(lines[end_line][:end_col])
1850
+ return "\n".join(result)
1851
+ return ""
1852
+
1853
+ def _insert_line(
1854
+ self,
1855
+ content: str,
1856
+ position: Optional[Dict[str, Any]],
1857
+ line_content: str,
1858
+ ) -> str:
1859
+ """Insert a new line at specified position"""
1860
+ lines = content.split("\n")
1861
+ line_num = position.get("line", len(lines)) if position else len(lines)
1862
+
1863
+ lines.insert(line_num, line_content)
1864
+ return "\n".join(lines)
1865
+
1866
+ def _delete_line(self, content: str, position: Optional[Dict[str, Any]]) -> str:
1867
+ """Delete line at specified position"""
1868
+ lines = content.split("\n")
1869
+ line_num = position.get("line", 0) if position else 0
1870
+
1871
+ if 0 <= line_num < len(lines):
1872
+ del lines[line_num]
1873
+
1874
+ return "\n".join(lines)
1875
+
1876
+ def _move_line(
1877
+ self,
1878
+ content: str,
1879
+ position: Optional[Dict[str, Any]],
1880
+ options: Optional[Dict[str, Any]],
1881
+ ) -> str:
1882
+ """Move line to different position"""
1883
+ lines = content.split("\n")
1884
+ from_line = position.get("line", 0) if position else 0
1885
+ to_line = options.get("to_line", 0) if options else 0
1886
+
1887
+ if 0 <= from_line < len(lines) and 0 <= to_line < len(lines):
1888
+ line_content = lines.pop(from_line)
1889
+ lines.insert(to_line, line_content)
1890
+
1891
+ return "\n".join(lines)
1892
+
1893
def _copy_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Dict[str, Any]:
    """Copy the selection (or the whole content if none) to the internal clipboard."""
    if selection:
        copied = self._extract_selected_text(content, selection)
    else:
        copied = content
    self._store_clipboard_content(copied)

    return {
        "operation": "copy",
        "copied_text": copied,
        "copied_length": len(copied),
    }
1903
+
1904
def _cut_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Tuple[str, str]:
    """Cut the selection and return (remaining content, cut text).

    With no selection the entire content is cut and '' remains.
    """
    if selection:
        removed = self._extract_selected_text(content, selection)
        remaining = self._delete_text(content, selection)
    else:
        removed = content
        remaining = ""
    return remaining, removed
1910
+
1911
def _paste_text(
    self,
    content: str,
    position: Optional[Dict[str, Any]],
    clipboard_content: str,
) -> str:
    """Insert the clipboard text at *position* (appended when position is None)."""
    return self._insert_text(content, clipboard_content, position)
1919
+
1920
+ def _store_clipboard_content(self, content: str):
1921
+ """Store content in clipboard (simplified implementation)"""
1922
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1923
+ try:
1924
+ with open(clipboard_file, "w", encoding="utf-8") as f:
1925
+ f.write(content)
1926
+ except Exception as e:
1927
+ self.logger.warning(f"Failed to store clipboard content: {e}")
1928
+
1929
+ def _get_clipboard_content(self) -> str:
1930
+ """Get content from clipboard"""
1931
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1932
+ try:
1933
+ with open(clipboard_file, "r", encoding="utf-8") as f:
1934
+ return f.read()
1935
+ except Exception:
1936
+ return ""
1937
+
1938
def _apply_text_formatting(
    self,
    content: str,
    text_to_format: str,
    format_type: EditOperation,
    options: Optional[Dict[str, Any]],
) -> str:
    """Wrap every occurrence of *text_to_format* in the markers for *format_type*.

    Unknown format types leave the content unchanged.
    """
    if format_type == EditOperation.BOLD:
        wrapped = f"**{text_to_format}**"
    elif format_type == EditOperation.ITALIC:
        wrapped = f"*{text_to_format}*"
    elif format_type == EditOperation.UNDERLINE:
        wrapped = f"<u>{text_to_format}</u>"
    elif format_type == EditOperation.STRIKETHROUGH:
        wrapped = f"~~{text_to_format}~~"
    elif format_type == EditOperation.HIGHLIGHT:
        color = (options or {}).get("color", "yellow")
        wrapped = f'<mark style="background-color: {color}">{text_to_format}</mark>'
    else:
        wrapped = text_to_format

    return content.replace(text_to_format, wrapped)
1961
+
1962
+ def _perform_find_replace(
1963
+ self,
1964
+ content: str,
1965
+ find_text: str,
1966
+ replace_text: str,
1967
+ replace_all: bool,
1968
+ case_sensitive: bool,
1969
+ regex_mode: bool,
1970
+ ) -> Tuple[str, int]:
1971
+ """Perform find and replace operation"""
1972
+ import re
1973
+
1974
+ replacements = 0
1975
+
1976
+ if regex_mode:
1977
+ flags = 0 if case_sensitive else re.IGNORECASE
1978
+ if replace_all:
1979
+ new_content, replacements = re.subn(find_text, replace_text, content, flags=flags)
1980
+ else:
1981
+ new_content = re.sub(find_text, replace_text, content, count=1, flags=flags)
1982
+ replacements = 1 if new_content != content else 0
1983
+ else:
1984
+ if case_sensitive:
1985
+ if replace_all:
1986
+ replacements = content.count(find_text)
1987
+ new_content = content.replace(find_text, replace_text)
1988
+ else:
1989
+ new_content = content.replace(find_text, replace_text, 1)
1990
+ replacements = 1 if new_content != content else 0
1991
+ else:
1992
+ # Case insensitive replacement
1993
+ import re
1994
+
1995
+ pattern = re.escape(find_text)
1996
+ if replace_all:
1997
+ new_content, replacements = re.subn(pattern, replace_text, content, flags=re.IGNORECASE)
1998
+ else:
1999
+ new_content = re.sub(
2000
+ pattern,
2001
+ replace_text,
2002
+ content,
2003
+ count=1,
2004
+ flags=re.IGNORECASE,
2005
+ )
2006
+ replacements = 1 if new_content != content else 0
2007
+
2008
+ return new_content, replacements