aiecs 1.0.1__py3-none-any.whl → 1.7.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (340)
  1. aiecs/__init__.py +13 -16
  2. aiecs/__main__.py +7 -7
  3. aiecs/aiecs_client.py +269 -75
  4. aiecs/application/executors/operation_executor.py +79 -54
  5. aiecs/application/knowledge_graph/__init__.py +7 -0
  6. aiecs/application/knowledge_graph/builder/__init__.py +37 -0
  7. aiecs/application/knowledge_graph/builder/data_quality.py +302 -0
  8. aiecs/application/knowledge_graph/builder/data_reshaping.py +293 -0
  9. aiecs/application/knowledge_graph/builder/document_builder.py +369 -0
  10. aiecs/application/knowledge_graph/builder/graph_builder.py +490 -0
  11. aiecs/application/knowledge_graph/builder/import_optimizer.py +396 -0
  12. aiecs/application/knowledge_graph/builder/schema_inference.py +462 -0
  13. aiecs/application/knowledge_graph/builder/schema_mapping.py +563 -0
  14. aiecs/application/knowledge_graph/builder/structured_pipeline.py +1384 -0
  15. aiecs/application/knowledge_graph/builder/text_chunker.py +317 -0
  16. aiecs/application/knowledge_graph/extractors/__init__.py +27 -0
  17. aiecs/application/knowledge_graph/extractors/base.py +98 -0
  18. aiecs/application/knowledge_graph/extractors/llm_entity_extractor.py +422 -0
  19. aiecs/application/knowledge_graph/extractors/llm_relation_extractor.py +347 -0
  20. aiecs/application/knowledge_graph/extractors/ner_entity_extractor.py +241 -0
  21. aiecs/application/knowledge_graph/fusion/__init__.py +78 -0
  22. aiecs/application/knowledge_graph/fusion/ab_testing.py +395 -0
  23. aiecs/application/knowledge_graph/fusion/abbreviation_expander.py +327 -0
  24. aiecs/application/knowledge_graph/fusion/alias_index.py +597 -0
  25. aiecs/application/knowledge_graph/fusion/alias_matcher.py +384 -0
  26. aiecs/application/knowledge_graph/fusion/cache_coordinator.py +343 -0
  27. aiecs/application/knowledge_graph/fusion/entity_deduplicator.py +433 -0
  28. aiecs/application/knowledge_graph/fusion/entity_linker.py +511 -0
  29. aiecs/application/knowledge_graph/fusion/evaluation_dataset.py +240 -0
  30. aiecs/application/knowledge_graph/fusion/knowledge_fusion.py +632 -0
  31. aiecs/application/knowledge_graph/fusion/matching_config.py +489 -0
  32. aiecs/application/knowledge_graph/fusion/name_normalizer.py +352 -0
  33. aiecs/application/knowledge_graph/fusion/relation_deduplicator.py +183 -0
  34. aiecs/application/knowledge_graph/fusion/semantic_name_matcher.py +464 -0
  35. aiecs/application/knowledge_graph/fusion/similarity_pipeline.py +534 -0
  36. aiecs/application/knowledge_graph/pattern_matching/__init__.py +21 -0
  37. aiecs/application/knowledge_graph/pattern_matching/pattern_matcher.py +342 -0
  38. aiecs/application/knowledge_graph/pattern_matching/query_executor.py +366 -0
  39. aiecs/application/knowledge_graph/profiling/__init__.py +12 -0
  40. aiecs/application/knowledge_graph/profiling/query_plan_visualizer.py +195 -0
  41. aiecs/application/knowledge_graph/profiling/query_profiler.py +223 -0
  42. aiecs/application/knowledge_graph/reasoning/__init__.py +27 -0
  43. aiecs/application/knowledge_graph/reasoning/evidence_synthesis.py +341 -0
  44. aiecs/application/knowledge_graph/reasoning/inference_engine.py +500 -0
  45. aiecs/application/knowledge_graph/reasoning/logic_form_parser.py +163 -0
  46. aiecs/application/knowledge_graph/reasoning/logic_parser/__init__.py +79 -0
  47. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_builder.py +513 -0
  48. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_nodes.py +913 -0
  49. aiecs/application/knowledge_graph/reasoning/logic_parser/ast_validator.py +866 -0
  50. aiecs/application/knowledge_graph/reasoning/logic_parser/error_handler.py +475 -0
  51. aiecs/application/knowledge_graph/reasoning/logic_parser/parser.py +396 -0
  52. aiecs/application/knowledge_graph/reasoning/logic_parser/query_context.py +208 -0
  53. aiecs/application/knowledge_graph/reasoning/logic_query_integration.py +170 -0
  54. aiecs/application/knowledge_graph/reasoning/query_planner.py +855 -0
  55. aiecs/application/knowledge_graph/reasoning/reasoning_engine.py +518 -0
  56. aiecs/application/knowledge_graph/retrieval/__init__.py +27 -0
  57. aiecs/application/knowledge_graph/retrieval/query_intent_classifier.py +211 -0
  58. aiecs/application/knowledge_graph/retrieval/retrieval_strategies.py +592 -0
  59. aiecs/application/knowledge_graph/retrieval/strategy_types.py +23 -0
  60. aiecs/application/knowledge_graph/search/__init__.py +59 -0
  61. aiecs/application/knowledge_graph/search/hybrid_search.py +457 -0
  62. aiecs/application/knowledge_graph/search/reranker.py +293 -0
  63. aiecs/application/knowledge_graph/search/reranker_strategies.py +535 -0
  64. aiecs/application/knowledge_graph/search/text_similarity.py +392 -0
  65. aiecs/application/knowledge_graph/traversal/__init__.py +15 -0
  66. aiecs/application/knowledge_graph/traversal/enhanced_traversal.py +305 -0
  67. aiecs/application/knowledge_graph/traversal/path_scorer.py +271 -0
  68. aiecs/application/knowledge_graph/validators/__init__.py +13 -0
  69. aiecs/application/knowledge_graph/validators/relation_validator.py +239 -0
  70. aiecs/application/knowledge_graph/visualization/__init__.py +11 -0
  71. aiecs/application/knowledge_graph/visualization/graph_visualizer.py +313 -0
  72. aiecs/common/__init__.py +9 -0
  73. aiecs/common/knowledge_graph/__init__.py +17 -0
  74. aiecs/common/knowledge_graph/runnable.py +471 -0
  75. aiecs/config/__init__.py +20 -5
  76. aiecs/config/config.py +762 -31
  77. aiecs/config/graph_config.py +131 -0
  78. aiecs/config/tool_config.py +399 -0
  79. aiecs/core/__init__.py +29 -13
  80. aiecs/core/interface/__init__.py +2 -2
  81. aiecs/core/interface/execution_interface.py +22 -22
  82. aiecs/core/interface/storage_interface.py +37 -88
  83. aiecs/core/registry/__init__.py +31 -0
  84. aiecs/core/registry/service_registry.py +92 -0
  85. aiecs/domain/__init__.py +270 -1
  86. aiecs/domain/agent/__init__.py +191 -0
  87. aiecs/domain/agent/base_agent.py +3870 -0
  88. aiecs/domain/agent/exceptions.py +99 -0
  89. aiecs/domain/agent/graph_aware_mixin.py +569 -0
  90. aiecs/domain/agent/hybrid_agent.py +1435 -0
  91. aiecs/domain/agent/integration/__init__.py +29 -0
  92. aiecs/domain/agent/integration/context_compressor.py +216 -0
  93. aiecs/domain/agent/integration/context_engine_adapter.py +587 -0
  94. aiecs/domain/agent/integration/protocols.py +281 -0
  95. aiecs/domain/agent/integration/retry_policy.py +218 -0
  96. aiecs/domain/agent/integration/role_config.py +213 -0
  97. aiecs/domain/agent/knowledge_aware_agent.py +1892 -0
  98. aiecs/domain/agent/lifecycle.py +291 -0
  99. aiecs/domain/agent/llm_agent.py +692 -0
  100. aiecs/domain/agent/memory/__init__.py +12 -0
  101. aiecs/domain/agent/memory/conversation.py +1124 -0
  102. aiecs/domain/agent/migration/__init__.py +14 -0
  103. aiecs/domain/agent/migration/conversion.py +163 -0
  104. aiecs/domain/agent/migration/legacy_wrapper.py +86 -0
  105. aiecs/domain/agent/models.py +884 -0
  106. aiecs/domain/agent/observability.py +479 -0
  107. aiecs/domain/agent/persistence.py +449 -0
  108. aiecs/domain/agent/prompts/__init__.py +29 -0
  109. aiecs/domain/agent/prompts/builder.py +159 -0
  110. aiecs/domain/agent/prompts/formatters.py +187 -0
  111. aiecs/domain/agent/prompts/template.py +255 -0
  112. aiecs/domain/agent/registry.py +253 -0
  113. aiecs/domain/agent/tool_agent.py +444 -0
  114. aiecs/domain/agent/tools/__init__.py +15 -0
  115. aiecs/domain/agent/tools/schema_generator.py +364 -0
  116. aiecs/domain/community/__init__.py +155 -0
  117. aiecs/domain/community/agent_adapter.py +469 -0
  118. aiecs/domain/community/analytics.py +432 -0
  119. aiecs/domain/community/collaborative_workflow.py +648 -0
  120. aiecs/domain/community/communication_hub.py +634 -0
  121. aiecs/domain/community/community_builder.py +320 -0
  122. aiecs/domain/community/community_integration.py +796 -0
  123. aiecs/domain/community/community_manager.py +803 -0
  124. aiecs/domain/community/decision_engine.py +849 -0
  125. aiecs/domain/community/exceptions.py +231 -0
  126. aiecs/domain/community/models/__init__.py +33 -0
  127. aiecs/domain/community/models/community_models.py +234 -0
  128. aiecs/domain/community/resource_manager.py +461 -0
  129. aiecs/domain/community/shared_context_manager.py +589 -0
  130. aiecs/domain/context/__init__.py +40 -10
  131. aiecs/domain/context/context_engine.py +1910 -0
  132. aiecs/domain/context/conversation_models.py +87 -53
  133. aiecs/domain/context/graph_memory.py +582 -0
  134. aiecs/domain/execution/model.py +12 -4
  135. aiecs/domain/knowledge_graph/__init__.py +19 -0
  136. aiecs/domain/knowledge_graph/models/__init__.py +52 -0
  137. aiecs/domain/knowledge_graph/models/entity.py +148 -0
  138. aiecs/domain/knowledge_graph/models/evidence.py +178 -0
  139. aiecs/domain/knowledge_graph/models/inference_rule.py +184 -0
  140. aiecs/domain/knowledge_graph/models/path.py +171 -0
  141. aiecs/domain/knowledge_graph/models/path_pattern.py +171 -0
  142. aiecs/domain/knowledge_graph/models/query.py +261 -0
  143. aiecs/domain/knowledge_graph/models/query_plan.py +181 -0
  144. aiecs/domain/knowledge_graph/models/relation.py +202 -0
  145. aiecs/domain/knowledge_graph/schema/__init__.py +23 -0
  146. aiecs/domain/knowledge_graph/schema/entity_type.py +131 -0
  147. aiecs/domain/knowledge_graph/schema/graph_schema.py +253 -0
  148. aiecs/domain/knowledge_graph/schema/property_schema.py +143 -0
  149. aiecs/domain/knowledge_graph/schema/relation_type.py +163 -0
  150. aiecs/domain/knowledge_graph/schema/schema_manager.py +691 -0
  151. aiecs/domain/knowledge_graph/schema/type_enums.py +209 -0
  152. aiecs/domain/task/dsl_processor.py +172 -56
  153. aiecs/domain/task/model.py +20 -8
  154. aiecs/domain/task/task_context.py +27 -24
  155. aiecs/infrastructure/__init__.py +0 -2
  156. aiecs/infrastructure/graph_storage/__init__.py +11 -0
  157. aiecs/infrastructure/graph_storage/base.py +837 -0
  158. aiecs/infrastructure/graph_storage/batch_operations.py +458 -0
  159. aiecs/infrastructure/graph_storage/cache.py +424 -0
  160. aiecs/infrastructure/graph_storage/distributed.py +223 -0
  161. aiecs/infrastructure/graph_storage/error_handling.py +380 -0
  162. aiecs/infrastructure/graph_storage/graceful_degradation.py +294 -0
  163. aiecs/infrastructure/graph_storage/health_checks.py +378 -0
  164. aiecs/infrastructure/graph_storage/in_memory.py +1197 -0
  165. aiecs/infrastructure/graph_storage/index_optimization.py +446 -0
  166. aiecs/infrastructure/graph_storage/lazy_loading.py +431 -0
  167. aiecs/infrastructure/graph_storage/metrics.py +344 -0
  168. aiecs/infrastructure/graph_storage/migration.py +400 -0
  169. aiecs/infrastructure/graph_storage/pagination.py +483 -0
  170. aiecs/infrastructure/graph_storage/performance_monitoring.py +456 -0
  171. aiecs/infrastructure/graph_storage/postgres.py +1563 -0
  172. aiecs/infrastructure/graph_storage/property_storage.py +353 -0
  173. aiecs/infrastructure/graph_storage/protocols.py +76 -0
  174. aiecs/infrastructure/graph_storage/query_optimizer.py +642 -0
  175. aiecs/infrastructure/graph_storage/schema_cache.py +290 -0
  176. aiecs/infrastructure/graph_storage/sqlite.py +1373 -0
  177. aiecs/infrastructure/graph_storage/streaming.py +487 -0
  178. aiecs/infrastructure/graph_storage/tenant.py +412 -0
  179. aiecs/infrastructure/messaging/celery_task_manager.py +92 -54
  180. aiecs/infrastructure/messaging/websocket_manager.py +51 -35
  181. aiecs/infrastructure/monitoring/__init__.py +22 -0
  182. aiecs/infrastructure/monitoring/executor_metrics.py +45 -11
  183. aiecs/infrastructure/monitoring/global_metrics_manager.py +212 -0
  184. aiecs/infrastructure/monitoring/structured_logger.py +3 -7
  185. aiecs/infrastructure/monitoring/tracing_manager.py +63 -35
  186. aiecs/infrastructure/persistence/__init__.py +14 -1
  187. aiecs/infrastructure/persistence/context_engine_client.py +184 -0
  188. aiecs/infrastructure/persistence/database_manager.py +67 -43
  189. aiecs/infrastructure/persistence/file_storage.py +180 -103
  190. aiecs/infrastructure/persistence/redis_client.py +74 -21
  191. aiecs/llm/__init__.py +73 -25
  192. aiecs/llm/callbacks/__init__.py +11 -0
  193. aiecs/llm/{custom_callbacks.py → callbacks/custom_callbacks.py} +26 -19
  194. aiecs/llm/client_factory.py +224 -36
  195. aiecs/llm/client_resolver.py +155 -0
  196. aiecs/llm/clients/__init__.py +38 -0
  197. aiecs/llm/clients/base_client.py +324 -0
  198. aiecs/llm/clients/google_function_calling_mixin.py +457 -0
  199. aiecs/llm/clients/googleai_client.py +241 -0
  200. aiecs/llm/clients/openai_client.py +158 -0
  201. aiecs/llm/clients/openai_compatible_mixin.py +367 -0
  202. aiecs/llm/clients/vertex_client.py +897 -0
  203. aiecs/llm/clients/xai_client.py +201 -0
  204. aiecs/llm/config/__init__.py +51 -0
  205. aiecs/llm/config/config_loader.py +272 -0
  206. aiecs/llm/config/config_validator.py +206 -0
  207. aiecs/llm/config/model_config.py +143 -0
  208. aiecs/llm/protocols.py +149 -0
  209. aiecs/llm/utils/__init__.py +10 -0
  210. aiecs/llm/utils/validate_config.py +89 -0
  211. aiecs/main.py +140 -121
  212. aiecs/scripts/aid/VERSION_MANAGEMENT.md +138 -0
  213. aiecs/scripts/aid/__init__.py +19 -0
  214. aiecs/scripts/aid/module_checker.py +499 -0
  215. aiecs/scripts/aid/version_manager.py +235 -0
  216. aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +1 -0
  217. aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +1 -0
  218. aiecs/scripts/dependance_check/__init__.py +15 -0
  219. aiecs/scripts/dependance_check/dependency_checker.py +1835 -0
  220. aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +192 -90
  221. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +203 -71
  222. aiecs/scripts/dependance_patch/__init__.py +7 -0
  223. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +11 -0
  224. aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +21 -14
  225. aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +1 -1
  226. aiecs/scripts/knowledge_graph/__init__.py +3 -0
  227. aiecs/scripts/knowledge_graph/run_threshold_experiments.py +212 -0
  228. aiecs/scripts/migrations/multi_tenancy/README.md +142 -0
  229. aiecs/scripts/tools_develop/README.md +671 -0
  230. aiecs/scripts/tools_develop/README_CONFIG_CHECKER.md +273 -0
  231. aiecs/scripts/tools_develop/TOOLS_CONFIG_GUIDE.md +1287 -0
  232. aiecs/scripts/tools_develop/TOOL_AUTO_DISCOVERY.md +234 -0
  233. aiecs/scripts/tools_develop/__init__.py +21 -0
  234. aiecs/scripts/tools_develop/check_all_tools_config.py +548 -0
  235. aiecs/scripts/tools_develop/check_type_annotations.py +257 -0
  236. aiecs/scripts/tools_develop/pre-commit-schema-coverage.sh +66 -0
  237. aiecs/scripts/tools_develop/schema_coverage.py +511 -0
  238. aiecs/scripts/tools_develop/validate_tool_schemas.py +475 -0
  239. aiecs/scripts/tools_develop/verify_executor_config_fix.py +98 -0
  240. aiecs/scripts/tools_develop/verify_tools.py +352 -0
  241. aiecs/tasks/__init__.py +0 -1
  242. aiecs/tasks/worker.py +115 -47
  243. aiecs/tools/__init__.py +194 -72
  244. aiecs/tools/apisource/__init__.py +99 -0
  245. aiecs/tools/apisource/intelligence/__init__.py +19 -0
  246. aiecs/tools/apisource/intelligence/data_fusion.py +632 -0
  247. aiecs/tools/apisource/intelligence/query_analyzer.py +417 -0
  248. aiecs/tools/apisource/intelligence/search_enhancer.py +385 -0
  249. aiecs/tools/apisource/monitoring/__init__.py +9 -0
  250. aiecs/tools/apisource/monitoring/metrics.py +330 -0
  251. aiecs/tools/apisource/providers/__init__.py +112 -0
  252. aiecs/tools/apisource/providers/base.py +671 -0
  253. aiecs/tools/apisource/providers/census.py +397 -0
  254. aiecs/tools/apisource/providers/fred.py +535 -0
  255. aiecs/tools/apisource/providers/newsapi.py +409 -0
  256. aiecs/tools/apisource/providers/worldbank.py +352 -0
  257. aiecs/tools/apisource/reliability/__init__.py +12 -0
  258. aiecs/tools/apisource/reliability/error_handler.py +363 -0
  259. aiecs/tools/apisource/reliability/fallback_strategy.py +376 -0
  260. aiecs/tools/apisource/tool.py +832 -0
  261. aiecs/tools/apisource/utils/__init__.py +9 -0
  262. aiecs/tools/apisource/utils/validators.py +334 -0
  263. aiecs/tools/base_tool.py +415 -21
  264. aiecs/tools/docs/__init__.py +121 -0
  265. aiecs/tools/docs/ai_document_orchestrator.py +607 -0
  266. aiecs/tools/docs/ai_document_writer_orchestrator.py +2350 -0
  267. aiecs/tools/docs/content_insertion_tool.py +1320 -0
  268. aiecs/tools/docs/document_creator_tool.py +1323 -0
  269. aiecs/tools/docs/document_layout_tool.py +1160 -0
  270. aiecs/tools/docs/document_parser_tool.py +1011 -0
  271. aiecs/tools/docs/document_writer_tool.py +1829 -0
  272. aiecs/tools/knowledge_graph/__init__.py +17 -0
  273. aiecs/tools/knowledge_graph/graph_reasoning_tool.py +807 -0
  274. aiecs/tools/knowledge_graph/graph_search_tool.py +944 -0
  275. aiecs/tools/knowledge_graph/kg_builder_tool.py +524 -0
  276. aiecs/tools/langchain_adapter.py +300 -138
  277. aiecs/tools/schema_generator.py +455 -0
  278. aiecs/tools/search_tool/__init__.py +100 -0
  279. aiecs/tools/search_tool/analyzers.py +581 -0
  280. aiecs/tools/search_tool/cache.py +264 -0
  281. aiecs/tools/search_tool/constants.py +128 -0
  282. aiecs/tools/search_tool/context.py +224 -0
  283. aiecs/tools/search_tool/core.py +778 -0
  284. aiecs/tools/search_tool/deduplicator.py +119 -0
  285. aiecs/tools/search_tool/error_handler.py +242 -0
  286. aiecs/tools/search_tool/metrics.py +343 -0
  287. aiecs/tools/search_tool/rate_limiter.py +172 -0
  288. aiecs/tools/search_tool/schemas.py +275 -0
  289. aiecs/tools/statistics/__init__.py +80 -0
  290. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +646 -0
  291. aiecs/tools/statistics/ai_insight_generator_tool.py +508 -0
  292. aiecs/tools/statistics/ai_report_orchestrator_tool.py +684 -0
  293. aiecs/tools/statistics/data_loader_tool.py +555 -0
  294. aiecs/tools/statistics/data_profiler_tool.py +638 -0
  295. aiecs/tools/statistics/data_transformer_tool.py +580 -0
  296. aiecs/tools/statistics/data_visualizer_tool.py +498 -0
  297. aiecs/tools/statistics/model_trainer_tool.py +507 -0
  298. aiecs/tools/statistics/statistical_analyzer_tool.py +472 -0
  299. aiecs/tools/task_tools/__init__.py +49 -36
  300. aiecs/tools/task_tools/chart_tool.py +200 -184
  301. aiecs/tools/task_tools/classfire_tool.py +268 -267
  302. aiecs/tools/task_tools/image_tool.py +175 -131
  303. aiecs/tools/task_tools/office_tool.py +226 -146
  304. aiecs/tools/task_tools/pandas_tool.py +477 -121
  305. aiecs/tools/task_tools/report_tool.py +390 -142
  306. aiecs/tools/task_tools/research_tool.py +149 -79
  307. aiecs/tools/task_tools/scraper_tool.py +339 -145
  308. aiecs/tools/task_tools/stats_tool.py +448 -209
  309. aiecs/tools/temp_file_manager.py +26 -24
  310. aiecs/tools/tool_executor/__init__.py +18 -16
  311. aiecs/tools/tool_executor/tool_executor.py +364 -52
  312. aiecs/utils/LLM_output_structor.py +74 -48
  313. aiecs/utils/__init__.py +14 -3
  314. aiecs/utils/base_callback.py +0 -3
  315. aiecs/utils/cache_provider.py +696 -0
  316. aiecs/utils/execution_utils.py +50 -31
  317. aiecs/utils/prompt_loader.py +1 -0
  318. aiecs/utils/token_usage_repository.py +37 -11
  319. aiecs/ws/socket_server.py +14 -4
  320. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/METADATA +52 -15
  321. aiecs-1.7.6.dist-info/RECORD +337 -0
  322. aiecs-1.7.6.dist-info/entry_points.txt +13 -0
  323. aiecs/config/registry.py +0 -19
  324. aiecs/domain/context/content_engine.py +0 -982
  325. aiecs/llm/base_client.py +0 -99
  326. aiecs/llm/openai_client.py +0 -125
  327. aiecs/llm/vertex_client.py +0 -186
  328. aiecs/llm/xai_client.py +0 -184
  329. aiecs/scripts/dependency_checker.py +0 -857
  330. aiecs/scripts/quick_dependency_check.py +0 -269
  331. aiecs/tools/task_tools/search_api.py +0 -7
  332. aiecs-1.0.1.dist-info/RECORD +0 -90
  333. aiecs-1.0.1.dist-info/entry_points.txt +0 -7
  334. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  335. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  336. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  337. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  338. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/WHEEL +0 -0
  339. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/licenses/LICENSE +0 -0
  340. {aiecs-1.0.1.dist-info → aiecs-1.7.6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1829 @@
1
+ import os
2
+ import json
3
+ import uuid
4
+ import hashlib
5
+ import logging
6
+ import asyncio
7
+ import shutil
8
+ from typing import Dict, Any, List, Optional, Union, Tuple
9
+ from enum import Enum
10
+ from datetime import datetime
11
+ from pathlib import Path
12
+ import tempfile
13
+
14
+ from pydantic import BaseModel, Field
15
+ from pydantic_settings import BaseSettings, SettingsConfigDict
16
+
17
+ from aiecs.tools.base_tool import BaseTool
18
+ from aiecs.tools import register_tool
19
+
20
+
21
class DocumentFormat(str, Enum):
    """Supported document formats for writing."""

    TXT = "txt"
    PLAIN_TEXT = "txt"  # Alias for TXT (same value, so Enum treats it as an alias member)
    JSON = "json"
    CSV = "csv"
    XML = "xml"
    MARKDOWN = "md"
    HTML = "html"
    YAML = "yaml"
    PDF = "pdf"
    DOCX = "docx"
    XLSX = "xlsx"
    BINARY = "binary"
36
+
37
+
38
class WriteMode(str, Enum):
    """Document writing modes."""

    CREATE = "create"  # Create a new file; fail if it already exists
    OVERWRITE = "overwrite"  # Overwrite an existing file
    APPEND = "append"  # Append to an existing file
    UPDATE = "update"  # Update an existing file (smart merge)
    BACKUP_WRITE = "backup_write"  # Back up the existing file, then write
    VERSION_WRITE = "version_write"  # Versioned write
    INSERT = "insert"  # Insert content at a specified position
    REPLACE = "replace"  # Replace the specified content
    DELETE = "delete"  # Delete the specified content
50
+
51
+
52
class EditOperation(str, Enum):
    """Advanced edit operations."""

    BOLD = "bold"  # Bold text
    ITALIC = "italic"  # Italic text
    UNDERLINE = "underline"  # Underlined text
    STRIKETHROUGH = "strikethrough"  # Strikethrough text
    HIGHLIGHT = "highlight"  # Highlighted text
    INSERT_TEXT = "insert_text"  # Insert text
    DELETE_TEXT = "delete_text"  # Delete text
    REPLACE_TEXT = "replace_text"  # Replace text
    COPY_TEXT = "copy_text"  # Copy text
    CUT_TEXT = "cut_text"  # Cut text
    PASTE_TEXT = "paste_text"  # Paste text
    FIND_REPLACE = "find_replace"  # Find and replace
    INSERT_LINE = "insert_line"  # Insert a line
    DELETE_LINE = "delete_line"  # Delete a line
    MOVE_LINE = "move_line"  # Move a line
70
+
71
+
72
class EncodingType(str, Enum):
    """Text encoding types (AUTO requests detection at write time)."""

    UTF8 = "utf-8"
    UTF16 = "utf-16"
    ASCII = "ascii"
    GBK = "gbk"
    AUTO = "auto"
80
+
81
+
82
class ValidationLevel(str, Enum):
    """Content validation levels."""

    NONE = "none"  # No validation
    BASIC = "basic"  # Basic validation (format, size)
    STRICT = "strict"  # Strict validation (content, structure)
    ENTERPRISE = "enterprise"  # Enterprise-grade validation (security, compliance)
89
+
90
+
91
class DocumentWriterError(Exception):
    """Base exception for document writer errors."""


class WriteError(DocumentWriterError):
    """Raised when write operations fail."""


class ValidationError(DocumentWriterError):
    """Raised when validation fails."""


class SecurityError(DocumentWriterError):
    """Raised when security validation fails."""


class WritePermissionError(DocumentWriterError):
    """Raised when write permission is denied."""


class ContentValidationError(DocumentWriterError):
    """Raised when content validation fails."""


class StorageError(DocumentWriterError):
    """Raised when storage operations fail."""
117
+
118
+
119
+ @register_tool("document_writer")
120
+ class DocumentWriterTool(BaseTool):
121
+ """
122
+ Modern high-performance document writing component that can:
123
+ 1. Handle multiple document formats and encodings
124
+ 2. Provide production-grade write operations with validation
125
+ 3. Support various write modes (create, overwrite, append, update)
126
+ 4. Implement backup and versioning strategies
127
+ 5. Ensure atomic operations and data integrity
128
+ 6. Support both local and cloud storage
129
+
130
+ Production Features:
131
+ - Atomic writes (no partial writes)
132
+ - Content validation and security scanning
133
+ - Automatic backup and versioning
134
+ - Write permission and quota checks
135
+ - Transaction-like operations
136
+ - Audit logging
137
+ """
138
+
139
    # Configuration schema
    class Config(BaseSettings):
        """Configuration for the document writer tool.

        Automatically reads from environment variables with DOC_WRITER_ prefix.
        Example: DOC_WRITER_GCS_PROJECT_ID -> gcs_project_id

        NOTE(review): several settings appear duplicated with slightly
        different names (enable_backup/auto_backup, atomic_write/atomic_writes,
        enable_versioning/version_control, enable_security_scan/security_scan).
        Confirm which of each pair the call sites actually read before
        consolidating.
        """

        model_config = SettingsConfigDict(env_prefix="DOC_WRITER_")

        # Working directories (created on tool initialization)
        temp_dir: str = Field(
            default=os.path.join(tempfile.gettempdir(), "document_writer"),
            description="Temporary directory for document processing",
        )
        backup_dir: str = Field(
            default=os.path.join(tempfile.gettempdir(), "document_backups"),
            description="Directory for document backups",
        )
        output_dir: Optional[str] = Field(default=None, description="Default output directory for documents")
        # Limits
        max_file_size: int = Field(default=100 * 1024 * 1024, description="Maximum file size in bytes")
        max_backup_versions: int = Field(default=10, description="Maximum number of backup versions to keep")
        default_encoding: str = Field(default="utf-8", description="Default text encoding for documents")
        # Feature toggles
        enable_backup: bool = Field(
            default=True,
            description="Whether to enable automatic backup functionality",
        )
        enable_versioning: bool = Field(default=True, description="Whether to enable document versioning")
        enable_content_validation: bool = Field(default=True, description="Whether to enable content validation")
        enable_security_scan: bool = Field(default=True, description="Whether to enable security scanning")
        atomic_write: bool = Field(default=True, description="Whether to use atomic write operations")
        validation_level: str = Field(default="basic", description="Content validation level")
        timeout_seconds: int = Field(default=60, description="Operation timeout in seconds")
        auto_backup: bool = Field(
            default=True,
            description="Whether to automatically backup before write operations",
        )
        atomic_writes: bool = Field(default=True, description="Whether to use atomic write operations")
        default_format: str = Field(default="md", description="Default document format")
        version_control: bool = Field(default=True, description="Whether to enable version control")
        security_scan: bool = Field(default=True, description="Whether to enable security scanning")
        # Cloud storage (Google Cloud Storage) settings
        enable_cloud_storage: bool = Field(
            default=True,
            description="Whether to enable cloud storage integration",
        )
        gcs_bucket_name: str = Field(
            default="aiecs-documents",
            description="Google Cloud Storage bucket name",
        )
        gcs_project_id: Optional[str] = Field(default=None, description="Google Cloud Storage project ID")
188
+
189
+ def __init__(self, config: Optional[Dict] = None, **kwargs):
190
+ """Initialize DocumentWriterTool with settings
191
+
192
+ Configuration is automatically loaded by BaseTool from:
193
+ 1. Explicit config dict (highest priority)
194
+ 2. YAML config files (config/tools/document_writer_tool.yaml)
195
+ 3. Environment variables (via dotenv from .env files)
196
+ 4. Tool defaults (lowest priority)
197
+
198
+ Args:
199
+ config: Optional configuration overrides
200
+ **kwargs: Additional arguments passed to BaseTool (e.g., tool_name)
201
+ """
202
+ super().__init__(config, **kwargs)
203
+
204
+ # Configuration is automatically loaded by BaseTool into self._config_obj
205
+ # Access config via self._config_obj (BaseSettings instance)
206
+ self.config = self._config_obj if self._config_obj else self.Config()
207
+
208
+ self.logger = logging.getLogger(__name__)
209
+
210
+ # Create necessary directories
211
+ os.makedirs(self.config.temp_dir, exist_ok=True)
212
+ os.makedirs(self.config.backup_dir, exist_ok=True)
213
+
214
+ # Initialize cloud storage
215
+ self._init_cloud_storage()
216
+
217
+ # Initialize content validators
218
+ self._init_validators()
219
+
220
+ def _init_cloud_storage(self):
221
+ """Initialize cloud storage for document writing"""
222
+ self.file_storage = None
223
+
224
+ if self.config.enable_cloud_storage:
225
+ try:
226
+ from aiecs.infrastructure.persistence.file_storage import (
227
+ FileStorage,
228
+ )
229
+
230
+ storage_config = {
231
+ "gcs_bucket_name": self.config.gcs_bucket_name,
232
+ "gcs_project_id": self.config.gcs_project_id,
233
+ "enable_local_fallback": True,
234
+ "local_storage_path": self.config.temp_dir,
235
+ }
236
+
237
+ self.file_storage = FileStorage(storage_config)
238
+ # Initialize storage asynchronously if in async context, otherwise defer
239
+ try:
240
+ loop = asyncio.get_running_loop()
241
+ # We're in an async context, create task
242
+ asyncio.create_task(self._init_storage_async())
243
+ except RuntimeError:
244
+ # Not in async context, initialization will happen on first async operation
245
+ # or can be called explicitly via write_document_async
246
+ pass
247
+
248
+ except ImportError:
249
+ self.logger.warning("FileStorage not available, cloud storage disabled")
250
+ except Exception as e:
251
+ self.logger.warning(f"Failed to initialize cloud storage: {e}")
252
+
253
+ async def _init_storage_async(self):
254
+ """Async initialization of file storage"""
255
+ try:
256
+ if self.file_storage:
257
+ await self.file_storage.initialize()
258
+ self.logger.info("Cloud storage initialized successfully")
259
+ except Exception as e:
260
+ self.logger.warning(f"Cloud storage initialization failed: {e}")
261
+ self.file_storage = None
262
+
263
+ def _init_validators(self):
264
+ """Initialize content validators"""
265
+ self.validators = {
266
+ DocumentFormat.JSON: self._validate_json_content,
267
+ DocumentFormat.XML: self._validate_xml_content,
268
+ DocumentFormat.CSV: self._validate_csv_content,
269
+ DocumentFormat.YAML: self._validate_yaml_content,
270
+ DocumentFormat.HTML: self._validate_html_content,
271
+ }
272
+
273
+ def _run_async_safely(self, coro):
274
+ """Safely run async coroutine from sync context
275
+
276
+ This method handles both cases:
277
+ 1. If already in an async context (event loop running), creates a new event loop in a thread
278
+ 2. If not in async context, uses asyncio.run() to create new event loop
279
+
280
+ Args:
281
+ coro: Coroutine to run
282
+
283
+ Returns:
284
+ Result of the coroutine
285
+ """
286
+ try:
287
+ # Try to get the running event loop
288
+ asyncio.get_running_loop()
289
+ # If we get here, we're in an async context
290
+ # We need to run the coroutine in a separate thread with its own event loop
291
+ import concurrent.futures
292
+ import threading
293
+
294
+ result = None
295
+ exception = None
296
+
297
+ def run_in_thread():
298
+ nonlocal result, exception
299
+ try:
300
+ new_loop = asyncio.new_event_loop()
301
+ asyncio.set_event_loop(new_loop)
302
+ result = new_loop.run_until_complete(coro)
303
+ new_loop.close()
304
+ except Exception as e:
305
+ exception = e
306
+
307
+ thread = threading.Thread(target=run_in_thread)
308
+ thread.start()
309
+ thread.join()
310
+
311
+ if exception:
312
+ raise exception
313
+ return result
314
+ except RuntimeError:
315
+ # No running event loop, safe to use asyncio.run()
316
+ return asyncio.run(coro)
317
+
318
    # Schema definitions
    class Write_documentSchema(BaseModel):
        """Schema for write_document operation.

        Mirrors the signature of write_document so tool frameworks can
        validate call arguments before dispatch.
        """

        target_path: str = Field(description="Target file path (local or cloud)")
        content: Union[str, bytes, Dict, List] = Field(description="Content to write")
        format: DocumentFormat = Field(description="Document format")
        mode: WriteMode = Field(default=WriteMode.CREATE, description="Write mode")
        encoding: EncodingType = Field(default=EncodingType.UTF8, description="Text encoding")
        validation_level: ValidationLevel = Field(default=ValidationLevel.BASIC, description="Validation level")
        metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
        backup_comment: Optional[str] = Field(default=None, description="Backup comment")
330
+
331
    class Batch_write_documentsSchema(BaseModel):
        """Schema for batch_write_documents operation.

        Mirrors the signature of batch_write_documents.
        """

        # Each entry holds the keyword arguments for one write_document call.
        write_operations: List[Dict[str, Any]] = Field(description="List of write operations")
        transaction_mode: bool = Field(default=True, description="Use transaction mode")
        rollback_on_error: bool = Field(default=True, description="Rollback on any error")
337
+
338
    class Edit_documentSchema(BaseModel):
        """Schema for edit_document operation.

        Mirrors the signature of edit_document.
        """

        target_path: str = Field(description="Target file path")
        operation: EditOperation = Field(description="Edit operation to perform")
        content: Optional[str] = Field(default=None, description="Content for the operation")
        position: Optional[Dict[str, Any]] = Field(default=None, description="Position info (line, column, offset)")
        selection: Optional[Dict[str, Any]] = Field(default=None, description="Text selection range")
        format_options: Optional[Dict[str, Any]] = Field(default=None, description="Formatting options")
347
+
348
    class Format_textSchema(BaseModel):
        """Schema for format_text operation.

        Mirrors the signature of the format_text entry point.
        """

        target_path: str = Field(description="Target file path")
        text_to_format: str = Field(description="Text to apply formatting to")
        format_type: EditOperation = Field(description="Type of formatting")
        format_options: Optional[Dict[str, Any]] = Field(default=None, description="Additional format options")
355
+
356
    class Find_replaceSchema(BaseModel):
        """Schema for find_replace operation.

        Mirrors the signature of the find_replace entry point.
        """

        target_path: str = Field(description="Target file path")
        find_text: str = Field(description="Text to find")
        replace_text: str = Field(description="Text to replace with")
        replace_all: bool = Field(default=False, description="Replace all occurrences")
        case_sensitive: bool = Field(default=True, description="Case sensitive search")
        regex_mode: bool = Field(default=False, description="Use regex for find/replace")
365
+
366
+ def write_document(
367
+ self,
368
+ target_path: str,
369
+ content: Union[str, bytes, Dict, List],
370
+ format: DocumentFormat,
371
+ mode: WriteMode = WriteMode.CREATE,
372
+ encoding: EncodingType = EncodingType.UTF8,
373
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
374
+ metadata: Optional[Dict[str, Any]] = None,
375
+ backup_comment: Optional[str] = None,
376
+ ) -> Dict[str, Any]:
377
+ """
378
+ Write document with production-grade features
379
+
380
+ Args:
381
+ target_path: Target file path (local or cloud)
382
+ content: Content to write
383
+ format: Document format
384
+ mode: Write mode (create, overwrite, append, update, etc.)
385
+ encoding: Text encoding
386
+ validation_level: Content validation level
387
+ metadata: Additional metadata
388
+ backup_comment: Comment for backup
389
+
390
+ Returns:
391
+ Dict containing write results and metadata
392
+ """
393
+ try:
394
+ start_time = datetime.now()
395
+ operation_id = str(uuid.uuid4())
396
+
397
+ self.logger.info(f"Starting write operation {operation_id}: {target_path}")
398
+
399
+ # Step 1: Validate inputs
400
+ self._validate_write_inputs(target_path, content, format, mode)
401
+
402
+ # Step 2: Prepare content
403
+ processed_content, content_metadata = self._prepare_content(content, format, encoding, validation_level)
404
+
405
+ # Step 3: Handle write mode logic
406
+ write_plan = self._plan_write_operation(target_path, mode, metadata)
407
+
408
+ # Step 4: Create backup if needed
409
+ backup_info = None
410
+ if self.config.enable_backup and mode in [
411
+ WriteMode.OVERWRITE,
412
+ WriteMode.UPDATE,
413
+ ]:
414
+ backup_info = self._create_backup(target_path, backup_comment)
415
+
416
+ # Step 5: Execute atomic write
417
+ write_result = self._run_async_safely(self._execute_atomic_write(target_path, processed_content, format, encoding, write_plan))
418
+
419
+ # Step 6: Update metadata and versioning
420
+ version_info = self._handle_versioning(target_path, content_metadata, metadata)
421
+
422
+ # Step 7: Audit logging
423
+ audit_info = self._log_write_operation(operation_id, target_path, mode, write_result, backup_info)
424
+
425
+ result = {
426
+ "operation_id": operation_id,
427
+ "target_path": target_path,
428
+ "write_mode": mode,
429
+ "format": format,
430
+ "encoding": encoding,
431
+ "content_metadata": content_metadata,
432
+ "write_result": write_result,
433
+ "backup_info": backup_info,
434
+ "version_info": version_info,
435
+ "audit_info": audit_info,
436
+ "processing_metadata": {
437
+ "start_time": start_time.isoformat(),
438
+ "end_time": datetime.now().isoformat(),
439
+ "duration": (datetime.now() - start_time).total_seconds(),
440
+ },
441
+ }
442
+
443
+ self.logger.info(f"Write operation {operation_id} completed successfully")
444
+ return result
445
+
446
+ except Exception as e:
447
+ self.logger.error(f"Write operation failed for {target_path}: {str(e)}")
448
+ # Rollback if needed
449
+ if "backup_info" in locals() and backup_info:
450
+ self._rollback_from_backup(target_path, backup_info)
451
+ raise DocumentWriterError(f"Document write failed: {str(e)}")
452
+
453
+ async def write_document_async(
454
+ self,
455
+ target_path: str,
456
+ content: Union[str, bytes, Dict, List],
457
+ format: DocumentFormat,
458
+ mode: WriteMode = WriteMode.CREATE,
459
+ encoding: EncodingType = EncodingType.UTF8,
460
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
461
+ metadata: Optional[Dict[str, Any]] = None,
462
+ backup_comment: Optional[str] = None,
463
+ ) -> Dict[str, Any]:
464
+ """Async version of write_document"""
465
+ return await asyncio.to_thread(
466
+ self.write_document,
467
+ target_path=target_path,
468
+ content=content,
469
+ format=format,
470
+ mode=mode,
471
+ encoding=encoding,
472
+ validation_level=validation_level,
473
+ metadata=metadata,
474
+ backup_comment=backup_comment,
475
+ )
476
+
477
+ def batch_write_documents(
478
+ self,
479
+ write_operations: List[Dict[str, Any]],
480
+ transaction_mode: bool = True,
481
+ rollback_on_error: bool = True,
482
+ ) -> Dict[str, Any]:
483
+ """
484
+ Batch write multiple documents with transaction support
485
+
486
+ Args:
487
+ write_operations: List of write operation dictionaries
488
+ transaction_mode: Use transaction mode for atomicity
489
+ rollback_on_error: Rollback all operations on any error
490
+
491
+ Returns:
492
+ Dict containing batch write results
493
+ """
494
+ try:
495
+ start_time = datetime.now()
496
+ batch_id = str(uuid.uuid4())
497
+
498
+ self.logger.info(f"Starting batch write operation {batch_id}: {len(write_operations)} operations")
499
+
500
+ completed_operations = []
501
+ backup_operations = []
502
+
503
+ try:
504
+ for i, operation in enumerate(write_operations):
505
+ self.logger.info(f"Processing operation {i+1}/{len(write_operations)}")
506
+
507
+ # Execute individual write operation
508
+ result = self.write_document(**operation)
509
+ completed_operations.append(
510
+ {
511
+ "index": i,
512
+ "operation": operation,
513
+ "result": result,
514
+ "status": "success",
515
+ }
516
+ )
517
+
518
+ # Track backup info for potential rollback
519
+ if result.get("backup_info"):
520
+ backup_operations.append(result["backup_info"])
521
+
522
+ batch_result = {
523
+ "batch_id": batch_id,
524
+ "total_operations": len(write_operations),
525
+ "successful_operations": len(completed_operations),
526
+ "failed_operations": 0,
527
+ "operations": completed_operations,
528
+ "transaction_mode": transaction_mode,
529
+ "batch_metadata": {
530
+ "start_time": start_time.isoformat(),
531
+ "end_time": datetime.now().isoformat(),
532
+ "duration": (datetime.now() - start_time).total_seconds(),
533
+ },
534
+ }
535
+
536
+ self.logger.info(f"Batch write operation {batch_id} completed successfully")
537
+ return batch_result
538
+
539
+ except Exception as e:
540
+ self.logger.error(f"Batch write operation {batch_id} failed: {str(e)}")
541
+
542
+ if rollback_on_error and transaction_mode:
543
+ self.logger.info(f"Rolling back batch operation {batch_id}")
544
+ self._rollback_batch_operations(completed_operations, backup_operations)
545
+
546
+ # Create failure result
547
+ batch_result = {
548
+ "batch_id": batch_id,
549
+ "total_operations": len(write_operations),
550
+ "successful_operations": len(completed_operations),
551
+ "failed_operations": len(write_operations) - len(completed_operations),
552
+ "operations": completed_operations,
553
+ "error": str(e),
554
+ "transaction_mode": transaction_mode,
555
+ "rollback_performed": rollback_on_error and transaction_mode,
556
+ }
557
+
558
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
559
+
560
+ except Exception as e:
561
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
562
+
563
+ def _validate_write_inputs(
564
+ self,
565
+ target_path: str,
566
+ content: Any,
567
+ format: DocumentFormat,
568
+ mode: WriteMode,
569
+ ):
570
+ """Validate write operation inputs"""
571
+ # Path validation
572
+ if not target_path or not isinstance(target_path, str):
573
+ raise ValueError("Invalid target path")
574
+
575
+ # Content validation
576
+ if content is None:
577
+ raise ValueError("Content cannot be None")
578
+
579
+ # Size validation
580
+ content_size = self._calculate_content_size(content)
581
+ if content_size > self.config.max_file_size:
582
+ raise ValueError(f"Content size {content_size} exceeds maximum {self.config.max_file_size}")
583
+
584
+ # Permission validation
585
+ if not self._check_write_permission(target_path, mode):
586
+ raise WritePermissionError(f"No write permission for {target_path}")
587
+
588
+ def _prepare_content(
589
+ self,
590
+ content: Any,
591
+ format: DocumentFormat,
592
+ encoding: EncodingType,
593
+ validation_level: ValidationLevel,
594
+ ) -> Tuple[Union[str, bytes], Dict]:
595
+ """Prepare and validate content for writing"""
596
+
597
+ # Content conversion based on format
598
+ processed_content: Union[str, bytes]
599
+ if format == DocumentFormat.JSON:
600
+ if isinstance(content, (dict, list)):
601
+ processed_content = json.dumps(content, ensure_ascii=False, indent=2)
602
+ else:
603
+ processed_content = str(content)
604
+ elif format == DocumentFormat.CSV:
605
+ processed_content = self._convert_to_csv(content)
606
+ elif format == DocumentFormat.XML:
607
+ processed_content = self._convert_to_xml(content)
608
+ elif format == DocumentFormat.YAML:
609
+ processed_content = self._convert_to_yaml(content)
610
+ elif format == DocumentFormat.HTML:
611
+ processed_content = self._convert_to_html(content)
612
+ elif format == DocumentFormat.MARKDOWN:
613
+ processed_content = self._convert_to_markdown(content)
614
+ elif format == DocumentFormat.BINARY:
615
+ if isinstance(content, bytes):
616
+ processed_content = content
617
+ else:
618
+ processed_content = str(content).encode(encoding.value)
619
+ else:
620
+ processed_content = str(content)
621
+
622
+ # Content validation
623
+ if self.config.enable_content_validation:
624
+ self._validate_content(processed_content, format, validation_level)
625
+
626
+ # Calculate metadata
627
+ content_metadata = {
628
+ "original_type": type(content).__name__,
629
+ "processed_size": (len(processed_content) if isinstance(processed_content, (str, bytes)) else 0),
630
+ "format": format,
631
+ "encoding": encoding,
632
+ "checksum": self._calculate_checksum(processed_content),
633
+ "validation_level": validation_level,
634
+ "timestamp": datetime.now().isoformat(),
635
+ }
636
+
637
+ return processed_content, content_metadata
638
+
639
+ def _plan_write_operation(self, target_path: str, mode: WriteMode, metadata: Optional[Dict]) -> Dict:
640
+ """Plan the write operation based on mode and target"""
641
+
642
+ plan = {
643
+ "target_path": target_path,
644
+ "mode": mode,
645
+ "file_exists": self._file_exists(target_path),
646
+ "is_cloud_path": self._is_cloud_storage_path(target_path),
647
+ "requires_backup": False,
648
+ "requires_versioning": False,
649
+ "atomic_operation": self.config.atomic_write,
650
+ }
651
+
652
+ if mode == WriteMode.CREATE and plan["file_exists"]:
653
+ raise DocumentWriterError(f"File already exists: {target_path}")
654
+
655
+ if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
656
+ plan["requires_backup"] = self.config.enable_backup
657
+ plan["requires_versioning"] = self.config.enable_versioning
658
+
659
+ if mode == WriteMode.APPEND and not plan["file_exists"]:
660
+ # Convert to CREATE mode
661
+ plan["mode"] = WriteMode.CREATE
662
+
663
+ return plan
664
+
665
+ def _create_backup(self, target_path: str, comment: Optional[str] = None) -> Dict:
666
+ """Create backup of existing file"""
667
+ if not self._file_exists(target_path):
668
+ return {}
669
+
670
+ try:
671
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
672
+ file_stem = Path(target_path).stem
673
+ file_suffix = Path(target_path).suffix
674
+
675
+ backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
676
+ backup_path = os.path.join(self.config.backup_dir, backup_filename)
677
+
678
+ # Copy file to backup location
679
+ if self._is_cloud_storage_path(target_path):
680
+ backup_path = self._backup_cloud_file(target_path, backup_path)
681
+ else:
682
+ shutil.copy2(target_path, backup_path)
683
+
684
+ backup_info = {
685
+ "original_path": target_path,
686
+ "backup_path": backup_path,
687
+ "timestamp": timestamp,
688
+ "comment": comment,
689
+ "checksum": self._calculate_file_checksum(target_path),
690
+ }
691
+
692
+ self.logger.info(f"Created backup: {backup_path}")
693
+ return backup_info
694
+
695
+ except Exception as e:
696
+ self.logger.error(f"Failed to create backup for {target_path}: {e}")
697
+ raise StorageError(f"Backup creation failed: {e}")
698
+
699
+ async def _execute_atomic_write(
700
+ self,
701
+ target_path: str,
702
+ content: Union[str, bytes],
703
+ format: DocumentFormat,
704
+ encoding: EncodingType,
705
+ plan: Dict,
706
+ ) -> Dict:
707
+ """Execute atomic write operation"""
708
+
709
+ if plan["is_cloud_path"]:
710
+ return await self._write_to_cloud_storage(target_path, content, format, encoding, plan)
711
+ else:
712
+ return self._write_to_local_file(target_path, content, format, encoding, plan)
713
+
714
+ def _write_to_local_file(
715
+ self,
716
+ target_path: str,
717
+ content: Union[str, bytes],
718
+ format: DocumentFormat,
719
+ encoding: EncodingType,
720
+ plan: Dict,
721
+ ) -> Dict:
722
+ """Write to local file system with atomic operation"""
723
+
724
+ try:
725
+ # Create parent directories
726
+ os.makedirs(os.path.dirname(target_path), exist_ok=True)
727
+
728
+ if plan["atomic_operation"]:
729
+ # Atomic write using temporary file
730
+ temp_path = f"{target_path}.tmp.{uuid.uuid4().hex}"
731
+
732
+ try:
733
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
734
+ # Read existing content first
735
+ with open(target_path, "rb") as f:
736
+ existing_content = f.read()
737
+
738
+ if isinstance(content, str):
739
+ content = existing_content.decode(encoding.value) + content
740
+ else:
741
+ content = existing_content + content
742
+
743
+ # Write to temporary file
744
+ if isinstance(content, bytes):
745
+ with open(temp_path, "wb") as f:
746
+ f.write(content)
747
+ else:
748
+ # Handle both EncodingType enum and string
749
+ enc_value = encoding.value if hasattr(encoding, "value") else str(encoding)
750
+ with open(temp_path, "w", encoding=enc_value) as f:
751
+ f.write(content)
752
+
753
+ # Atomic move
754
+ shutil.move(temp_path, target_path)
755
+
756
+ finally:
757
+ # Cleanup temp file if it still exists
758
+ if os.path.exists(temp_path):
759
+ os.unlink(temp_path)
760
+ else:
761
+ # Direct write
762
+ mode_map = {
763
+ WriteMode.CREATE: "w",
764
+ WriteMode.OVERWRITE: "w",
765
+ WriteMode.APPEND: "a",
766
+ WriteMode.UPDATE: "w",
767
+ }
768
+
769
+ file_mode = mode_map.get(plan["mode"], "w")
770
+ if isinstance(content, bytes):
771
+ file_mode += "b"
772
+
773
+ # Handle both EncodingType enum and string
774
+ file_enc_value: Optional[str] = None if isinstance(content, bytes) else (encoding.value if hasattr(encoding, "value") else str(encoding))
775
+ with open(target_path, file_mode, encoding=file_enc_value) as f:
776
+ f.write(content)
777
+
778
+ # Get file stats
779
+ stat = os.stat(target_path)
780
+
781
+ return {
782
+ "path": target_path,
783
+ "size": stat.st_size,
784
+ "checksum": self._calculate_file_checksum(target_path),
785
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
786
+ "atomic_write": plan["atomic_operation"],
787
+ }
788
+
789
+ except Exception as e:
790
+ raise StorageError(f"Local file write failed: {e}")
791
+
792
+ async def _write_to_cloud_storage(
793
+ self,
794
+ target_path: str,
795
+ content: Union[str, bytes],
796
+ format: DocumentFormat,
797
+ encoding: EncodingType,
798
+ plan: Dict,
799
+ ) -> Dict:
800
+ """Write to cloud storage"""
801
+
802
+ if not self.file_storage:
803
+ raise StorageError("Cloud storage not available")
804
+
805
+ try:
806
+ storage_path = self._parse_cloud_storage_path(target_path)
807
+
808
+ # Handle append mode for cloud storage
809
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
810
+ existing_content = await self.file_storage.retrieve(storage_path)
811
+ if isinstance(content, str) and isinstance(existing_content, str):
812
+ content = existing_content + content
813
+ elif isinstance(content, bytes) and isinstance(existing_content, bytes):
814
+ content = existing_content + content
815
+
816
+ # Store in cloud storage
817
+ await self.file_storage.store(storage_path, content)
818
+
819
+ return {
820
+ "path": target_path,
821
+ "storage_path": storage_path,
822
+ "size": (len(content) if isinstance(content, (str, bytes)) else 0),
823
+ "checksum": self._calculate_checksum(content),
824
+ "cloud_storage": True,
825
+ }
826
+
827
+ except Exception as e:
828
+ raise StorageError(f"Cloud storage write failed: {e}")
829
+
830
+ def _handle_versioning(
831
+ self,
832
+ target_path: str,
833
+ content_metadata: Dict,
834
+ metadata: Optional[Dict],
835
+ ) -> Optional[Dict]:
836
+ """Handle document versioning"""
837
+
838
+ if not self.config.enable_versioning:
839
+ return None
840
+
841
+ try:
842
+ version_info = {
843
+ "path": target_path,
844
+ "version": self._get_next_version(target_path),
845
+ "timestamp": datetime.now().isoformat(),
846
+ "content_metadata": content_metadata,
847
+ "user_metadata": metadata or {},
848
+ }
849
+
850
+ # Store version info
851
+ version_file = f"{target_path}.versions.json"
852
+ versions = self._load_version_history(version_file)
853
+ versions.append(version_info)
854
+
855
+ # Keep only recent versions
856
+ if len(versions) > self.config.max_backup_versions:
857
+ versions = versions[-self.config.max_backup_versions :]
858
+
859
+ self._save_version_history(version_file, versions)
860
+
861
+ return version_info
862
+
863
+ except Exception as e:
864
+ self.logger.warning(f"Versioning failed for {target_path}: {e}")
865
+ return None
866
+
867
+ def _validate_content(
868
+ self,
869
+ content: Union[str, bytes],
870
+ format: DocumentFormat,
871
+ validation_level: ValidationLevel,
872
+ ):
873
+ """Validate content based on format and validation level"""
874
+
875
+ if validation_level == ValidationLevel.NONE:
876
+ return
877
+
878
+ try:
879
+ # Format-specific validation
880
+ if format in self.validators:
881
+ self.validators[format](content, validation_level)
882
+
883
+ # Security validation for enterprise level
884
+ if validation_level == ValidationLevel.ENTERPRISE:
885
+ self._security_scan_content(content)
886
+
887
+ except Exception as e:
888
+ raise ContentValidationError(f"Content validation failed: {e}")
889
+
890
+ def _validate_json_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
891
+ """Validate JSON content"""
892
+ try:
893
+ if isinstance(content, bytes):
894
+ content = content.decode("utf-8")
895
+ json.loads(content)
896
+ except json.JSONDecodeError as e:
897
+ raise ContentValidationError(f"Invalid JSON: {e}")
898
+
899
+ def _validate_xml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
900
+ """Validate XML content"""
901
+ try:
902
+ import xml.etree.ElementTree as ET
903
+
904
+ if isinstance(content, bytes):
905
+ content = content.decode("utf-8")
906
+ ET.fromstring(content)
907
+ except ET.ParseError as e:
908
+ raise ContentValidationError(f"Invalid XML: {e}")
909
+
910
+ def _validate_csv_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
911
+ """Validate CSV content"""
912
+ try:
913
+ import csv
914
+ import io
915
+
916
+ if isinstance(content, bytes):
917
+ content = content.decode("utf-8")
918
+ csv.reader(io.StringIO(content))
919
+ except Exception as e:
920
+ raise ContentValidationError(f"Invalid CSV: {e}")
921
+
922
+ def _validate_yaml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
923
+ """Validate YAML content"""
924
+ try:
925
+ import yaml
926
+
927
+ if isinstance(content, bytes):
928
+ content = content.decode("utf-8")
929
+ yaml.safe_load(content)
930
+ except yaml.YAMLError as e:
931
+ raise ContentValidationError(f"Invalid YAML: {e}")
932
+
933
+ def _validate_html_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
934
+ """Validate HTML content"""
935
+ try:
936
+ from bs4 import BeautifulSoup
937
+
938
+ if isinstance(content, bytes):
939
+ content = content.decode("utf-8")
940
+ BeautifulSoup(content, "html.parser")
941
+ except Exception as e:
942
+ raise ContentValidationError(f"Invalid HTML: {e}")
943
+
944
+ def _security_scan_content(self, content: Union[str, bytes]):
945
+ """Perform security scan on content"""
946
+ if isinstance(content, bytes):
947
+ content = content.decode("utf-8", errors="ignore")
948
+
949
+ # Check for suspicious patterns
950
+ suspicious_patterns = [
951
+ r"<script[^>]*>", # JavaScript
952
+ r"javascript:", # JavaScript URLs
953
+ r"vbscript:", # VBScript URLs
954
+ r"data:.*base64", # Base64 data URLs
955
+ r"eval\s*\(", # eval() calls
956
+ r"exec\s*\(", # exec() calls
957
+ ]
958
+
959
+ import re
960
+
961
+ for pattern in suspicious_patterns:
962
+ if re.search(pattern, content, re.IGNORECASE):
963
+ raise ContentValidationError("Security scan failed: suspicious pattern detected")
964
+
965
+ # Helper methods
966
+ def _calculate_content_size(self, content: Any) -> int:
967
+ """Calculate content size in bytes"""
968
+ if isinstance(content, bytes):
969
+ return len(content)
970
+ elif isinstance(content, str):
971
+ return len(content.encode("utf-8"))
972
+ else:
973
+ return len(str(content).encode("utf-8"))
974
+
975
+ def _calculate_checksum(self, content: Union[str, bytes]) -> str:
976
+ """Calculate content checksum"""
977
+ if isinstance(content, str):
978
+ content = content.encode("utf-8")
979
+ return hashlib.sha256(content).hexdigest()
980
+
981
+ def _calculate_file_checksum(self, file_path: str) -> str:
982
+ """Calculate file checksum"""
983
+ hash_sha256 = hashlib.sha256()
984
+ with open(file_path, "rb") as f:
985
+ for chunk in iter(lambda: f.read(4096), b""):
986
+ hash_sha256.update(chunk)
987
+ return hash_sha256.hexdigest()
988
+
989
+ def _check_write_permission(self, target_path: str, mode: WriteMode) -> bool:
990
+ """Check write permission for target path"""
991
+ try:
992
+ if self._is_cloud_storage_path(target_path):
993
+ return self.file_storage is not None
994
+
995
+ parent_dir = os.path.dirname(target_path)
996
+ if not os.path.exists(parent_dir):
997
+ # Check if we can create the directory
998
+ return os.access(os.path.dirname(parent_dir), os.W_OK)
999
+
1000
+ if os.path.exists(target_path):
1001
+ return os.access(target_path, os.W_OK)
1002
+ else:
1003
+ return os.access(parent_dir, os.W_OK)
1004
+
1005
+ except Exception:
1006
+ return False
1007
+
1008
+ def _file_exists(self, file_path: str) -> bool:
1009
+ """Check if file exists (local or cloud)"""
1010
+ if self._is_cloud_storage_path(file_path):
1011
+ # For cloud storage, we'd need to implement exists check
1012
+ return False # Simplified for now
1013
+ else:
1014
+ return os.path.exists(file_path)
1015
+
1016
+ def _is_cloud_storage_path(self, source: str) -> bool:
1017
+ """Check if source is a cloud storage path"""
1018
+ cloud_schemes = ["gs", "s3", "azure", "cloud"]
1019
+ try:
1020
+ from urllib.parse import urlparse
1021
+
1022
+ parsed = urlparse(source)
1023
+ return parsed.scheme in cloud_schemes
1024
+ except Exception:
1025
+ return False
1026
+
1027
+ def _parse_cloud_storage_path(self, source: str) -> str:
1028
+ """Parse cloud storage path to get storage key"""
1029
+ try:
1030
+ from urllib.parse import urlparse
1031
+
1032
+ parsed = urlparse(source)
1033
+ return parsed.path.lstrip("/")
1034
+ except Exception:
1035
+ return source
1036
+
1037
+ # Content conversion methods
1038
+ def _convert_to_csv(self, content: Any) -> str:
1039
+ """Convert content to CSV format"""
1040
+ import csv
1041
+ import io
1042
+
1043
+ output = io.StringIO()
1044
+ writer = csv.writer(output)
1045
+
1046
+ if isinstance(content, list):
1047
+ for row in content:
1048
+ if isinstance(row, (list, tuple)):
1049
+ writer.writerow(row)
1050
+ else:
1051
+ writer.writerow([row])
1052
+ elif isinstance(content, dict):
1053
+ # Convert dict to CSV with headers
1054
+ if content:
1055
+ headers = list(content.keys())
1056
+ writer.writerow(headers)
1057
+ writer.writerow([content[h] for h in headers])
1058
+ else:
1059
+ writer.writerow([str(content)])
1060
+
1061
+ return output.getvalue()
1062
+
1063
+ def _convert_to_xml(self, content: Any) -> str:
1064
+ """Convert content to XML format"""
1065
+ import xml.etree.ElementTree as ET
1066
+
1067
+ if isinstance(content, dict):
1068
+ root = ET.Element("document")
1069
+ for key, value in content.items():
1070
+ elem = ET.SubElement(root, str(key))
1071
+ elem.text = str(value)
1072
+ return ET.tostring(root, encoding="unicode")
1073
+ else:
1074
+ root = ET.Element("document")
1075
+ root.text = str(content)
1076
+ return ET.tostring(root, encoding="unicode")
1077
+
1078
+ def _convert_to_yaml(self, content: Any) -> str:
1079
+ """Convert content to YAML format"""
1080
+ try:
1081
+ import yaml
1082
+
1083
+ return yaml.dump(content, default_flow_style=False, allow_unicode=True)
1084
+ except ImportError:
1085
+ # Fallback to simple string representation
1086
+ return str(content)
1087
+
1088
+ def _convert_to_html(self, content: Any) -> str:
1089
+ """Convert content to HTML format"""
1090
+ if isinstance(content, dict):
1091
+ html = "<html><body>\n"
1092
+ for key, value in content.items():
1093
+ html += f"<h3>{key}</h3>\n<p>{value}</p>\n"
1094
+ html += "</body></html>"
1095
+ return html
1096
+ else:
1097
+ return f"<html><body><pre>{str(content)}</pre></body></html>"
1098
+
1099
+ def _convert_to_markdown(self, content: Any) -> str:
1100
+ """Convert content to Markdown format"""
1101
+ if isinstance(content, dict):
1102
+ md = ""
1103
+ for key, value in content.items():
1104
+ md += f"## {key}\n\n{value}\n\n"
1105
+ return md
1106
+ else:
1107
+ return str(content)
1108
+
1109
+ # Versioning methods
1110
+ def _get_next_version(self, file_path: str) -> int:
1111
+ """Get next version number for file"""
1112
+ version_file = f"{file_path}.versions.json"
1113
+ versions = self._load_version_history(version_file)
1114
+ return len(versions) + 1
1115
+
1116
+ def _load_version_history(self, version_file: str) -> List[Dict]:
1117
+ """Load version history from file"""
1118
+ try:
1119
+ if os.path.exists(version_file):
1120
+ with open(version_file, "r") as f:
1121
+ return json.load(f)
1122
+ except Exception:
1123
+ pass
1124
+ return []
1125
+
1126
+ def _save_version_history(self, version_file: str, versions: List[Dict]):
1127
+ """Save version history to file"""
1128
+ try:
1129
+ with open(version_file, "w") as f:
1130
+ json.dump(versions, f, indent=2)
1131
+ except Exception as e:
1132
+ self.logger.warning(f"Failed to save version history: {e}")
1133
+
1134
+ # Backup and rollback methods
1135
+ def _backup_cloud_file(self, source_path: str, backup_path: str) -> str:
1136
+ """Backup cloud file"""
1137
+ # Simplified implementation
1138
+ return backup_path
1139
+
1140
+ def _rollback_from_backup(self, target_path: str, backup_info: Dict):
1141
+ """Rollback file from backup"""
1142
+ try:
1143
+ if backup_info and os.path.exists(backup_info["backup_path"]):
1144
+ shutil.copy2(backup_info["backup_path"], target_path)
1145
+ self.logger.info(f"Rolled back {target_path} from backup")
1146
+ except Exception as e:
1147
+ self.logger.error(f"Rollback failed: {e}")
1148
+
1149
+ def _rollback_batch_operations(self, completed_operations: List[Dict], backup_operations: List[Dict]):
1150
+ """Rollback batch operations"""
1151
+ for op in reversed(completed_operations):
1152
+ try:
1153
+ result = op.get("result", {})
1154
+ backup_info = result.get("backup_info")
1155
+ if backup_info:
1156
+ self._rollback_from_backup(result["write_result"]["path"], backup_info)
1157
+ except Exception as e:
1158
+ self.logger.error(f"Batch rollback failed for operation: {e}")
1159
+
1160
+ def _log_write_operation(
1161
+ self,
1162
+ operation_id: str,
1163
+ target_path: str,
1164
+ mode: WriteMode,
1165
+ write_result: Dict,
1166
+ backup_info: Optional[Dict],
1167
+ ) -> Dict:
1168
+ """Log write operation for audit"""
1169
+ audit_info = {
1170
+ "operation_id": operation_id,
1171
+ "timestamp": datetime.now().isoformat(),
1172
+ "target_path": target_path,
1173
+ "mode": mode,
1174
+ "success": True,
1175
+ "file_size": write_result.get("size", 0),
1176
+ "checksum": write_result.get("checksum"),
1177
+ "backup_created": backup_info is not None,
1178
+ }
1179
+
1180
+ # Log to audit file
1181
+ try:
1182
+ audit_file = os.path.join(self.config.temp_dir, "write_audit.log")
1183
+ with open(audit_file, "a") as f:
1184
+ f.write(json.dumps(audit_info) + "\n")
1185
+ except Exception as e:
1186
+ self.logger.warning(f"Audit logging failed: {e}")
1187
+
1188
+ return audit_info
1189
+
1190
    def edit_document(
        self,
        target_path: str,
        operation: EditOperation,
        content: Optional[str] = None,
        position: Optional[Dict[str, Any]] = None,
        selection: Optional[Dict[str, Any]] = None,
        format_options: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Perform advanced editing operations on documents

        Reads the target file, applies one EditOperation (text insert/
        delete/replace, inline formatting, line operations, or clipboard
        copy/cut/paste), then writes the result back with an automatic
        backup (WriteMode.BACKUP_WRITE).

        Args:
            target_path: Target file path
            operation: Edit operation to perform
            content: Content for the operation (if applicable)
            position: Position info (line, column, offset)
            selection: Text selection range
            format_options: Additional format options

        Returns:
            Dict containing edit results

        Raises:
            DocumentWriterError: wraps any failure during read, edit, or write
        """
        try:
            start_time = datetime.now()
            operation_id = str(uuid.uuid4())

            self.logger.info(f"Starting edit operation {operation_id}: {operation} on {target_path}")

            # Read current document content
            current_content = self._read_document_content(target_path)

            # Perform the specific edit operation; each branch delegates to a
            # dedicated helper. Text-producing operations require `content`,
            # range-based ones use `selection`.
            if operation == EditOperation.INSERT_TEXT:
                if content is None:
                    raise ValueError("content is required for INSERT_TEXT operation")
                edited_content = self._insert_text(current_content, content, position)
            elif operation == EditOperation.DELETE_TEXT:
                edited_content = self._delete_text(current_content, selection)
            elif operation == EditOperation.REPLACE_TEXT:
                if content is None:
                    raise ValueError("content is required for REPLACE_TEXT operation")
                edited_content = self._replace_text(current_content, selection, content)
            elif operation == EditOperation.BOLD:
                edited_content = self._format_text_bold(current_content, selection, format_options)
            elif operation == EditOperation.ITALIC:
                edited_content = self._format_text_italic(current_content, selection, format_options)
            elif operation == EditOperation.UNDERLINE:
                edited_content = self._format_text_underline(current_content, selection, format_options)
            elif operation == EditOperation.STRIKETHROUGH:
                edited_content = self._format_text_strikethrough(current_content, selection, format_options)
            elif operation == EditOperation.HIGHLIGHT:
                edited_content = self._format_text_highlight(current_content, selection, format_options)
            elif operation == EditOperation.INSERT_LINE:
                if content is None:
                    raise ValueError("content is required for INSERT_LINE operation")
                edited_content = self._insert_line(current_content, position, content)
            elif operation == EditOperation.DELETE_LINE:
                edited_content = self._delete_line(current_content, position)
            elif operation == EditOperation.MOVE_LINE:
                edited_content = self._move_line(current_content, position, format_options)
            elif operation == EditOperation.COPY_TEXT:
                # COPY does not modify the file: return the copy result
                # directly, skipping write-back and processing metadata.
                return self._copy_text(current_content, selection)
            elif operation == EditOperation.CUT_TEXT:
                edited_content, cut_content = self._cut_text(current_content, selection)
                # Store cut content in clipboard
                self._store_clipboard_content(cut_content)
            elif operation == EditOperation.PASTE_TEXT:
                clipboard_content = self._get_clipboard_content()
                edited_content = self._paste_text(current_content, position, clipboard_content)
            else:
                raise ValueError(f"Unsupported edit operation: {operation}")

            # Write the edited content back to file
            # (extensions outside DocumentFormat fall back to plain text)
            file_format_str = self._detect_file_format(target_path)
            file_format = DocumentFormat(file_format_str) if file_format_str in [f.value for f in DocumentFormat] else DocumentFormat.TXT
            write_result = self.write_document(
                target_path=target_path,
                content=edited_content,
                format=file_format,
                mode=WriteMode.BACKUP_WRITE,  # Always backup before editing
                backup_comment=f"Edit operation: {operation}",
            )

            result = {
                "operation_id": operation_id,
                "target_path": target_path,
                "operation": operation,
                "edit_metadata": {
                    "original_size": len(current_content),
                    # edited_size is 0 when the edit did not produce a string
                    "edited_size": (len(edited_content) if isinstance(edited_content, str) else 0),
                    "position": position,
                    "selection": selection,
                },
                "write_result": write_result,
                "processing_metadata": {
                    "start_time": start_time.isoformat(),
                    "end_time": datetime.now().isoformat(),
                    "duration": (datetime.now() - start_time).total_seconds(),
                },
            }

            self.logger.info(f"Edit operation {operation_id} completed successfully")
            return result

        except Exception as e:
            raise DocumentWriterError(f"Edit operation failed: {str(e)}")
1297
+
1298
+ def format_text(
1299
+ self,
1300
+ target_path: str,
1301
+ text_to_format: str,
1302
+ format_type: EditOperation,
1303
+ format_options: Optional[Dict[str, Any]] = None,
1304
+ ) -> Dict[str, Any]:
1305
+ """
1306
+ Apply formatting to specific text in a document
1307
+
1308
+ Args:
1309
+ target_path: Target file path
1310
+ text_to_format: Text to apply formatting to
1311
+ format_type: Type of formatting (bold, italic, etc.)
1312
+ format_options: Additional format options
1313
+
1314
+ Returns:
1315
+ Dict containing formatting results
1316
+ """
1317
+ try:
1318
+ current_content = self._read_document_content(target_path)
1319
+
1320
+ # Find all occurrences of the text
1321
+ formatted_content = self._apply_text_formatting(current_content, text_to_format, format_type, format_options)
1322
+
1323
+ # Write back to file
1324
+ file_format_str = self._detect_file_format(target_path)
1325
+ file_format = DocumentFormat(file_format_str) if file_format_str in [f.value for f in DocumentFormat] else DocumentFormat.TXT
1326
+ write_result = self.write_document(
1327
+ target_path=target_path,
1328
+ content=formatted_content,
1329
+ format=file_format,
1330
+ mode=WriteMode.BACKUP_WRITE,
1331
+ )
1332
+
1333
+ return {
1334
+ "target_path": target_path,
1335
+ "text_formatted": text_to_format,
1336
+ "format_type": format_type,
1337
+ "write_result": write_result,
1338
+ }
1339
+
1340
+ except Exception as e:
1341
+ raise DocumentWriterError(f"Text formatting failed: {str(e)}")
1342
+
1343
+ def find_replace(
1344
+ self,
1345
+ target_path: str,
1346
+ find_text: str,
1347
+ replace_text: str,
1348
+ replace_all: bool = False,
1349
+ case_sensitive: bool = True,
1350
+ regex_mode: bool = False,
1351
+ ) -> Dict[str, Any]:
1352
+ """
1353
+ Find and replace text in a document
1354
+
1355
+ Args:
1356
+ target_path: Target file path
1357
+ find_text: Text to find
1358
+ replace_text: Text to replace with
1359
+ replace_all: Replace all occurrences
1360
+ case_sensitive: Case sensitive search
1361
+ regex_mode: Use regex for find/replace
1362
+
1363
+ Returns:
1364
+ Dict containing find/replace results
1365
+ """
1366
+ try:
1367
+ current_content = self._read_document_content(target_path)
1368
+
1369
+ # Perform find and replace
1370
+ new_content, replacements = self._perform_find_replace(
1371
+ current_content,
1372
+ find_text,
1373
+ replace_text,
1374
+ replace_all,
1375
+ case_sensitive,
1376
+ regex_mode,
1377
+ )
1378
+
1379
+ if replacements > 0:
1380
+ # Write back to file
1381
+ file_format_str = self._detect_file_format(target_path)
1382
+ file_format = DocumentFormat(file_format_str) if file_format_str in [f.value for f in DocumentFormat] else DocumentFormat.TXT
1383
+ write_result = self.write_document(
1384
+ target_path=target_path,
1385
+ content=new_content,
1386
+ format=file_format,
1387
+ mode=WriteMode.BACKUP_WRITE,
1388
+ backup_comment=f"Find/Replace: '{find_text}' -> '{replace_text}'",
1389
+ )
1390
+
1391
+ return {
1392
+ "target_path": target_path,
1393
+ "find_text": find_text,
1394
+ "replace_text": replace_text,
1395
+ "replacements_made": replacements,
1396
+ "write_result": write_result,
1397
+ }
1398
+ else:
1399
+ return {
1400
+ "target_path": target_path,
1401
+ "find_text": find_text,
1402
+ "replace_text": replace_text,
1403
+ "replacements_made": 0,
1404
+ "message": "No matches found",
1405
+ }
1406
+
1407
+ except Exception as e:
1408
+ raise DocumentWriterError(f"Find/replace operation failed: {str(e)}")
1409
+
1410
+ # Helper methods for editing operations
1411
+ def _read_document_content(self, file_path: str) -> str:
1412
+ """Read document content for editing"""
1413
+ try:
1414
+ with open(file_path, "r", encoding="utf-8") as f:
1415
+ return f.read()
1416
+ except UnicodeDecodeError:
1417
+ # Try with different encodings
1418
+ for encoding in ["gbk", "latin1", "cp1252"]:
1419
+ try:
1420
+ with open(file_path, "r", encoding=encoding) as f:
1421
+ return f.read()
1422
+ except Exception:
1423
+ continue
1424
+ raise DocumentWriterError(f"Cannot decode file: {file_path}")
1425
+ except Exception as e:
1426
+ raise DocumentWriterError(f"Cannot read file {file_path}: {str(e)}")
1427
+
1428
+ def _detect_file_format(self, file_path: str) -> str:
1429
+ """Detect file format from extension"""
1430
+ ext = os.path.splitext(file_path)[1].lower()
1431
+ format_map = {
1432
+ ".txt": "txt",
1433
+ ".json": "json",
1434
+ ".csv": "csv",
1435
+ ".xml": "xml",
1436
+ ".html": "html",
1437
+ ".htm": "html",
1438
+ ".md": "markdown",
1439
+ ".markdown": "markdown",
1440
+ ".yaml": "yaml",
1441
+ ".yml": "yaml",
1442
+ }
1443
+ return format_map.get(ext, "txt")
1444
+
1445
+ def _insert_text(self, content: str, text: str, position: Optional[Dict[str, Any]]) -> str:
1446
+ """Insert text at specified position"""
1447
+ if not position:
1448
+ return content + text
1449
+
1450
+ if "offset" in position:
1451
+ offset = position["offset"]
1452
+ return content[:offset] + text + content[offset:]
1453
+ elif "line" in position:
1454
+ lines = content.split("\n")
1455
+ line_num = position.get("line", 0)
1456
+ column = position.get("column", 0)
1457
+
1458
+ if line_num < len(lines):
1459
+ line = lines[line_num]
1460
+ lines[line_num] = line[:column] + text + line[column:]
1461
+ else:
1462
+ lines.append(text)
1463
+ return "\n".join(lines)
1464
+ else:
1465
+ return content + text
1466
+
1467
+ def _delete_text(self, content: str, selection: Optional[Dict[str, Any]]) -> str:
1468
+ """Delete text in specified selection"""
1469
+ if not selection:
1470
+ return content
1471
+
1472
+ if "start_offset" in selection and "end_offset" in selection:
1473
+ start = selection["start_offset"]
1474
+ end = selection["end_offset"]
1475
+ return content[:start] + content[end:]
1476
+ elif "start_line" in selection and "end_line" in selection:
1477
+ lines = content.split("\n")
1478
+ start_line = selection["start_line"]
1479
+ end_line = selection["end_line"]
1480
+ start_col = selection.get("start_column", 0)
1481
+ end_col = selection.get(
1482
+ "end_column",
1483
+ len(lines[end_line]) if end_line < len(lines) else 0,
1484
+ )
1485
+
1486
+ if start_line == end_line:
1487
+ # Same line deletion
1488
+ line = lines[start_line]
1489
+ lines[start_line] = line[:start_col] + line[end_col:]
1490
+ else:
1491
+ # Multi-line deletion
1492
+ lines[start_line] = lines[start_line][:start_col]
1493
+ if end_line < len(lines):
1494
+ lines[start_line] += lines[end_line][end_col:]
1495
+ del lines[start_line + 1 : end_line + 1]
1496
+
1497
+ return "\n".join(lines)
1498
+
1499
+ return content
1500
+
1501
+ def _replace_text(
1502
+ self,
1503
+ content: str,
1504
+ selection: Optional[Dict[str, Any]],
1505
+ replacement: str,
1506
+ ) -> str:
1507
+ """Replace text in specified selection"""
1508
+ if not selection:
1509
+ return content
1510
+
1511
+ # First delete the selected text, then insert replacement
1512
+ content_after_delete = self._delete_text(content, selection)
1513
+
1514
+ # Calculate new insertion position after deletion
1515
+ if "start_offset" in selection:
1516
+ insert_pos = {"offset": selection["start_offset"]}
1517
+ elif "start_line" in selection:
1518
+ insert_pos = {
1519
+ "line": selection["start_line"],
1520
+ "column": selection.get("start_column", 0),
1521
+ }
1522
+ else:
1523
+ insert_pos = None
1524
+
1525
+ return self._insert_text(content_after_delete, replacement, insert_pos)
1526
+
1527
+ def _format_text_bold(
1528
+ self,
1529
+ content: str,
1530
+ selection: Optional[Dict[str, Any]],
1531
+ options: Optional[Dict[str, Any]],
1532
+ ) -> str:
1533
+ """Apply bold formatting to selected text"""
1534
+ if not selection:
1535
+ return content
1536
+
1537
+ format_type = options.get("format_type", "markdown") if options else "markdown"
1538
+
1539
+ if format_type == "markdown":
1540
+ return self._apply_markdown_formatting(content, selection, "**", "**")
1541
+ elif format_type == "html":
1542
+ return self._apply_html_formatting(content, selection, "<strong>", "</strong>")
1543
+ else:
1544
+ return content
1545
+
1546
+ def _format_text_italic(
1547
+ self,
1548
+ content: str,
1549
+ selection: Optional[Dict[str, Any]],
1550
+ options: Optional[Dict[str, Any]],
1551
+ ) -> str:
1552
+ """Apply italic formatting to selected text"""
1553
+ if not selection:
1554
+ return content
1555
+
1556
+ format_type = options.get("format_type", "markdown") if options else "markdown"
1557
+
1558
+ if format_type == "markdown":
1559
+ return self._apply_markdown_formatting(content, selection, "*", "*")
1560
+ elif format_type == "html":
1561
+ return self._apply_html_formatting(content, selection, "<em>", "</em>")
1562
+ else:
1563
+ return content
1564
+
1565
+ def _format_text_underline(
1566
+ self,
1567
+ content: str,
1568
+ selection: Optional[Dict[str, Any]],
1569
+ options: Optional[Dict[str, Any]],
1570
+ ) -> str:
1571
+ """Apply underline formatting to selected text"""
1572
+ if not selection:
1573
+ return content
1574
+
1575
+ format_type = options.get("format_type", "html") if options else "html"
1576
+
1577
+ if format_type == "html":
1578
+ return self._apply_html_formatting(content, selection, "<u>", "</u>")
1579
+ else:
1580
+ return content
1581
+
1582
+ def _format_text_strikethrough(
1583
+ self,
1584
+ content: str,
1585
+ selection: Optional[Dict[str, Any]],
1586
+ options: Optional[Dict[str, Any]],
1587
+ ) -> str:
1588
+ """Apply strikethrough formatting to selected text"""
1589
+ if not selection:
1590
+ return content
1591
+
1592
+ format_type = options.get("format_type", "markdown") if options else "markdown"
1593
+
1594
+ if format_type == "markdown":
1595
+ return self._apply_markdown_formatting(content, selection, "~~", "~~")
1596
+ elif format_type == "html":
1597
+ return self._apply_html_formatting(content, selection, "<del>", "</del>")
1598
+ else:
1599
+ return content
1600
+
1601
+ def _format_text_highlight(
1602
+ self,
1603
+ content: str,
1604
+ selection: Optional[Dict[str, Any]],
1605
+ options: Optional[Dict[str, Any]],
1606
+ ) -> str:
1607
+ """Apply highlight formatting to selected text"""
1608
+ if not selection:
1609
+ return content
1610
+
1611
+ format_type = options.get("format_type", "html") if options else "html"
1612
+ color = options.get("color", "yellow") if options else "yellow"
1613
+
1614
+ if format_type == "html":
1615
+ return self._apply_html_formatting(
1616
+ content,
1617
+ selection,
1618
+ f'<mark style="background-color: {color}">',
1619
+ "</mark>",
1620
+ )
1621
+ elif format_type == "markdown":
1622
+ return self._apply_markdown_formatting(content, selection, "==", "==")
1623
+ else:
1624
+ return content
1625
+
1626
+ def _apply_markdown_formatting(
1627
+ self,
1628
+ content: str,
1629
+ selection: Dict[str, Any],
1630
+ start_marker: str,
1631
+ end_marker: str,
1632
+ ) -> str:
1633
+ """Apply markdown formatting to selected text"""
1634
+ selected_text = self._extract_selected_text(content, selection)
1635
+ formatted_text = start_marker + selected_text + end_marker
1636
+ return self._replace_text(content, selection, formatted_text)
1637
+
1638
+ def _apply_html_formatting(
1639
+ self,
1640
+ content: str,
1641
+ selection: Dict[str, Any],
1642
+ start_tag: str,
1643
+ end_tag: str,
1644
+ ) -> str:
1645
+ """Apply HTML formatting to selected text"""
1646
+ selected_text = self._extract_selected_text(content, selection)
1647
+ formatted_text = start_tag + selected_text + end_tag
1648
+ return self._replace_text(content, selection, formatted_text)
1649
+
1650
+ def _extract_selected_text(self, content: str, selection: Dict[str, Any]) -> str:
1651
+ """Extract text from selection"""
1652
+ if "start_offset" in selection and "end_offset" in selection:
1653
+ return content[selection["start_offset"] : selection["end_offset"]]
1654
+ elif "start_line" in selection and "end_line" in selection:
1655
+ lines = content.split("\n")
1656
+ start_line = selection["start_line"]
1657
+ end_line = selection["end_line"]
1658
+ start_col = selection.get("start_column", 0)
1659
+ end_col = selection.get(
1660
+ "end_column",
1661
+ len(lines[end_line]) if end_line < len(lines) else 0,
1662
+ )
1663
+
1664
+ if start_line == end_line:
1665
+ return lines[start_line][start_col:end_col]
1666
+ else:
1667
+ result = [lines[start_line][start_col:]]
1668
+ result.extend(lines[start_line + 1 : end_line])
1669
+ if end_line < len(lines):
1670
+ result.append(lines[end_line][:end_col])
1671
+ return "\n".join(result)
1672
+ return ""
1673
+
1674
+ def _insert_line(
1675
+ self,
1676
+ content: str,
1677
+ position: Optional[Dict[str, Any]],
1678
+ line_content: str,
1679
+ ) -> str:
1680
+ """Insert a new line at specified position"""
1681
+ lines = content.split("\n")
1682
+ line_num = position.get("line", len(lines)) if position else len(lines)
1683
+
1684
+ lines.insert(line_num, line_content)
1685
+ return "\n".join(lines)
1686
+
1687
+ def _delete_line(self, content: str, position: Optional[Dict[str, Any]]) -> str:
1688
+ """Delete line at specified position"""
1689
+ lines = content.split("\n")
1690
+ line_num = position.get("line", 0) if position else 0
1691
+
1692
+ if 0 <= line_num < len(lines):
1693
+ del lines[line_num]
1694
+
1695
+ return "\n".join(lines)
1696
+
1697
+ def _move_line(
1698
+ self,
1699
+ content: str,
1700
+ position: Optional[Dict[str, Any]],
1701
+ options: Optional[Dict[str, Any]],
1702
+ ) -> str:
1703
+ """Move line to different position"""
1704
+ lines = content.split("\n")
1705
+ from_line = position.get("line", 0) if position else 0
1706
+ to_line = options.get("to_line", 0) if options else 0
1707
+
1708
+ if 0 <= from_line < len(lines) and 0 <= to_line < len(lines):
1709
+ line_content = lines.pop(from_line)
1710
+ lines.insert(to_line, line_content)
1711
+
1712
+ return "\n".join(lines)
1713
+
1714
+ def _copy_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Dict[str, Any]:
1715
+ """Copy selected text to clipboard"""
1716
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1717
+ self._store_clipboard_content(selected_text)
1718
+
1719
+ return {
1720
+ "operation": "copy",
1721
+ "copied_text": selected_text,
1722
+ "copied_length": len(selected_text),
1723
+ }
1724
+
1725
+ def _cut_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Tuple[str, str]:
1726
+ """Cut selected text (copy and delete)"""
1727
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1728
+ new_content = self._delete_text(content, selection) if selection else ""
1729
+
1730
+ return new_content, selected_text
1731
+
1732
+ def _paste_text(
1733
+ self,
1734
+ content: str,
1735
+ position: Optional[Dict[str, Any]],
1736
+ clipboard_content: str,
1737
+ ) -> str:
1738
+ """Paste text from clipboard"""
1739
+ return self._insert_text(content, clipboard_content, position)
1740
+
1741
+ def _store_clipboard_content(self, content: str):
1742
+ """Store content in clipboard (simplified implementation)"""
1743
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1744
+ try:
1745
+ with open(clipboard_file, "w", encoding="utf-8") as f:
1746
+ f.write(content)
1747
+ except Exception as e:
1748
+ self.logger.warning(f"Failed to store clipboard content: {e}")
1749
+
1750
+ def _get_clipboard_content(self) -> str:
1751
+ """Get content from clipboard"""
1752
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1753
+ try:
1754
+ with open(clipboard_file, "r", encoding="utf-8") as f:
1755
+ return f.read()
1756
+ except Exception:
1757
+ return ""
1758
+
1759
+ def _apply_text_formatting(
1760
+ self,
1761
+ content: str,
1762
+ text_to_format: str,
1763
+ format_type: EditOperation,
1764
+ options: Optional[Dict[str, Any]],
1765
+ ) -> str:
1766
+ """Apply formatting to all occurrences of specific text"""
1767
+ if format_type == EditOperation.BOLD:
1768
+ replacement = f"**{text_to_format}**"
1769
+ elif format_type == EditOperation.ITALIC:
1770
+ replacement = f"*{text_to_format}*"
1771
+ elif format_type == EditOperation.UNDERLINE:
1772
+ replacement = f"<u>{text_to_format}</u>"
1773
+ elif format_type == EditOperation.STRIKETHROUGH:
1774
+ replacement = f"~~{text_to_format}~~"
1775
+ elif format_type == EditOperation.HIGHLIGHT:
1776
+ color = options.get("color", "yellow") if options else "yellow"
1777
+ replacement = f'<mark style="background-color: {color}">{text_to_format}</mark>'
1778
+ else:
1779
+ replacement = text_to_format
1780
+
1781
+ return content.replace(text_to_format, replacement)
1782
+
1783
+ def _perform_find_replace(
1784
+ self,
1785
+ content: str,
1786
+ find_text: str,
1787
+ replace_text: str,
1788
+ replace_all: bool,
1789
+ case_sensitive: bool,
1790
+ regex_mode: bool,
1791
+ ) -> Tuple[str, int]:
1792
+ """Perform find and replace operation"""
1793
+ import re
1794
+
1795
+ replacements = 0
1796
+
1797
+ if regex_mode:
1798
+ flags = 0 if case_sensitive else re.IGNORECASE
1799
+ if replace_all:
1800
+ new_content, replacements = re.subn(find_text, replace_text, content, flags=flags)
1801
+ else:
1802
+ new_content = re.sub(find_text, replace_text, content, count=1, flags=flags)
1803
+ replacements = 1 if new_content != content else 0
1804
+ else:
1805
+ if case_sensitive:
1806
+ if replace_all:
1807
+ replacements = content.count(find_text)
1808
+ new_content = content.replace(find_text, replace_text)
1809
+ else:
1810
+ new_content = content.replace(find_text, replace_text, 1)
1811
+ replacements = 1 if new_content != content else 0
1812
+ else:
1813
+ # Case insensitive replacement
1814
+ import re
1815
+
1816
+ pattern = re.escape(find_text)
1817
+ if replace_all:
1818
+ new_content, replacements = re.subn(pattern, replace_text, content, flags=re.IGNORECASE)
1819
+ else:
1820
+ new_content = re.sub(
1821
+ pattern,
1822
+ replace_text,
1823
+ content,
1824
+ count=1,
1825
+ flags=re.IGNORECASE,
1826
+ )
1827
+ replacements = 1 if new_content != content else 0
1828
+
1829
+ return new_content, replacements