aiecs 1.0.8__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic. Click here for more details.

Files changed (81)
  1. aiecs/__init__.py +1 -1
  2. aiecs/aiecs_client.py +159 -1
  3. aiecs/config/config.py +6 -0
  4. aiecs/domain/__init__.py +95 -0
  5. aiecs/domain/community/__init__.py +159 -0
  6. aiecs/domain/community/agent_adapter.py +516 -0
  7. aiecs/domain/community/analytics.py +465 -0
  8. aiecs/domain/community/collaborative_workflow.py +99 -7
  9. aiecs/domain/community/communication_hub.py +649 -0
  10. aiecs/domain/community/community_builder.py +322 -0
  11. aiecs/domain/community/community_integration.py +365 -12
  12. aiecs/domain/community/community_manager.py +481 -5
  13. aiecs/domain/community/decision_engine.py +459 -13
  14. aiecs/domain/community/exceptions.py +238 -0
  15. aiecs/domain/community/models/__init__.py +36 -0
  16. aiecs/domain/community/resource_manager.py +1 -1
  17. aiecs/domain/community/shared_context_manager.py +621 -0
  18. aiecs/domain/context/__init__.py +24 -0
  19. aiecs/domain/context/context_engine.py +37 -33
  20. aiecs/main.py +20 -2
  21. aiecs/scripts/aid/VERSION_MANAGEMENT.md +97 -0
  22. aiecs/scripts/aid/__init__.py +15 -0
  23. aiecs/scripts/aid/version_manager.py +224 -0
  24. aiecs/scripts/dependance_check/__init__.py +18 -0
  25. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +51 -8
  26. aiecs/scripts/dependance_patch/__init__.py +8 -0
  27. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +12 -0
  28. aiecs/scripts/tools_develop/README.md +340 -0
  29. aiecs/scripts/tools_develop/__init__.py +16 -0
  30. aiecs/scripts/tools_develop/check_type_annotations.py +263 -0
  31. aiecs/scripts/tools_develop/validate_tool_schemas.py +346 -0
  32. aiecs/tools/__init__.py +53 -34
  33. aiecs/tools/docs/__init__.py +106 -0
  34. aiecs/tools/docs/ai_document_orchestrator.py +556 -0
  35. aiecs/tools/docs/ai_document_writer_orchestrator.py +2222 -0
  36. aiecs/tools/docs/content_insertion_tool.py +1234 -0
  37. aiecs/tools/docs/document_creator_tool.py +1179 -0
  38. aiecs/tools/docs/document_layout_tool.py +1105 -0
  39. aiecs/tools/docs/document_parser_tool.py +924 -0
  40. aiecs/tools/docs/document_writer_tool.py +1636 -0
  41. aiecs/tools/langchain_adapter.py +102 -51
  42. aiecs/tools/schema_generator.py +265 -0
  43. aiecs/tools/statistics/__init__.py +82 -0
  44. aiecs/tools/statistics/ai_data_analysis_orchestrator.py +581 -0
  45. aiecs/tools/statistics/ai_insight_generator_tool.py +473 -0
  46. aiecs/tools/statistics/ai_report_orchestrator_tool.py +629 -0
  47. aiecs/tools/statistics/data_loader_tool.py +518 -0
  48. aiecs/tools/statistics/data_profiler_tool.py +599 -0
  49. aiecs/tools/statistics/data_transformer_tool.py +531 -0
  50. aiecs/tools/statistics/data_visualizer_tool.py +460 -0
  51. aiecs/tools/statistics/model_trainer_tool.py +470 -0
  52. aiecs/tools/statistics/statistical_analyzer_tool.py +426 -0
  53. aiecs/tools/task_tools/chart_tool.py +2 -1
  54. aiecs/tools/task_tools/image_tool.py +43 -43
  55. aiecs/tools/task_tools/office_tool.py +48 -36
  56. aiecs/tools/task_tools/pandas_tool.py +37 -33
  57. aiecs/tools/task_tools/report_tool.py +67 -56
  58. aiecs/tools/task_tools/research_tool.py +32 -31
  59. aiecs/tools/task_tools/scraper_tool.py +53 -46
  60. aiecs/tools/task_tools/search_tool.py +1123 -0
  61. aiecs/tools/task_tools/stats_tool.py +20 -15
  62. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/METADATA +5 -1
  63. aiecs-1.2.0.dist-info/RECORD +135 -0
  64. aiecs-1.2.0.dist-info/entry_points.txt +10 -0
  65. aiecs/tools/task_tools/search_api.py +0 -7
  66. aiecs-1.0.8.dist-info/RECORD +0 -98
  67. aiecs-1.0.8.dist-info/entry_points.txt +0 -7
  68. /aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +0 -0
  69. /aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +0 -0
  70. /aiecs/scripts/{dependency_checker.py → dependance_check/dependency_checker.py} +0 -0
  71. /aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +0 -0
  72. /aiecs/scripts/{quick_dependency_check.py → dependance_check/quick_dependency_check.py} +0 -0
  73. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  74. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  75. /aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +0 -0
  76. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  77. /aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +0 -0
  78. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  79. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/WHEEL +0 -0
  80. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/licenses/LICENSE +0 -0
  81. {aiecs-1.0.8.dist-info → aiecs-1.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1636 @@
1
+ import os
2
+ import json
3
+ import time
4
+ import uuid
5
+ import hashlib
6
+ import logging
7
+ import asyncio
8
+ import shutil
9
+ from typing import Dict, Any, List, Optional, Union, Tuple
10
+ from enum import Enum
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ import tempfile
14
+
15
+ from pydantic import BaseModel, Field, ConfigDict
16
+ from pydantic import ValidationError as PydanticValidationError
17
+
18
+ from aiecs.tools.base_tool import BaseTool
19
+ from aiecs.tools import register_tool
20
+
21
+
22
class DocumentFormat(str, Enum):
    """Supported document formats for writing.

    Values are the file extensions used when serializing.
    Note: PLAIN_TEXT reuses the value "txt", so Enum aliasing makes it
    the *same* member as TXT (DocumentFormat.PLAIN_TEXT is DocumentFormat.TXT).
    """
    TXT = "txt"
    PLAIN_TEXT = "txt"  # Alias for TXT (Enum value aliasing)
    JSON = "json"
    CSV = "csv"
    XML = "xml"
    MARKDOWN = "md"
    HTML = "html"
    YAML = "yaml"
    PDF = "pdf"
    DOCX = "docx"
    XLSX = "xlsx"
    BINARY = "binary"
36
+
37
+
38
class WriteMode(str, Enum):
    """Document writing modes."""
    CREATE = "create"                # Create a new file; fail if it already exists
    OVERWRITE = "overwrite"          # Overwrite an existing file
    APPEND = "append"                # Append to an existing file
    UPDATE = "update"                # Update an existing file (smart merge)
    BACKUP_WRITE = "backup_write"    # Back up first, then write
    VERSION_WRITE = "version_write"  # Versioned write
    INSERT = "insert"                # Insert content at a given position
    REPLACE = "replace"              # Replace the given content
    DELETE = "delete"                # Delete the given content
49
+
50
+
51
class EditOperation(str, Enum):
    """Advanced edit operations."""
    BOLD = "bold"                      # Bold text
    ITALIC = "italic"                  # Italic text
    UNDERLINE = "underline"            # Underlined text
    STRIKETHROUGH = "strikethrough"    # Strikethrough text
    HIGHLIGHT = "highlight"            # Highlighted text
    INSERT_TEXT = "insert_text"        # Insert text
    DELETE_TEXT = "delete_text"        # Delete text
    REPLACE_TEXT = "replace_text"      # Replace text
    COPY_TEXT = "copy_text"            # Copy text
    CUT_TEXT = "cut_text"              # Cut text
    PASTE_TEXT = "paste_text"          # Paste text
    FIND_REPLACE = "find_replace"      # Find and replace
    INSERT_LINE = "insert_line"        # Insert a line
    DELETE_LINE = "delete_line"        # Delete a line
    MOVE_LINE = "move_line"            # Move a line
68
+
69
+
70
class EncodingType(str, Enum):
    """Text encoding types."""
    UTF8 = "utf-8"
    UTF16 = "utf-16"
    ASCII = "ascii"
    GBK = "gbk"
    AUTO = "auto"  # Sentinel for auto-detection; detection logic not visible in this block
77
+
78
+
79
class ValidationLevel(str, Enum):
    """Content validation levels."""
    NONE = "none"              # No validation
    BASIC = "basic"            # Basic validation (format, size)
    STRICT = "strict"          # Strict validation (content, structure)
    ENTERPRISE = "enterprise"  # Enterprise-grade validation (security, compliance)
85
+
86
+
87
+
88
+
89
class DocumentWriterError(Exception):
    """Base exception for document writer errors.

    All exceptions raised by DocumentWriterTool derive from this class,
    so callers can catch the whole family with one except clause.
    """
    pass


class WriteError(DocumentWriterError):
    """Raised when write operations fail"""
    pass


class ValidationError(DocumentWriterError):
    """Raised when validation fails.

    NOTE: distinct from pydantic's ValidationError, which this module
    imports as ``PydanticValidationError``.
    """
    pass


class SecurityError(DocumentWriterError):
    """Raised when security validation fails"""
    pass


class WritePermissionError(DocumentWriterError):
    """Raised when write permission is denied"""
    pass


class ContentValidationError(DocumentWriterError):
    """Raised when content validation fails"""
    pass


class StorageError(DocumentWriterError):
    """Raised when storage operations fail"""
    pass
122
+
123
+
124
+ @register_tool("document_writer")
125
+ class DocumentWriterTool(BaseTool):
126
+ """
127
+ Modern high-performance document writing component that can:
128
+ 1. Handle multiple document formats and encodings
129
+ 2. Provide production-grade write operations with validation
130
+ 3. Support various write modes (create, overwrite, append, update)
131
+ 4. Implement backup and versioning strategies
132
+ 5. Ensure atomic operations and data integrity
133
+ 6. Support both local and cloud storage
134
+
135
+ Production Features:
136
+ - Atomic writes (no partial writes)
137
+ - Content validation and security scanning
138
+ - Automatic backup and versioning
139
+ - Write permission and quota checks
140
+ - Transaction-like operations
141
+ - Audit logging
142
+ """
143
+
144
+ # Configuration schema
145
    class Config(BaseModel):
        """Configuration for the document writer tool.

        NOTE(review): ``env_prefix`` is a pydantic-settings (BaseSettings)
        option; on a plain BaseModel ConfigDict it has no effect — confirm
        whether environment-variable loading is actually expected here.

        NOTE(review): several flags appear duplicated
        (enable_backup/auto_backup, atomic_write/atomic_writes,
        enable_versioning/version_control, enable_security_scan/security_scan);
        only the ``enable_*``/``atomic_write`` variants are read by the
        methods visible in this file.
        """
        model_config = ConfigDict(env_prefix="DOC_WRITER_")

        temp_dir: str = Field(
            default=os.path.join(tempfile.gettempdir(), 'document_writer'),
            description="Temporary directory for document processing"
        )
        backup_dir: str = Field(
            default=os.path.join(tempfile.gettempdir(), 'document_backups'),
            description="Directory for document backups"
        )
        output_dir: Optional[str] = Field(
            default=None,
            description="Default output directory for documents"
        )
        max_file_size: int = Field(
            default=100 * 1024 * 1024,  # 100 MiB
            description="Maximum file size in bytes"
        )
        max_backup_versions: int = Field(
            default=10,
            description="Maximum number of backup versions to keep"
        )
        default_encoding: str = Field(
            default="utf-8",
            description="Default text encoding for documents"
        )
        enable_backup: bool = Field(
            default=True,
            description="Whether to enable automatic backup functionality"
        )
        enable_versioning: bool = Field(
            default=True,
            description="Whether to enable document versioning"
        )
        enable_content_validation: bool = Field(
            default=True,
            description="Whether to enable content validation"
        )
        enable_security_scan: bool = Field(
            default=True,
            description="Whether to enable security scanning"
        )
        atomic_write: bool = Field(
            default=True,
            description="Whether to use atomic write operations"
        )
        validation_level: str = Field(
            default="basic",
            description="Content validation level"
        )
        timeout_seconds: int = Field(
            default=60,
            description="Operation timeout in seconds"
        )
        auto_backup: bool = Field(
            default=True,
            description="Whether to automatically backup before write operations"
        )
        atomic_writes: bool = Field(
            default=True,
            description="Whether to use atomic write operations"
        )
        default_format: str = Field(
            default="md",
            description="Default document format"
        )
        version_control: bool = Field(
            default=True,
            description="Whether to enable version control"
        )
        security_scan: bool = Field(
            default=True,
            description="Whether to enable security scanning"
        )
        enable_cloud_storage: bool = Field(
            default=True,
            description="Whether to enable cloud storage integration"
        )
        gcs_bucket_name: str = Field(
            default="aiecs-documents",
            description="Google Cloud Storage bucket name"
        )
        gcs_project_id: Optional[str] = Field(
            default=None,
            description="Google Cloud Storage project ID"
        )
233
+
234
    def __init__(self, config: Optional[Dict] = None):
        """Initialize DocumentWriterTool with settings.

        Args:
            config: Optional dict of overrides for ``Config`` fields;
                invalid keys/values raise pydantic validation errors.
        """
        super().__init__(config)

        # Parse configuration
        self.config = self.Config(**(config or {}))

        self.logger = logging.getLogger(__name__)

        # Create necessary directories
        os.makedirs(self.config.temp_dir, exist_ok=True)
        os.makedirs(self.config.backup_dir, exist_ok=True)

        # Initialize cloud storage (best-effort; disables itself on failure)
        self._init_cloud_storage()

        # Initialize content validators
        self._init_validators()
252
+
253
+ def _init_cloud_storage(self):
254
+ """Initialize cloud storage for document writing"""
255
+ self.file_storage = None
256
+
257
+ if self.config.enable_cloud_storage:
258
+ try:
259
+ from aiecs.infrastructure.persistence.file_storage import FileStorage
260
+
261
+ storage_config = {
262
+ 'gcs_bucket_name': self.config.gcs_bucket_name,
263
+ 'gcs_project_id': self.config.gcs_project_id,
264
+ 'enable_local_fallback': True,
265
+ 'local_storage_path': self.config.temp_dir
266
+ }
267
+
268
+ self.file_storage = FileStorage(storage_config)
269
+ asyncio.create_task(self._init_storage_async())
270
+
271
+ except ImportError:
272
+ self.logger.warning("FileStorage not available, cloud storage disabled")
273
+ except Exception as e:
274
+ self.logger.warning(f"Failed to initialize cloud storage: {e}")
275
+
276
+ async def _init_storage_async(self):
277
+ """Async initialization of file storage"""
278
+ try:
279
+ if self.file_storage:
280
+ await self.file_storage.initialize()
281
+ self.logger.info("Cloud storage initialized successfully")
282
+ except Exception as e:
283
+ self.logger.warning(f"Cloud storage initialization failed: {e}")
284
+ self.file_storage = None
285
+
286
    def _init_validators(self):
        """Build the per-format content-validator dispatch table.

        Formats without an entry (txt, md, pdf, docx, xlsx, binary, ...)
        presumably skip format-specific validation — the consumer
        (_validate_content) is not visible in this block.
        """
        self.validators = {
            DocumentFormat.JSON: self._validate_json_content,
            DocumentFormat.XML: self._validate_xml_content,
            DocumentFormat.CSV: self._validate_csv_content,
            DocumentFormat.YAML: self._validate_yaml_content,
            DocumentFormat.HTML: self._validate_html_content
        }
295
+
296
+ # Schema definitions
297
    # --- Operation input schemas (pydantic models describing tool inputs) ---

    class WriteDocumentSchema(BaseModel):
        """Schema for the write_document operation"""
        target_path: str = Field(description="Target file path (local or cloud)")
        content: Union[str, bytes, Dict, List] = Field(description="Content to write")
        format: DocumentFormat = Field(description="Document format")
        mode: WriteMode = Field(default=WriteMode.CREATE, description="Write mode")
        encoding: EncodingType = Field(default=EncodingType.UTF8, description="Text encoding")
        validation_level: ValidationLevel = Field(default=ValidationLevel.BASIC, description="Validation level")
        metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
        backup_comment: Optional[str] = Field(default=None, description="Backup comment")

    class BatchWriteSchema(BaseModel):
        """Schema for the batch_write_documents operation"""
        write_operations: List[Dict[str, Any]] = Field(description="List of write operations")
        transaction_mode: bool = Field(default=True, description="Use transaction mode")
        rollback_on_error: bool = Field(default=True, description="Rollback on any error")

    class EditDocumentSchema(BaseModel):
        """Schema for the edit_document operation"""
        target_path: str = Field(description="Target file path")
        operation: EditOperation = Field(description="Edit operation to perform")
        content: Optional[str] = Field(default=None, description="Content for the operation")
        position: Optional[Dict[str, Any]] = Field(default=None, description="Position info (line, column, offset)")
        selection: Optional[Dict[str, Any]] = Field(default=None, description="Text selection range")
        format_options: Optional[Dict[str, Any]] = Field(default=None, description="Formatting options")

    class FormatTextSchema(BaseModel):
        """Schema for the format_text operation"""
        target_path: str = Field(description="Target file path")
        text_to_format: str = Field(description="Text to apply formatting to")
        format_type: EditOperation = Field(description="Type of formatting")
        format_options: Optional[Dict[str, Any]] = Field(default=None, description="Additional format options")

    class FindReplaceSchema(BaseModel):
        """Schema for the find_replace operation"""
        target_path: str = Field(description="Target file path")
        find_text: str = Field(description="Text to find")
        replace_text: str = Field(description="Text to replace with")
        replace_all: bool = Field(default=False, description="Replace all occurrences")
        case_sensitive: bool = Field(default=True, description="Case sensitive search")
        regex_mode: bool = Field(default=False, description="Use regex for find/replace")
338
+
339
+ def write_document(self,
340
+ target_path: str,
341
+ content: Union[str, bytes, Dict, List],
342
+ format: DocumentFormat,
343
+ mode: WriteMode = WriteMode.CREATE,
344
+ encoding: EncodingType = EncodingType.UTF8,
345
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
346
+ metadata: Optional[Dict[str, Any]] = None,
347
+ backup_comment: Optional[str] = None) -> Dict[str, Any]:
348
+ """
349
+ Write document with production-grade features
350
+
351
+ Args:
352
+ target_path: Target file path (local or cloud)
353
+ content: Content to write
354
+ format: Document format
355
+ mode: Write mode (create, overwrite, append, update, etc.)
356
+ encoding: Text encoding
357
+ validation_level: Content validation level
358
+ metadata: Additional metadata
359
+ backup_comment: Comment for backup
360
+
361
+ Returns:
362
+ Dict containing write results and metadata
363
+ """
364
+ try:
365
+ start_time = datetime.now()
366
+ operation_id = str(uuid.uuid4())
367
+
368
+ self.logger.info(f"Starting write operation {operation_id}: {target_path}")
369
+
370
+ # Step 1: Validate inputs
371
+ self._validate_write_inputs(target_path, content, format, mode)
372
+
373
+ # Step 2: Prepare content
374
+ processed_content, content_metadata = self._prepare_content(
375
+ content, format, encoding, validation_level
376
+ )
377
+
378
+ # Step 3: Handle write mode logic
379
+ write_plan = self._plan_write_operation(target_path, mode, metadata)
380
+
381
+ # Step 4: Create backup if needed
382
+ backup_info = None
383
+ if self.config.enable_backup and mode in [WriteMode.OVERWRITE, WriteMode.UPDATE]:
384
+ backup_info = self._create_backup(target_path, backup_comment)
385
+
386
+ # Step 5: Execute atomic write
387
+ write_result = self._execute_atomic_write(
388
+ target_path, processed_content, format, encoding, write_plan
389
+ )
390
+
391
+ # Step 6: Update metadata and versioning
392
+ version_info = self._handle_versioning(target_path, content_metadata, metadata)
393
+
394
+ # Step 7: Audit logging
395
+ audit_info = self._log_write_operation(
396
+ operation_id, target_path, mode, write_result, backup_info
397
+ )
398
+
399
+ result = {
400
+ "operation_id": operation_id,
401
+ "target_path": target_path,
402
+ "write_mode": mode,
403
+ "format": format,
404
+ "encoding": encoding,
405
+ "content_metadata": content_metadata,
406
+ "write_result": write_result,
407
+ "backup_info": backup_info,
408
+ "version_info": version_info,
409
+ "audit_info": audit_info,
410
+ "processing_metadata": {
411
+ "start_time": start_time.isoformat(),
412
+ "end_time": datetime.now().isoformat(),
413
+ "duration": (datetime.now() - start_time).total_seconds()
414
+ }
415
+ }
416
+
417
+ self.logger.info(f"Write operation {operation_id} completed successfully")
418
+ return result
419
+
420
+ except Exception as e:
421
+ self.logger.error(f"Write operation failed for {target_path}: {str(e)}")
422
+ # Rollback if needed
423
+ if 'backup_info' in locals() and backup_info:
424
+ self._rollback_from_backup(target_path, backup_info)
425
+ raise DocumentWriterError(f"Document write failed: {str(e)}")
426
+
427
+ async def write_document_async(self,
428
+ target_path: str,
429
+ content: Union[str, bytes, Dict, List],
430
+ format: DocumentFormat,
431
+ mode: WriteMode = WriteMode.CREATE,
432
+ encoding: EncodingType = EncodingType.UTF8,
433
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
434
+ metadata: Optional[Dict[str, Any]] = None,
435
+ backup_comment: Optional[str] = None) -> Dict[str, Any]:
436
+ """Async version of write_document"""
437
+ return await asyncio.to_thread(
438
+ self.write_document,
439
+ target_path=target_path,
440
+ content=content,
441
+ format=format,
442
+ mode=mode,
443
+ encoding=encoding,
444
+ validation_level=validation_level,
445
+ metadata=metadata,
446
+ backup_comment=backup_comment
447
+ )
448
+
449
+ def batch_write_documents(self,
450
+ write_operations: List[Dict[str, Any]],
451
+ transaction_mode: bool = True,
452
+ rollback_on_error: bool = True) -> Dict[str, Any]:
453
+ """
454
+ Batch write multiple documents with transaction support
455
+
456
+ Args:
457
+ write_operations: List of write operation dictionaries
458
+ transaction_mode: Use transaction mode for atomicity
459
+ rollback_on_error: Rollback all operations on any error
460
+
461
+ Returns:
462
+ Dict containing batch write results
463
+ """
464
+ try:
465
+ start_time = datetime.now()
466
+ batch_id = str(uuid.uuid4())
467
+
468
+ self.logger.info(f"Starting batch write operation {batch_id}: {len(write_operations)} operations")
469
+
470
+ completed_operations = []
471
+ backup_operations = []
472
+
473
+ try:
474
+ for i, operation in enumerate(write_operations):
475
+ self.logger.info(f"Processing operation {i+1}/{len(write_operations)}")
476
+
477
+ # Execute individual write operation
478
+ result = self.write_document(**operation)
479
+ completed_operations.append({
480
+ "index": i,
481
+ "operation": operation,
482
+ "result": result,
483
+ "status": "success"
484
+ })
485
+
486
+ # Track backup info for potential rollback
487
+ if result.get("backup_info"):
488
+ backup_operations.append(result["backup_info"])
489
+
490
+ batch_result = {
491
+ "batch_id": batch_id,
492
+ "total_operations": len(write_operations),
493
+ "successful_operations": len(completed_operations),
494
+ "failed_operations": 0,
495
+ "operations": completed_operations,
496
+ "transaction_mode": transaction_mode,
497
+ "batch_metadata": {
498
+ "start_time": start_time.isoformat(),
499
+ "end_time": datetime.now().isoformat(),
500
+ "duration": (datetime.now() - start_time).total_seconds()
501
+ }
502
+ }
503
+
504
+ self.logger.info(f"Batch write operation {batch_id} completed successfully")
505
+ return batch_result
506
+
507
+ except Exception as e:
508
+ self.logger.error(f"Batch write operation {batch_id} failed: {str(e)}")
509
+
510
+ if rollback_on_error and transaction_mode:
511
+ self.logger.info(f"Rolling back batch operation {batch_id}")
512
+ self._rollback_batch_operations(completed_operations, backup_operations)
513
+
514
+ # Create failure result
515
+ batch_result = {
516
+ "batch_id": batch_id,
517
+ "total_operations": len(write_operations),
518
+ "successful_operations": len(completed_operations),
519
+ "failed_operations": len(write_operations) - len(completed_operations),
520
+ "operations": completed_operations,
521
+ "error": str(e),
522
+ "transaction_mode": transaction_mode,
523
+ "rollback_performed": rollback_on_error and transaction_mode
524
+ }
525
+
526
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
527
+
528
+ except Exception as e:
529
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
530
+
531
    def _validate_write_inputs(self, target_path: str, content: Any, format: DocumentFormat, mode: WriteMode) -> None:
        """Validate write-operation inputs before any I/O happens.

        Raises:
            ValueError: for an invalid path, None content, or content larger
                than ``config.max_file_size``.
            WritePermissionError: when ``_check_write_permission`` denies the
                write (helper not visible in this block).
        """
        # Path validation
        if not target_path or not isinstance(target_path, str):
            raise ValueError("Invalid target path")

        # Content validation
        if content is None:
            raise ValueError("Content cannot be None")

        # Size validation (sizing rule lives in _calculate_content_size)
        content_size = self._calculate_content_size(content)
        if content_size > self.config.max_file_size:
            raise ValueError(f"Content size {content_size} exceeds maximum {self.config.max_file_size}")

        # Permission validation
        if not self._check_write_permission(target_path, mode):
            raise WritePermissionError(f"No write permission for {target_path}")
549
+
550
+ def _prepare_content(self, content: Any, format: DocumentFormat,
551
+ encoding: EncodingType, validation_level: ValidationLevel) -> Tuple[Union[str, bytes], Dict]:
552
+ """Prepare and validate content for writing"""
553
+
554
+ # Content conversion based on format
555
+ if format == DocumentFormat.JSON:
556
+ if isinstance(content, (dict, list)):
557
+ processed_content = json.dumps(content, ensure_ascii=False, indent=2)
558
+ else:
559
+ processed_content = str(content)
560
+ elif format == DocumentFormat.CSV:
561
+ processed_content = self._convert_to_csv(content)
562
+ elif format == DocumentFormat.XML:
563
+ processed_content = self._convert_to_xml(content)
564
+ elif format == DocumentFormat.YAML:
565
+ processed_content = self._convert_to_yaml(content)
566
+ elif format == DocumentFormat.HTML:
567
+ processed_content = self._convert_to_html(content)
568
+ elif format == DocumentFormat.MARKDOWN:
569
+ processed_content = self._convert_to_markdown(content)
570
+ elif format == DocumentFormat.BINARY:
571
+ if isinstance(content, bytes):
572
+ processed_content = content
573
+ else:
574
+ processed_content = str(content).encode(encoding.value)
575
+ else:
576
+ processed_content = str(content)
577
+
578
+ # Content validation
579
+ if self.config.enable_content_validation:
580
+ self._validate_content(processed_content, format, validation_level)
581
+
582
+ # Calculate metadata
583
+ content_metadata = {
584
+ "original_type": type(content).__name__,
585
+ "processed_size": len(processed_content) if isinstance(processed_content, (str, bytes)) else 0,
586
+ "format": format,
587
+ "encoding": encoding,
588
+ "checksum": self._calculate_checksum(processed_content),
589
+ "validation_level": validation_level,
590
+ "timestamp": datetime.now().isoformat()
591
+ }
592
+
593
+ return processed_content, content_metadata
594
+
595
+ def _plan_write_operation(self, target_path: str, mode: WriteMode, metadata: Optional[Dict]) -> Dict:
596
+ """Plan the write operation based on mode and target"""
597
+
598
+ plan = {
599
+ "target_path": target_path,
600
+ "mode": mode,
601
+ "file_exists": self._file_exists(target_path),
602
+ "is_cloud_path": self._is_cloud_storage_path(target_path),
603
+ "requires_backup": False,
604
+ "requires_versioning": False,
605
+ "atomic_operation": self.config.atomic_write
606
+ }
607
+
608
+ if mode == WriteMode.CREATE and plan["file_exists"]:
609
+ raise DocumentWriterError(f"File already exists: {target_path}")
610
+
611
+ if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
612
+ plan["requires_backup"] = self.config.enable_backup
613
+ plan["requires_versioning"] = self.config.enable_versioning
614
+
615
+ if mode == WriteMode.APPEND and not plan["file_exists"]:
616
+ # Convert to CREATE mode
617
+ plan["mode"] = WriteMode.CREATE
618
+
619
+ return plan
620
+
621
+ def _create_backup(self, target_path: str, comment: Optional[str] = None) -> Dict:
622
+ """Create backup of existing file"""
623
+ if not self._file_exists(target_path):
624
+ return None
625
+
626
+ try:
627
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
628
+ file_stem = Path(target_path).stem
629
+ file_suffix = Path(target_path).suffix
630
+
631
+ backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
632
+ backup_path = os.path.join(self.config.backup_dir, backup_filename)
633
+
634
+ # Copy file to backup location
635
+ if self._is_cloud_storage_path(target_path):
636
+ backup_path = self._backup_cloud_file(target_path, backup_path)
637
+ else:
638
+ shutil.copy2(target_path, backup_path)
639
+
640
+ backup_info = {
641
+ "original_path": target_path,
642
+ "backup_path": backup_path,
643
+ "timestamp": timestamp,
644
+ "comment": comment,
645
+ "checksum": self._calculate_file_checksum(target_path)
646
+ }
647
+
648
+ self.logger.info(f"Created backup: {backup_path}")
649
+ return backup_info
650
+
651
+ except Exception as e:
652
+ self.logger.error(f"Failed to create backup for {target_path}: {e}")
653
+ raise StorageError(f"Backup creation failed: {e}")
654
+
655
+ def _execute_atomic_write(self, target_path: str, content: Union[str, bytes],
656
+ format: DocumentFormat, encoding: EncodingType, plan: Dict) -> Dict:
657
+ """Execute atomic write operation"""
658
+
659
+ if plan["is_cloud_path"]:
660
+ return self._write_to_cloud_storage(target_path, content, format, encoding, plan)
661
+ else:
662
+ return self._write_to_local_file(target_path, content, format, encoding, plan)
663
+
664
+ def _write_to_local_file(self, target_path: str, content: Union[str, bytes],
665
+ format: DocumentFormat, encoding: EncodingType, plan: Dict) -> Dict:
666
+ """Write to local file system with atomic operation"""
667
+
668
+ try:
669
+ # Create parent directories
670
+ os.makedirs(os.path.dirname(target_path), exist_ok=True)
671
+
672
+ if plan["atomic_operation"]:
673
+ # Atomic write using temporary file
674
+ temp_path = f"{target_path}.tmp.{uuid.uuid4().hex}"
675
+
676
+ try:
677
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
678
+ # Read existing content first
679
+ with open(target_path, 'rb') as f:
680
+ existing_content = f.read()
681
+
682
+ if isinstance(content, str):
683
+ content = existing_content.decode(encoding.value) + content
684
+ else:
685
+ content = existing_content + content
686
+
687
+ # Write to temporary file
688
+ if isinstance(content, bytes):
689
+ with open(temp_path, 'wb') as f:
690
+ f.write(content)
691
+ else:
692
+ # Handle both EncodingType enum and string
693
+ enc_value = encoding.value if hasattr(encoding, 'value') else str(encoding)
694
+ with open(temp_path, 'w', encoding=enc_value) as f:
695
+ f.write(content)
696
+
697
+ # Atomic move
698
+ shutil.move(temp_path, target_path)
699
+
700
+ finally:
701
+ # Cleanup temp file if it still exists
702
+ if os.path.exists(temp_path):
703
+ os.unlink(temp_path)
704
+ else:
705
+ # Direct write
706
+ mode_map = {
707
+ WriteMode.CREATE: 'w',
708
+ WriteMode.OVERWRITE: 'w',
709
+ WriteMode.APPEND: 'a',
710
+ WriteMode.UPDATE: 'w'
711
+ }
712
+
713
+ file_mode = mode_map.get(plan["mode"], 'w')
714
+ if isinstance(content, bytes):
715
+ file_mode += 'b'
716
+
717
+ # Handle both EncodingType enum and string
718
+ enc_value = None if isinstance(content, bytes) else (encoding.value if hasattr(encoding, 'value') else str(encoding))
719
+ with open(target_path, file_mode, encoding=enc_value) as f:
720
+ f.write(content)
721
+
722
+ # Get file stats
723
+ stat = os.stat(target_path)
724
+
725
+ return {
726
+ "path": target_path,
727
+ "size": stat.st_size,
728
+ "checksum": self._calculate_file_checksum(target_path),
729
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
730
+ "atomic_write": plan["atomic_operation"]
731
+ }
732
+
733
+ except Exception as e:
734
+ raise StorageError(f"Local file write failed: {e}")
735
+
736
    async def _write_to_cloud_storage(self, target_path: str, content: Union[str, bytes],
                                      format: DocumentFormat, encoding: EncodingType, plan: Dict) -> Dict:
        """Write content to the configured cloud storage backend.

        Returns:
            Dict with the logical path, backend storage path, size, checksum
            and a ``cloud_storage`` marker.

        Raises:
            StorageError: when storage is unavailable or the write fails.
        """

        if not self.file_storage:
            raise StorageError("Cloud storage not available")

        try:
            storage_path = self._parse_cloud_storage_path(target_path)

            # Cloud backends have no native append: read-modify-write instead.
            # NOTE(review): when existing and new content types differ
            # (str vs bytes) neither branch runs and the existing content is
            # silently overwritten — confirm this is intended.
            if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
                existing_content = await self.file_storage.retrieve(storage_path)
                if isinstance(content, str) and isinstance(existing_content, str):
                    content = existing_content + content
                elif isinstance(content, bytes) and isinstance(existing_content, bytes):
                    content = existing_content + content

            # Store in cloud storage
            await self.file_storage.store(storage_path, content)

            return {
                "path": target_path,
                "storage_path": storage_path,
                # NOTE(review): len() of a str counts characters, not bytes
                "size": len(content) if isinstance(content, (str, bytes)) else 0,
                "checksum": self._calculate_checksum(content),
                "cloud_storage": True
            }

        except Exception as e:
            raise StorageError(f"Cloud storage write failed: {e}")
767
+
768
+ def _handle_versioning(self, target_path: str, content_metadata: Dict, metadata: Optional[Dict]) -> Optional[Dict]:
769
+ """Handle document versioning"""
770
+
771
+ if not self.config.enable_versioning:
772
+ return None
773
+
774
+ try:
775
+ version_info = {
776
+ "path": target_path,
777
+ "version": self._get_next_version(target_path),
778
+ "timestamp": datetime.now().isoformat(),
779
+ "content_metadata": content_metadata,
780
+ "user_metadata": metadata or {}
781
+ }
782
+
783
+ # Store version info
784
+ version_file = f"{target_path}.versions.json"
785
+ versions = self._load_version_history(version_file)
786
+ versions.append(version_info)
787
+
788
+ # Keep only recent versions
789
+ if len(versions) > self.config.max_backup_versions:
790
+ versions = versions[-self.config.max_backup_versions:]
791
+
792
+ self._save_version_history(version_file, versions)
793
+
794
+ return version_info
795
+
796
+ except Exception as e:
797
+ self.logger.warning(f"Versioning failed for {target_path}: {e}")
798
+ return None
799
+
800
+ def _validate_content(self, content: Union[str, bytes], format: DocumentFormat,
801
+ validation_level: ValidationLevel):
802
+ """Validate content based on format and validation level"""
803
+
804
+ if validation_level == ValidationLevel.NONE:
805
+ return
806
+
807
+ try:
808
+ # Format-specific validation
809
+ if format in self.validators:
810
+ self.validators[format](content, validation_level)
811
+
812
+ # Security validation for enterprise level
813
+ if validation_level == ValidationLevel.ENTERPRISE:
814
+ self._security_scan_content(content)
815
+
816
+ except Exception as e:
817
+ raise ContentValidationError(f"Content validation failed: {e}")
818
+
819
+ def _validate_json_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
820
+ """Validate JSON content"""
821
+ try:
822
+ if isinstance(content, bytes):
823
+ content = content.decode('utf-8')
824
+ json.loads(content)
825
+ except json.JSONDecodeError as e:
826
+ raise ContentValidationError(f"Invalid JSON: {e}")
827
+
828
+ def _validate_xml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
829
+ """Validate XML content"""
830
+ try:
831
+ import xml.etree.ElementTree as ET
832
+ if isinstance(content, bytes):
833
+ content = content.decode('utf-8')
834
+ ET.fromstring(content)
835
+ except ET.ParseError as e:
836
+ raise ContentValidationError(f"Invalid XML: {e}")
837
+
838
+ def _validate_csv_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
839
+ """Validate CSV content"""
840
+ try:
841
+ import csv
842
+ import io
843
+ if isinstance(content, bytes):
844
+ content = content.decode('utf-8')
845
+ csv.reader(io.StringIO(content))
846
+ except Exception as e:
847
+ raise ContentValidationError(f"Invalid CSV: {e}")
848
+
849
+ def _validate_yaml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
850
+ """Validate YAML content"""
851
+ try:
852
+ import yaml
853
+ if isinstance(content, bytes):
854
+ content = content.decode('utf-8')
855
+ yaml.safe_load(content)
856
+ except yaml.YAMLError as e:
857
+ raise ContentValidationError(f"Invalid YAML: {e}")
858
+
859
+ def _validate_html_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
860
+ """Validate HTML content"""
861
+ try:
862
+ from bs4 import BeautifulSoup
863
+ if isinstance(content, bytes):
864
+ content = content.decode('utf-8')
865
+ BeautifulSoup(content, 'html.parser')
866
+ except Exception as e:
867
+ raise ContentValidationError(f"Invalid HTML: {e}")
868
+
869
+ def _security_scan_content(self, content: Union[str, bytes]):
870
+ """Perform security scan on content"""
871
+ if isinstance(content, bytes):
872
+ content = content.decode('utf-8', errors='ignore')
873
+
874
+ # Check for suspicious patterns
875
+ suspicious_patterns = [
876
+ r'<script[^>]*>', # JavaScript
877
+ r'javascript:', # JavaScript URLs
878
+ r'vbscript:', # VBScript URLs
879
+ r'data:.*base64', # Base64 data URLs
880
+ r'eval\s*\(', # eval() calls
881
+ r'exec\s*\(', # exec() calls
882
+ ]
883
+
884
+ import re
885
+ for pattern in suspicious_patterns:
886
+ if re.search(pattern, content, re.IGNORECASE):
887
+ raise ContentValidationError(f"Security scan failed: suspicious pattern detected")
888
+
889
+ # Helper methods
890
+ def _calculate_content_size(self, content: Any) -> int:
891
+ """Calculate content size in bytes"""
892
+ if isinstance(content, bytes):
893
+ return len(content)
894
+ elif isinstance(content, str):
895
+ return len(content.encode('utf-8'))
896
+ else:
897
+ return len(str(content).encode('utf-8'))
898
+
899
+ def _calculate_checksum(self, content: Union[str, bytes]) -> str:
900
+ """Calculate content checksum"""
901
+ if isinstance(content, str):
902
+ content = content.encode('utf-8')
903
+ return hashlib.sha256(content).hexdigest()
904
+
905
+ def _calculate_file_checksum(self, file_path: str) -> str:
906
+ """Calculate file checksum"""
907
+ hash_sha256 = hashlib.sha256()
908
+ with open(file_path, "rb") as f:
909
+ for chunk in iter(lambda: f.read(4096), b""):
910
+ hash_sha256.update(chunk)
911
+ return hash_sha256.hexdigest()
912
+
913
+ def _check_write_permission(self, target_path: str, mode: WriteMode) -> bool:
914
+ """Check write permission for target path"""
915
+ try:
916
+ if self._is_cloud_storage_path(target_path):
917
+ return self.file_storage is not None
918
+
919
+ parent_dir = os.path.dirname(target_path)
920
+ if not os.path.exists(parent_dir):
921
+ # Check if we can create the directory
922
+ return os.access(os.path.dirname(parent_dir), os.W_OK)
923
+
924
+ if os.path.exists(target_path):
925
+ return os.access(target_path, os.W_OK)
926
+ else:
927
+ return os.access(parent_dir, os.W_OK)
928
+
929
+ except Exception:
930
+ return False
931
+
932
+ def _file_exists(self, file_path: str) -> bool:
933
+ """Check if file exists (local or cloud)"""
934
+ if self._is_cloud_storage_path(file_path):
935
+ # For cloud storage, we'd need to implement exists check
936
+ return False # Simplified for now
937
+ else:
938
+ return os.path.exists(file_path)
939
+
940
+ def _is_cloud_storage_path(self, source: str) -> bool:
941
+ """Check if source is a cloud storage path"""
942
+ cloud_schemes = ['gs', 's3', 'azure', 'cloud']
943
+ try:
944
+ from urllib.parse import urlparse
945
+ parsed = urlparse(source)
946
+ return parsed.scheme in cloud_schemes
947
+ except:
948
+ return False
949
+
950
+ def _parse_cloud_storage_path(self, source: str) -> str:
951
+ """Parse cloud storage path to get storage key"""
952
+ try:
953
+ from urllib.parse import urlparse
954
+ parsed = urlparse(source)
955
+ return parsed.path.lstrip('/')
956
+ except Exception:
957
+ return source
958
+
959
+ # Content conversion methods
960
+ def _convert_to_csv(self, content: Any) -> str:
961
+ """Convert content to CSV format"""
962
+ import csv
963
+ import io
964
+
965
+ output = io.StringIO()
966
+ writer = csv.writer(output)
967
+
968
+ if isinstance(content, list):
969
+ for row in content:
970
+ if isinstance(row, (list, tuple)):
971
+ writer.writerow(row)
972
+ else:
973
+ writer.writerow([row])
974
+ elif isinstance(content, dict):
975
+ # Convert dict to CSV with headers
976
+ if content:
977
+ headers = list(content.keys())
978
+ writer.writerow(headers)
979
+ writer.writerow([content[h] for h in headers])
980
+ else:
981
+ writer.writerow([str(content)])
982
+
983
+ return output.getvalue()
984
+
985
+ def _convert_to_xml(self, content: Any) -> str:
986
+ """Convert content to XML format"""
987
+ import xml.etree.ElementTree as ET
988
+
989
+ if isinstance(content, dict):
990
+ root = ET.Element("document")
991
+ for key, value in content.items():
992
+ elem = ET.SubElement(root, str(key))
993
+ elem.text = str(value)
994
+ return ET.tostring(root, encoding='unicode')
995
+ else:
996
+ root = ET.Element("document")
997
+ root.text = str(content)
998
+ return ET.tostring(root, encoding='unicode')
999
+
1000
+ def _convert_to_yaml(self, content: Any) -> str:
1001
+ """Convert content to YAML format"""
1002
+ try:
1003
+ import yaml
1004
+ return yaml.dump(content, default_flow_style=False, allow_unicode=True)
1005
+ except ImportError:
1006
+ # Fallback to simple string representation
1007
+ return str(content)
1008
+
1009
+ def _convert_to_html(self, content: Any) -> str:
1010
+ """Convert content to HTML format"""
1011
+ if isinstance(content, dict):
1012
+ html = "<html><body>\n"
1013
+ for key, value in content.items():
1014
+ html += f"<h3>{key}</h3>\n<p>{value}</p>\n"
1015
+ html += "</body></html>"
1016
+ return html
1017
+ else:
1018
+ return f"<html><body><pre>{str(content)}</pre></body></html>"
1019
+
1020
+ def _convert_to_markdown(self, content: Any) -> str:
1021
+ """Convert content to Markdown format"""
1022
+ if isinstance(content, dict):
1023
+ md = ""
1024
+ for key, value in content.items():
1025
+ md += f"## {key}\n\n{value}\n\n"
1026
+ return md
1027
+ else:
1028
+ return str(content)
1029
+
1030
+ # Versioning methods
1031
+ def _get_next_version(self, file_path: str) -> int:
1032
+ """Get next version number for file"""
1033
+ version_file = f"{file_path}.versions.json"
1034
+ versions = self._load_version_history(version_file)
1035
+ return len(versions) + 1
1036
+
1037
+ def _load_version_history(self, version_file: str) -> List[Dict]:
1038
+ """Load version history from file"""
1039
+ try:
1040
+ if os.path.exists(version_file):
1041
+ with open(version_file, 'r') as f:
1042
+ return json.load(f)
1043
+ except Exception:
1044
+ pass
1045
+ return []
1046
+
1047
+ def _save_version_history(self, version_file: str, versions: List[Dict]):
1048
+ """Save version history to file"""
1049
+ try:
1050
+ with open(version_file, 'w') as f:
1051
+ json.dump(versions, f, indent=2)
1052
+ except Exception as e:
1053
+ self.logger.warning(f"Failed to save version history: {e}")
1054
+
1055
+ # Backup and rollback methods
1056
+ def _backup_cloud_file(self, source_path: str, backup_path: str) -> str:
1057
+ """Backup cloud file"""
1058
+ # Simplified implementation
1059
+ return backup_path
1060
+
1061
+ def _rollback_from_backup(self, target_path: str, backup_info: Dict):
1062
+ """Rollback file from backup"""
1063
+ try:
1064
+ if backup_info and os.path.exists(backup_info["backup_path"]):
1065
+ shutil.copy2(backup_info["backup_path"], target_path)
1066
+ self.logger.info(f"Rolled back {target_path} from backup")
1067
+ except Exception as e:
1068
+ self.logger.error(f"Rollback failed: {e}")
1069
+
1070
+ def _rollback_batch_operations(self, completed_operations: List[Dict], backup_operations: List[Dict]):
1071
+ """Rollback batch operations"""
1072
+ for op in reversed(completed_operations):
1073
+ try:
1074
+ result = op.get("result", {})
1075
+ backup_info = result.get("backup_info")
1076
+ if backup_info:
1077
+ self._rollback_from_backup(
1078
+ result["write_result"]["path"],
1079
+ backup_info
1080
+ )
1081
+ except Exception as e:
1082
+ self.logger.error(f"Batch rollback failed for operation: {e}")
1083
+
1084
+ def _log_write_operation(self, operation_id: str, target_path: str, mode: WriteMode,
1085
+ write_result: Dict, backup_info: Optional[Dict]) -> Dict:
1086
+ """Log write operation for audit"""
1087
+ audit_info = {
1088
+ "operation_id": operation_id,
1089
+ "timestamp": datetime.now().isoformat(),
1090
+ "target_path": target_path,
1091
+ "mode": mode,
1092
+ "success": True,
1093
+ "file_size": write_result.get("size", 0),
1094
+ "checksum": write_result.get("checksum"),
1095
+ "backup_created": backup_info is not None
1096
+ }
1097
+
1098
+ # Log to audit file
1099
+ try:
1100
+ audit_file = os.path.join(self.config.temp_dir, "write_audit.log")
1101
+ with open(audit_file, "a") as f:
1102
+ f.write(json.dumps(audit_info) + "\n")
1103
+ except Exception as e:
1104
+ self.logger.warning(f"Audit logging failed: {e}")
1105
+
1106
+ return audit_info
1107
+
1108
    def edit_document(self,
                     target_path: str,
                     operation: EditOperation,
                     content: Optional[str] = None,
                     position: Optional[Dict[str, Any]] = None,
                     selection: Optional[Dict[str, Any]] = None,
                     format_options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Perform advanced editing operations on documents

        Reads the whole file, applies one edit via the matching helper, then
        writes the result back with a backup (mode "backup_write").

        Args:
            target_path: Target file path
            operation: Edit operation to perform
            content: Content for the operation (if applicable)
            position: Position info (line, column, offset)
            selection: Text selection range
            format_options: Additional format options

        Returns:
            Dict containing edit results (operation id, edit metadata, write
            result, timing). COPY_TEXT returns the copy result directly and
            does not rewrite the file.

        Raises:
            DocumentWriterError: Any failure (including an unsupported
            operation) is wrapped and re-raised as DocumentWriterError.
        """
        try:
            start_time = datetime.now()
            operation_id = str(uuid.uuid4())

            self.logger.info(f"Starting edit operation {operation_id}: {operation} on {target_path}")

            # Read current document content
            current_content = self._read_document_content(target_path)

            # Perform the specific edit operation — each branch delegates to a
            # dedicated helper that returns the full edited document text.
            if operation == EditOperation.INSERT_TEXT:
                edited_content = self._insert_text(current_content, content, position)
            elif operation == EditOperation.DELETE_TEXT:
                edited_content = self._delete_text(current_content, selection)
            elif operation == EditOperation.REPLACE_TEXT:
                edited_content = self._replace_text(current_content, selection, content)
            elif operation == EditOperation.BOLD:
                edited_content = self._format_text_bold(current_content, selection, format_options)
            elif operation == EditOperation.ITALIC:
                edited_content = self._format_text_italic(current_content, selection, format_options)
            elif operation == EditOperation.UNDERLINE:
                edited_content = self._format_text_underline(current_content, selection, format_options)
            elif operation == EditOperation.STRIKETHROUGH:
                edited_content = self._format_text_strikethrough(current_content, selection, format_options)
            elif operation == EditOperation.HIGHLIGHT:
                edited_content = self._format_text_highlight(current_content, selection, format_options)
            elif operation == EditOperation.INSERT_LINE:
                edited_content = self._insert_line(current_content, position, content)
            elif operation == EditOperation.DELETE_LINE:
                edited_content = self._delete_line(current_content, position)
            elif operation == EditOperation.MOVE_LINE:
                edited_content = self._move_line(current_content, position, format_options)
            elif operation == EditOperation.COPY_TEXT:
                # COPY does not modify the document: return the copy result now.
                return self._copy_text(current_content, selection)
            elif operation == EditOperation.CUT_TEXT:
                edited_content, cut_content = self._cut_text(current_content, selection)
                # Store cut content in clipboard
                self._store_clipboard_content(cut_content)
            elif operation == EditOperation.PASTE_TEXT:
                clipboard_content = self._get_clipboard_content()
                edited_content = self._paste_text(current_content, position, clipboard_content)
            else:
                raise ValueError(f"Unsupported edit operation: {operation}")

            # Write the edited content back to file
            file_format = self._detect_file_format(target_path)
            write_result = self.write_document(
                target_path=target_path,
                content=edited_content,
                format=file_format,
                mode="backup_write",  # Always backup before editing
                backup_comment=f"Edit operation: {operation}"
            )

            result = {
                "operation_id": operation_id,
                "target_path": target_path,
                "operation": operation,
                "edit_metadata": {
                    "original_size": len(current_content),
                    "edited_size": len(edited_content) if isinstance(edited_content, str) else 0,
                    "position": position,
                    "selection": selection
                },
                "write_result": write_result,
                "processing_metadata": {
                    "start_time": start_time.isoformat(),
                    "end_time": datetime.now().isoformat(),
                    "duration": (datetime.now() - start_time).total_seconds()
                }
            }

            self.logger.info(f"Edit operation {operation_id} completed successfully")
            return result

        except Exception as e:
            raise DocumentWriterError(f"Edit operation failed: {str(e)}")
1206
+
1207
+ def format_text(self,
1208
+ target_path: str,
1209
+ text_to_format: str,
1210
+ format_type: EditOperation,
1211
+ format_options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
1212
+ """
1213
+ Apply formatting to specific text in a document
1214
+
1215
+ Args:
1216
+ target_path: Target file path
1217
+ text_to_format: Text to apply formatting to
1218
+ format_type: Type of formatting (bold, italic, etc.)
1219
+ format_options: Additional format options
1220
+
1221
+ Returns:
1222
+ Dict containing formatting results
1223
+ """
1224
+ try:
1225
+ current_content = self._read_document_content(target_path)
1226
+
1227
+ # Find all occurrences of the text
1228
+ formatted_content = self._apply_text_formatting(
1229
+ current_content, text_to_format, format_type, format_options
1230
+ )
1231
+
1232
+ # Write back to file
1233
+ file_format = self._detect_file_format(target_path)
1234
+ write_result = self.write_document(
1235
+ target_path=target_path,
1236
+ content=formatted_content,
1237
+ format=file_format,
1238
+ mode="backup_write"
1239
+ )
1240
+
1241
+ return {
1242
+ "target_path": target_path,
1243
+ "text_formatted": text_to_format,
1244
+ "format_type": format_type,
1245
+ "write_result": write_result
1246
+ }
1247
+
1248
+ except Exception as e:
1249
+ raise DocumentWriterError(f"Text formatting failed: {str(e)}")
1250
+
1251
+ def find_replace(self,
1252
+ target_path: str,
1253
+ find_text: str,
1254
+ replace_text: str,
1255
+ replace_all: bool = False,
1256
+ case_sensitive: bool = True,
1257
+ regex_mode: bool = False) -> Dict[str, Any]:
1258
+ """
1259
+ Find and replace text in a document
1260
+
1261
+ Args:
1262
+ target_path: Target file path
1263
+ find_text: Text to find
1264
+ replace_text: Text to replace with
1265
+ replace_all: Replace all occurrences
1266
+ case_sensitive: Case sensitive search
1267
+ regex_mode: Use regex for find/replace
1268
+
1269
+ Returns:
1270
+ Dict containing find/replace results
1271
+ """
1272
+ try:
1273
+ current_content = self._read_document_content(target_path)
1274
+
1275
+ # Perform find and replace
1276
+ new_content, replacements = self._perform_find_replace(
1277
+ current_content, find_text, replace_text,
1278
+ replace_all, case_sensitive, regex_mode
1279
+ )
1280
+
1281
+ if replacements > 0:
1282
+ # Write back to file
1283
+ file_format = self._detect_file_format(target_path)
1284
+ write_result = self.write_document(
1285
+ target_path=target_path,
1286
+ content=new_content,
1287
+ format=file_format,
1288
+ mode="backup_write",
1289
+ backup_comment=f"Find/Replace: '{find_text}' -> '{replace_text}'"
1290
+ )
1291
+
1292
+ return {
1293
+ "target_path": target_path,
1294
+ "find_text": find_text,
1295
+ "replace_text": replace_text,
1296
+ "replacements_made": replacements,
1297
+ "write_result": write_result
1298
+ }
1299
+ else:
1300
+ return {
1301
+ "target_path": target_path,
1302
+ "find_text": find_text,
1303
+ "replace_text": replace_text,
1304
+ "replacements_made": 0,
1305
+ "message": "No matches found"
1306
+ }
1307
+
1308
+ except Exception as e:
1309
+ raise DocumentWriterError(f"Find/replace operation failed: {str(e)}")
1310
+
1311
+ # Helper methods for editing operations
1312
+ def _read_document_content(self, file_path: str) -> str:
1313
+ """Read document content for editing"""
1314
+ try:
1315
+ with open(file_path, 'r', encoding='utf-8') as f:
1316
+ return f.read()
1317
+ except UnicodeDecodeError:
1318
+ # Try with different encodings
1319
+ for encoding in ['gbk', 'latin1', 'cp1252']:
1320
+ try:
1321
+ with open(file_path, 'r', encoding=encoding) as f:
1322
+ return f.read()
1323
+ except:
1324
+ continue
1325
+ raise DocumentWriterError(f"Cannot decode file: {file_path}")
1326
+ except Exception as e:
1327
+ raise DocumentWriterError(f"Cannot read file {file_path}: {str(e)}")
1328
+
1329
+ def _detect_file_format(self, file_path: str) -> str:
1330
+ """Detect file format from extension"""
1331
+ ext = os.path.splitext(file_path)[1].lower()
1332
+ format_map = {
1333
+ '.txt': 'txt', '.json': 'json', '.csv': 'csv',
1334
+ '.xml': 'xml', '.html': 'html', '.htm': 'html',
1335
+ '.md': 'markdown', '.markdown': 'markdown',
1336
+ '.yaml': 'yaml', '.yml': 'yaml'
1337
+ }
1338
+ return format_map.get(ext, 'txt')
1339
+
1340
+ def _insert_text(self, content: str, text: str, position: Optional[Dict[str, Any]]) -> str:
1341
+ """Insert text at specified position"""
1342
+ if not position:
1343
+ return content + text
1344
+
1345
+ if 'offset' in position:
1346
+ offset = position['offset']
1347
+ return content[:offset] + text + content[offset:]
1348
+ elif 'line' in position:
1349
+ lines = content.split('\n')
1350
+ line_num = position.get('line', 0)
1351
+ column = position.get('column', 0)
1352
+
1353
+ if line_num < len(lines):
1354
+ line = lines[line_num]
1355
+ lines[line_num] = line[:column] + text + line[column:]
1356
+ else:
1357
+ lines.append(text)
1358
+ return '\n'.join(lines)
1359
+ else:
1360
+ return content + text
1361
+
1362
    def _delete_text(self, content: str, selection: Optional[Dict[str, Any]]) -> str:
        """Delete text in specified selection.

        Supports two selection styles:
          * character offsets: 'start_offset' / 'end_offset'
          * line/column spans: 'start_line' / 'end_line' with optional
            'start_column' / 'end_column'

        Returns *content* unchanged when the selection is missing or matches
        neither style.
        """
        if not selection:
            return content

        if 'start_offset' in selection and 'end_offset' in selection:
            start = selection['start_offset']
            end = selection['end_offset']
            return content[:start] + content[end:]
        elif 'start_line' in selection and 'end_line' in selection:
            lines = content.split('\n')
            start_line = selection['start_line']
            end_line = selection['end_line']
            start_col = selection.get('start_column', 0)
            # Default end column: the whole end line (0 when end_line is past EOF).
            end_col = selection.get('end_column', len(lines[end_line]) if end_line < len(lines) else 0)

            if start_line == end_line:
                # Same line deletion
                line = lines[start_line]
                lines[start_line] = line[:start_col] + line[end_col:]
            else:
                # Multi-line deletion: keep the head of the first line, splice
                # on the tail of the last line, then drop the lines in between.
                lines[start_line] = lines[start_line][:start_col]
                if end_line < len(lines):
                    lines[start_line] += lines[end_line][end_col:]
                del lines[start_line + 1:end_line + 1]

            return '\n'.join(lines)

        return content
1392
+
1393
+ def _replace_text(self, content: str, selection: Optional[Dict[str, Any]], replacement: str) -> str:
1394
+ """Replace text in specified selection"""
1395
+ if not selection:
1396
+ return content
1397
+
1398
+ # First delete the selected text, then insert replacement
1399
+ content_after_delete = self._delete_text(content, selection)
1400
+
1401
+ # Calculate new insertion position after deletion
1402
+ if 'start_offset' in selection:
1403
+ insert_pos = {'offset': selection['start_offset']}
1404
+ elif 'start_line' in selection:
1405
+ insert_pos = {
1406
+ 'line': selection['start_line'],
1407
+ 'column': selection.get('start_column', 0)
1408
+ }
1409
+ else:
1410
+ insert_pos = None
1411
+
1412
+ return self._insert_text(content_after_delete, replacement, insert_pos)
1413
+
1414
+ def _format_text_bold(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1415
+ """Apply bold formatting to selected text"""
1416
+ if not selection:
1417
+ return content
1418
+
1419
+ format_type = options.get('format_type', 'markdown') if options else 'markdown'
1420
+
1421
+ if format_type == 'markdown':
1422
+ return self._apply_markdown_formatting(content, selection, '**', '**')
1423
+ elif format_type == 'html':
1424
+ return self._apply_html_formatting(content, selection, '<strong>', '</strong>')
1425
+ else:
1426
+ return content
1427
+
1428
+ def _format_text_italic(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1429
+ """Apply italic formatting to selected text"""
1430
+ if not selection:
1431
+ return content
1432
+
1433
+ format_type = options.get('format_type', 'markdown') if options else 'markdown'
1434
+
1435
+ if format_type == 'markdown':
1436
+ return self._apply_markdown_formatting(content, selection, '*', '*')
1437
+ elif format_type == 'html':
1438
+ return self._apply_html_formatting(content, selection, '<em>', '</em>')
1439
+ else:
1440
+ return content
1441
+
1442
+ def _format_text_underline(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1443
+ """Apply underline formatting to selected text"""
1444
+ if not selection:
1445
+ return content
1446
+
1447
+ format_type = options.get('format_type', 'html') if options else 'html'
1448
+
1449
+ if format_type == 'html':
1450
+ return self._apply_html_formatting(content, selection, '<u>', '</u>')
1451
+ else:
1452
+ return content
1453
+
1454
+ def _format_text_strikethrough(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1455
+ """Apply strikethrough formatting to selected text"""
1456
+ if not selection:
1457
+ return content
1458
+
1459
+ format_type = options.get('format_type', 'markdown') if options else 'markdown'
1460
+
1461
+ if format_type == 'markdown':
1462
+ return self._apply_markdown_formatting(content, selection, '~~', '~~')
1463
+ elif format_type == 'html':
1464
+ return self._apply_html_formatting(content, selection, '<del>', '</del>')
1465
+ else:
1466
+ return content
1467
+
1468
+ def _format_text_highlight(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1469
+ """Apply highlight formatting to selected text"""
1470
+ if not selection:
1471
+ return content
1472
+
1473
+ format_type = options.get('format_type', 'html') if options else 'html'
1474
+ color = options.get('color', 'yellow') if options else 'yellow'
1475
+
1476
+ if format_type == 'html':
1477
+ return self._apply_html_formatting(content, selection, f'<mark style="background-color: {color}">', '</mark>')
1478
+ elif format_type == 'markdown':
1479
+ return self._apply_markdown_formatting(content, selection, '==', '==')
1480
+ else:
1481
+ return content
1482
+
1483
+ def _apply_markdown_formatting(self, content: str, selection: Dict[str, Any], start_marker: str, end_marker: str) -> str:
1484
+ """Apply markdown formatting to selected text"""
1485
+ selected_text = self._extract_selected_text(content, selection)
1486
+ formatted_text = start_marker + selected_text + end_marker
1487
+ return self._replace_text(content, selection, formatted_text)
1488
+
1489
+ def _apply_html_formatting(self, content: str, selection: Dict[str, Any], start_tag: str, end_tag: str) -> str:
1490
+ """Apply HTML formatting to selected text"""
1491
+ selected_text = self._extract_selected_text(content, selection)
1492
+ formatted_text = start_tag + selected_text + end_tag
1493
+ return self._replace_text(content, selection, formatted_text)
1494
+
1495
+ def _extract_selected_text(self, content: str, selection: Dict[str, Any]) -> str:
1496
+ """Extract text from selection"""
1497
+ if 'start_offset' in selection and 'end_offset' in selection:
1498
+ return content[selection['start_offset']:selection['end_offset']]
1499
+ elif 'start_line' in selection and 'end_line' in selection:
1500
+ lines = content.split('\n')
1501
+ start_line = selection['start_line']
1502
+ end_line = selection['end_line']
1503
+ start_col = selection.get('start_column', 0)
1504
+ end_col = selection.get('end_column', len(lines[end_line]) if end_line < len(lines) else 0)
1505
+
1506
+ if start_line == end_line:
1507
+ return lines[start_line][start_col:end_col]
1508
+ else:
1509
+ result = [lines[start_line][start_col:]]
1510
+ result.extend(lines[start_line + 1:end_line])
1511
+ if end_line < len(lines):
1512
+ result.append(lines[end_line][:end_col])
1513
+ return '\n'.join(result)
1514
+ return ""
1515
+
1516
+ def _insert_line(self, content: str, position: Optional[Dict[str, Any]], line_content: str) -> str:
1517
+ """Insert a new line at specified position"""
1518
+ lines = content.split('\n')
1519
+ line_num = position.get('line', len(lines)) if position else len(lines)
1520
+
1521
+ lines.insert(line_num, line_content)
1522
+ return '\n'.join(lines)
1523
+
1524
+ def _delete_line(self, content: str, position: Optional[Dict[str, Any]]) -> str:
1525
+ """Delete line at specified position"""
1526
+ lines = content.split('\n')
1527
+ line_num = position.get('line', 0) if position else 0
1528
+
1529
+ if 0 <= line_num < len(lines):
1530
+ del lines[line_num]
1531
+
1532
+ return '\n'.join(lines)
1533
+
1534
+ def _move_line(self, content: str, position: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1535
+ """Move line to different position"""
1536
+ lines = content.split('\n')
1537
+ from_line = position.get('line', 0) if position else 0
1538
+ to_line = options.get('to_line', 0) if options else 0
1539
+
1540
+ if 0 <= from_line < len(lines) and 0 <= to_line < len(lines):
1541
+ line_content = lines.pop(from_line)
1542
+ lines.insert(to_line, line_content)
1543
+
1544
+ return '\n'.join(lines)
1545
+
1546
+ def _copy_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Dict[str, Any]:
1547
+ """Copy selected text to clipboard"""
1548
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1549
+ self._store_clipboard_content(selected_text)
1550
+
1551
+ return {
1552
+ "operation": "copy",
1553
+ "copied_text": selected_text,
1554
+ "copied_length": len(selected_text)
1555
+ }
1556
+
1557
+ def _cut_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Tuple[str, str]:
1558
+ """Cut selected text (copy and delete)"""
1559
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1560
+ new_content = self._delete_text(content, selection) if selection else ""
1561
+
1562
+ return new_content, selected_text
1563
+
1564
+ def _paste_text(self, content: str, position: Optional[Dict[str, Any]], clipboard_content: str) -> str:
1565
+ """Paste text from clipboard"""
1566
+ return self._insert_text(content, clipboard_content, position)
1567
+
1568
+ def _store_clipboard_content(self, content: str):
1569
+ """Store content in clipboard (simplified implementation)"""
1570
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1571
+ try:
1572
+ with open(clipboard_file, 'w', encoding='utf-8') as f:
1573
+ f.write(content)
1574
+ except Exception as e:
1575
+ self.logger.warning(f"Failed to store clipboard content: {e}")
1576
+
1577
+ def _get_clipboard_content(self) -> str:
1578
+ """Get content from clipboard"""
1579
+ clipboard_file = os.path.join(self.config.temp_dir, "clipboard.txt")
1580
+ try:
1581
+ with open(clipboard_file, 'r', encoding='utf-8') as f:
1582
+ return f.read()
1583
+ except Exception:
1584
+ return ""
1585
+
1586
+ def _apply_text_formatting(self, content: str, text_to_format: str, format_type: EditOperation, options: Optional[Dict[str, Any]]) -> str:
1587
+ """Apply formatting to all occurrences of specific text"""
1588
+ if format_type == EditOperation.BOLD:
1589
+ replacement = f"**{text_to_format}**"
1590
+ elif format_type == EditOperation.ITALIC:
1591
+ replacement = f"*{text_to_format}*"
1592
+ elif format_type == EditOperation.UNDERLINE:
1593
+ replacement = f"<u>{text_to_format}</u>"
1594
+ elif format_type == EditOperation.STRIKETHROUGH:
1595
+ replacement = f"~~{text_to_format}~~"
1596
+ elif format_type == EditOperation.HIGHLIGHT:
1597
+ color = options.get('color', 'yellow') if options else 'yellow'
1598
+ replacement = f'<mark style="background-color: {color}">{text_to_format}</mark>'
1599
+ else:
1600
+ replacement = text_to_format
1601
+
1602
+ return content.replace(text_to_format, replacement)
1603
+
1604
+ def _perform_find_replace(self, content: str, find_text: str, replace_text: str,
1605
+ replace_all: bool, case_sensitive: bool, regex_mode: bool) -> Tuple[str, int]:
1606
+ """Perform find and replace operation"""
1607
+ import re
1608
+
1609
+ replacements = 0
1610
+
1611
+ if regex_mode:
1612
+ flags = 0 if case_sensitive else re.IGNORECASE
1613
+ if replace_all:
1614
+ new_content, replacements = re.subn(find_text, replace_text, content, flags=flags)
1615
+ else:
1616
+ new_content = re.sub(find_text, replace_text, content, count=1, flags=flags)
1617
+ replacements = 1 if new_content != content else 0
1618
+ else:
1619
+ if case_sensitive:
1620
+ if replace_all:
1621
+ replacements = content.count(find_text)
1622
+ new_content = content.replace(find_text, replace_text)
1623
+ else:
1624
+ new_content = content.replace(find_text, replace_text, 1)
1625
+ replacements = 1 if new_content != content else 0
1626
+ else:
1627
+ # Case insensitive replacement
1628
+ import re
1629
+ pattern = re.escape(find_text)
1630
+ if replace_all:
1631
+ new_content, replacements = re.subn(pattern, replace_text, content, flags=re.IGNORECASE)
1632
+ else:
1633
+ new_content = re.sub(pattern, replace_text, content, count=1, flags=re.IGNORECASE)
1634
+ replacements = 1 if new_content != content else 0
1635
+
1636
+ return new_content, replacements