aiecs 1.0.8__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of aiecs might be problematic.

Files changed (45)
  1. aiecs/__init__.py +1 -1
  2. aiecs/aiecs_client.py +159 -1
  3. aiecs/config/config.py +4 -0
  4. aiecs/domain/context/__init__.py +24 -0
  5. aiecs/main.py +20 -2
  6. aiecs/scripts/dependance_check/__init__.py +18 -0
  7. aiecs/scripts/{download_nlp_data.py → dependance_check/download_nlp_data.py} +50 -8
  8. aiecs/scripts/dependance_patch/__init__.py +8 -0
  9. aiecs/scripts/dependance_patch/fix_weasel/__init__.py +12 -0
  10. aiecs/scripts/tools_develop/README.md +340 -0
  11. aiecs/scripts/tools_develop/__init__.py +16 -0
  12. aiecs/scripts/tools_develop/check_type_annotations.py +263 -0
  13. aiecs/scripts/tools_develop/validate_tool_schemas.py +346 -0
  14. aiecs/tools/__init__.py +33 -14
  15. aiecs/tools/docs/__init__.py +103 -0
  16. aiecs/tools/docs/ai_document_orchestrator.py +543 -0
  17. aiecs/tools/docs/ai_document_writer_orchestrator.py +2199 -0
  18. aiecs/tools/docs/content_insertion_tool.py +1214 -0
  19. aiecs/tools/docs/document_creator_tool.py +1161 -0
  20. aiecs/tools/docs/document_layout_tool.py +1090 -0
  21. aiecs/tools/docs/document_parser_tool.py +904 -0
  22. aiecs/tools/docs/document_writer_tool.py +1583 -0
  23. aiecs/tools/langchain_adapter.py +102 -51
  24. aiecs/tools/schema_generator.py +265 -0
  25. aiecs/tools/task_tools/image_tool.py +1 -1
  26. aiecs/tools/task_tools/office_tool.py +9 -0
  27. aiecs/tools/task_tools/scraper_tool.py +1 -1
  28. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/METADATA +1 -1
  29. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/RECORD +44 -28
  30. aiecs-1.1.0.dist-info/entry_points.txt +9 -0
  31. aiecs-1.0.8.dist-info/entry_points.txt +0 -7
  32. /aiecs/scripts/{DEPENDENCY_SYSTEM_SUMMARY.md → dependance_check/DEPENDENCY_SYSTEM_SUMMARY.md} +0 -0
  33. /aiecs/scripts/{README_DEPENDENCY_CHECKER.md → dependance_check/README_DEPENDENCY_CHECKER.md} +0 -0
  34. /aiecs/scripts/{dependency_checker.py → dependance_check/dependency_checker.py} +0 -0
  35. /aiecs/scripts/{dependency_fixer.py → dependance_check/dependency_fixer.py} +0 -0
  36. /aiecs/scripts/{quick_dependency_check.py → dependance_check/quick_dependency_check.py} +0 -0
  37. /aiecs/scripts/{setup_nlp_data.sh → dependance_check/setup_nlp_data.sh} +0 -0
  38. /aiecs/scripts/{README_WEASEL_PATCH.md → dependance_patch/fix_weasel/README_WEASEL_PATCH.md} +0 -0
  39. /aiecs/scripts/{fix_weasel_validator.py → dependance_patch/fix_weasel/fix_weasel_validator.py} +0 -0
  40. /aiecs/scripts/{fix_weasel_validator.sh → dependance_patch/fix_weasel/fix_weasel_validator.sh} +0 -0
  41. /aiecs/scripts/{patch_weasel_library.sh → dependance_patch/fix_weasel/patch_weasel_library.sh} +0 -0
  42. /aiecs/scripts/{run_weasel_patch.sh → dependance_patch/fix_weasel/run_weasel_patch.sh} +0 -0
  43. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/WHEEL +0 -0
  44. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/licenses/LICENSE +0 -0
  45. {aiecs-1.0.8.dist-info → aiecs-1.1.0.dist-info}/top_level.txt +0 -0
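The hunk below is the new aiecs/tools/docs/document_writer_tool.py module (+1,583 lines). As orientation, here is a minimal usage sketch of the tool it registers; the path and content are illustrative, and BaseTool/register_tool are assumed to behave as defined elsewhere in the aiecs.tools package:

    from aiecs.tools.docs.document_writer_tool import (
        DocumentWriterTool, DocumentFormat, WriteMode,
    )

    # Instantiate with default DocumentWriterSettings (env prefix DOC_WRITER_)
    tool = DocumentWriterTool()

    # Create a new Markdown file; CREATE mode fails if the file already exists
    result = tool.write_document(
        target_path="/tmp/example_report.md",        # hypothetical path
        content={"Summary": "Quarterly numbers look stable."},
        format=DocumentFormat.MARKDOWN,
        mode=WriteMode.CREATE,
    )
    print(result["write_result"]["checksum"])        # SHA-256 of the written file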
@@ -0,0 +1,1583 @@
1
+ import os
2
+ import json
3
+ import time
4
+ import uuid
5
+ import hashlib
6
+ import logging
7
+ import asyncio
8
+ import shutil
9
+ from typing import Dict, Any, List, Optional, Union, Tuple
10
+ from enum import Enum
11
+ from datetime import datetime
12
+ from pathlib import Path
13
+ import tempfile
14
+
15
+ from pydantic import BaseModel, Field
16
+ from pydantic import ValidationError as PydanticValidationError
17
+ from pydantic_settings import BaseSettings
18
+
19
+ from aiecs.tools.base_tool import BaseTool
20
+ from aiecs.tools import register_tool
21
+
22
+
23
+ class DocumentFormat(str, Enum):
24
+ """Supported document formats for writing"""
25
+ TXT = "txt"
26
+ PLAIN_TEXT = "txt" # Alias for TXT
27
+ JSON = "json"
28
+ CSV = "csv"
29
+ XML = "xml"
30
+ MARKDOWN = "md"
31
+ HTML = "html"
32
+ YAML = "yaml"
33
+ PDF = "pdf"
34
+ DOCX = "docx"
35
+ XLSX = "xlsx"
36
+ BINARY = "binary"
37
+
38
+
39
+ class WriteMode(str, Enum):
40
+ """Document writing modes"""
41
+ CREATE = "create" # 创建新文件,如果存在则失败
42
+ OVERWRITE = "overwrite" # 覆盖现有文件
43
+ APPEND = "append" # 追加到现有文件
44
+ UPDATE = "update" # 更新现有文件(智能合并)
45
+ BACKUP_WRITE = "backup_write" # 备份后写入
46
+ VERSION_WRITE = "version_write" # 版本化写入
47
+ INSERT = "insert" # 在指定位置插入内容
48
+ REPLACE = "replace" # 替换指定内容
49
+ DELETE = "delete" # 删除指定内容
50
+
51
+
52
+ class EditOperation(str, Enum):
53
+ """Advanced edit operations"""
54
+ BOLD = "bold" # 加粗文本
55
+ ITALIC = "italic" # 斜体文本
56
+ UNDERLINE = "underline" # 下划线文本
57
+ STRIKETHROUGH = "strikethrough" # 删除线文本
58
+ HIGHLIGHT = "highlight" # 高亮文本
59
+ INSERT_TEXT = "insert_text" # 插入文本
60
+ DELETE_TEXT = "delete_text" # 删除文本
61
+ REPLACE_TEXT = "replace_text" # 替换文本
62
+ COPY_TEXT = "copy_text" # 复制文本
63
+ CUT_TEXT = "cut_text" # 剪切文本
64
+ PASTE_TEXT = "paste_text" # 粘贴文本
65
+ FIND_REPLACE = "find_replace" # 查找替换
66
+ INSERT_LINE = "insert_line" # 插入行
67
+ DELETE_LINE = "delete_line" # 删除行
68
+ MOVE_LINE = "move_line" # 移动行
69
+
70
+
71
+ class EncodingType(str, Enum):
72
+ """Text encoding types"""
73
+ UTF8 = "utf-8"
74
+ UTF16 = "utf-16"
75
+ ASCII = "ascii"
76
+ GBK = "gbk"
77
+ AUTO = "auto"
78
+
79
+
80
+ class ValidationLevel(str, Enum):
81
+ """Content validation levels"""
82
+ NONE = "none" # 无验证
83
+ BASIC = "basic" # 基础验证(格式、大小)
84
+ STRICT = "strict" # 严格验证(内容、结构)
85
+ ENTERPRISE = "enterprise" # 企业级验证(安全、合规)
86
+
87
+
88
+ class DocumentWriterSettings(BaseSettings):
89
+ """Configuration for DocumentWriterTool"""
90
+ temp_dir: str = os.path.join(tempfile.gettempdir(), 'document_writer')
91
+ backup_dir: str = os.path.join(tempfile.gettempdir(), 'document_backups')
92
+ output_dir: Optional[str] = None # Output directory
93
+ max_file_size: int = 100 * 1024 * 1024 # 100MB
94
+ max_backup_versions: int = 10
95
+ default_encoding: str = "utf-8"
96
+ enable_backup: bool = True
97
+ enable_versioning: bool = True
98
+ enable_content_validation: bool = True
99
+ enable_security_scan: bool = True
100
+ atomic_write: bool = True # Atomic writes
101
+ validation_level: str = "basic" # Validation level
102
+ timeout_seconds: int = 60 # Operation timeout
103
+ auto_backup: bool = True # Auto backup before write
104
+ atomic_writes: bool = True # Atomic write operations
105
+ default_format: DocumentFormat = DocumentFormat.MARKDOWN # Default document format
106
+ version_control: bool = True # Enable version control
107
+ security_scan: bool = True # Enable security scanning
108
+
109
+ # Cloud storage settings
110
+ enable_cloud_storage: bool = True
111
+ gcs_bucket_name: str = "aiecs-documents"
112
+ gcs_project_id: Optional[str] = None
113
+
114
+ class Config:
115
+ env_prefix = "DOC_WRITER_"
116
+ extra = "allow" # Allow extra fields for flexibility
117
+
118
+
119
+ class DocumentWriterError(Exception):
120
+ """Base exception for document writer errors"""
121
+ pass
122
+
123
+
124
+ class WriteError(DocumentWriterError):
125
+ """Raised when write operations fail"""
126
+ pass
127
+
128
+
129
+ class ValidationError(DocumentWriterError):
130
+ """Raised when validation fails"""
131
+ pass
132
+
133
+
134
+ class SecurityError(DocumentWriterError):
135
+ """Raised when security validation fails"""
136
+ pass
137
+
138
+
139
+ class WritePermissionError(DocumentWriterError):
140
+ """Raised when write permission is denied"""
141
+ pass
142
+
143
+
144
+ class ContentValidationError(DocumentWriterError):
145
+ """Raised when content validation fails"""
146
+ pass
147
+
148
+
149
+ class StorageError(DocumentWriterError):
150
+ """Raised when storage operations fail"""
151
+ pass
152
+
153
+
154
+ @register_tool("document_writer")
155
+ class DocumentWriterTool(BaseTool):
156
+ """
157
+ Modern high-performance document writing component that can:
158
+ 1. Handle multiple document formats and encodings
159
+ 2. Provide production-grade write operations with validation
160
+ 3. Support various write modes (create, overwrite, append, update)
161
+ 4. Implement backup and versioning strategies
162
+ 5. Ensure atomic operations and data integrity
163
+ 6. Support both local and cloud storage
164
+
165
+ Production Features:
166
+ - Atomic writes (no partial writes)
167
+ - Content validation and security scanning
168
+ - Automatic backup and versioning
169
+ - Write permission and quota checks
170
+ - Transaction-like operations
171
+ - Audit logging
172
+ """
173
+
174
+ def __init__(self, config: Optional[Dict] = None):
175
+ """Initialize DocumentWriterTool with settings"""
176
+ try:
177
+ super().__init__(config)
178
+ except PydanticValidationError as e:
179
+ raise ValueError(f"Invalid settings: {e}")
180
+
181
+ self.settings = DocumentWriterSettings()
182
+ if config:
183
+ try:
184
+ self.settings = self.settings.model_validate({**self.settings.model_dump(), **config})
185
+ except PydanticValidationError as e:
186
+ raise ValueError(f"Invalid settings: {e}")
187
+
188
+ self.logger = logging.getLogger(__name__)
189
+
190
+ # Create necessary directories
191
+ os.makedirs(self.settings.temp_dir, exist_ok=True)
192
+ os.makedirs(self.settings.backup_dir, exist_ok=True)
193
+
194
+ # Initialize cloud storage
195
+ self._init_cloud_storage()
196
+
197
+ # Initialize content validators
198
+ self._init_validators()
199
+
200
+ def _init_cloud_storage(self):
201
+ """Initialize cloud storage for document writing"""
202
+ self.file_storage = None
203
+
204
+ if self.settings.enable_cloud_storage:
205
+ try:
206
+ from aiecs.infrastructure.persistence.file_storage import FileStorage
207
+
208
+ storage_config = {
209
+ 'gcs_bucket_name': self.settings.gcs_bucket_name,
210
+ 'gcs_project_id': self.settings.gcs_project_id,
211
+ 'enable_local_fallback': True,
212
+ 'local_storage_path': self.settings.temp_dir
213
+ }
214
+
215
+ self.file_storage = FileStorage(storage_config)
216
+ asyncio.create_task(self._init_storage_async())
217
+
218
+ except ImportError:
219
+ self.logger.warning("FileStorage not available, cloud storage disabled")
220
+ except Exception as e:
221
+ self.logger.warning(f"Failed to initialize cloud storage: {e}")
222
+
223
+ async def _init_storage_async(self):
224
+ """Async initialization of file storage"""
225
+ try:
226
+ if self.file_storage:
227
+ await self.file_storage.initialize()
228
+ self.logger.info("Cloud storage initialized successfully")
229
+ except Exception as e:
230
+ self.logger.warning(f"Cloud storage initialization failed: {e}")
231
+ self.file_storage = None
232
+
233
+ def _init_validators(self):
234
+ """Initialize content validators"""
235
+ self.validators = {
236
+ DocumentFormat.JSON: self._validate_json_content,
237
+ DocumentFormat.XML: self._validate_xml_content,
238
+ DocumentFormat.CSV: self._validate_csv_content,
239
+ DocumentFormat.YAML: self._validate_yaml_content,
240
+ DocumentFormat.HTML: self._validate_html_content
241
+ }
242
+
243
+ # Schema definitions
244
+ class WriteDocumentSchema(BaseModel):
245
+ """Schema for write_document operation"""
246
+ target_path: str = Field(description="Target file path (local or cloud)")
247
+ content: Union[str, bytes, Dict, List] = Field(description="Content to write")
248
+ format: DocumentFormat = Field(description="Document format")
249
+ mode: WriteMode = Field(default=WriteMode.CREATE, description="Write mode")
250
+ encoding: EncodingType = Field(default=EncodingType.UTF8, description="Text encoding")
251
+ validation_level: ValidationLevel = Field(default=ValidationLevel.BASIC, description="Validation level")
252
+ metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional metadata")
253
+ backup_comment: Optional[str] = Field(default=None, description="Backup comment")
254
+
255
+ class BatchWriteSchema(BaseModel):
256
+ """Schema for batch_write_documents operation"""
257
+ write_operations: List[Dict[str, Any]] = Field(description="List of write operations")
258
+ transaction_mode: bool = Field(default=True, description="Use transaction mode")
259
+ rollback_on_error: bool = Field(default=True, description="Rollback on any error")
260
+
261
+ class EditDocumentSchema(BaseModel):
262
+ """Schema for edit_document operation"""
263
+ target_path: str = Field(description="Target file path")
264
+ operation: EditOperation = Field(description="Edit operation to perform")
265
+ content: Optional[str] = Field(default=None, description="Content for the operation")
266
+ position: Optional[Dict[str, Any]] = Field(default=None, description="Position info (line, column, offset)")
267
+ selection: Optional[Dict[str, Any]] = Field(default=None, description="Text selection range")
268
+ format_options: Optional[Dict[str, Any]] = Field(default=None, description="Formatting options")
269
+
270
+ class FormatTextSchema(BaseModel):
271
+ """Schema for format_text operation"""
272
+ target_path: str = Field(description="Target file path")
273
+ text_to_format: str = Field(description="Text to apply formatting to")
274
+ format_type: EditOperation = Field(description="Type of formatting")
275
+ format_options: Optional[Dict[str, Any]] = Field(default=None, description="Additional format options")
276
+
277
+ class FindReplaceSchema(BaseModel):
278
+ """Schema for find_replace operation"""
279
+ target_path: str = Field(description="Target file path")
280
+ find_text: str = Field(description="Text to find")
281
+ replace_text: str = Field(description="Text to replace with")
282
+ replace_all: bool = Field(default=False, description="Replace all occurrences")
283
+ case_sensitive: bool = Field(default=True, description="Case sensitive search")
284
+ regex_mode: bool = Field(default=False, description="Use regex for find/replace")
285
+
286
+ def write_document(self,
287
+ target_path: str,
288
+ content: Union[str, bytes, Dict, List],
289
+ format: DocumentFormat,
290
+ mode: WriteMode = WriteMode.CREATE,
291
+ encoding: EncodingType = EncodingType.UTF8,
292
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
293
+ metadata: Optional[Dict[str, Any]] = None,
294
+ backup_comment: Optional[str] = None) -> Dict[str, Any]:
295
+ """
296
+ Write document with production-grade features
297
+
298
+ Args:
299
+ target_path: Target file path (local or cloud)
300
+ content: Content to write
301
+ format: Document format
302
+ mode: Write mode (create, overwrite, append, update, etc.)
303
+ encoding: Text encoding
304
+ validation_level: Content validation level
305
+ metadata: Additional metadata
306
+ backup_comment: Comment for backup
307
+
308
+ Returns:
309
+ Dict containing write results and metadata
310
+ """
311
+ try:
312
+ start_time = datetime.now()
313
+ operation_id = str(uuid.uuid4())
314
+
315
+ self.logger.info(f"Starting write operation {operation_id}: {target_path}")
316
+
317
+ # Step 1: Validate inputs
318
+ self._validate_write_inputs(target_path, content, format, mode)
319
+
320
+ # Step 2: Prepare content
321
+ processed_content, content_metadata = self._prepare_content(
322
+ content, format, encoding, validation_level
323
+ )
324
+
325
+ # Step 3: Handle write mode logic
326
+ write_plan = self._plan_write_operation(target_path, mode, metadata)
327
+
328
+ # Step 4: Create backup if needed
329
+ backup_info = None
330
+ if self.settings.enable_backup and mode in [WriteMode.OVERWRITE, WriteMode.UPDATE]:
331
+ backup_info = self._create_backup(target_path, backup_comment)
332
+
333
+ # Step 5: Execute atomic write
334
+ write_result = self._execute_atomic_write(
335
+ target_path, processed_content, format, encoding, write_plan
336
+ )
337
+
338
+ # Step 6: Update metadata and versioning
339
+ version_info = self._handle_versioning(target_path, content_metadata, metadata)
340
+
341
+ # Step 7: Audit logging
342
+ audit_info = self._log_write_operation(
343
+ operation_id, target_path, mode, write_result, backup_info
344
+ )
345
+
346
+ result = {
347
+ "operation_id": operation_id,
348
+ "target_path": target_path,
349
+ "write_mode": mode,
350
+ "format": format,
351
+ "encoding": encoding,
352
+ "content_metadata": content_metadata,
353
+ "write_result": write_result,
354
+ "backup_info": backup_info,
355
+ "version_info": version_info,
356
+ "audit_info": audit_info,
357
+ "processing_metadata": {
358
+ "start_time": start_time.isoformat(),
359
+ "end_time": datetime.now().isoformat(),
360
+ "duration": (datetime.now() - start_time).total_seconds()
361
+ }
362
+ }
363
+
364
+ self.logger.info(f"Write operation {operation_id} completed successfully")
365
+ return result
366
+
367
+ except Exception as e:
368
+ self.logger.error(f"Write operation failed for {target_path}: {str(e)}")
369
+ # Rollback if needed
370
+ if 'backup_info' in locals() and backup_info:
371
+ self._rollback_from_backup(target_path, backup_info)
372
+ raise DocumentWriterError(f"Document write failed: {str(e)}")
373
+
374
+ async def write_document_async(self,
375
+ target_path: str,
376
+ content: Union[str, bytes, Dict, List],
377
+ format: DocumentFormat,
378
+ mode: WriteMode = WriteMode.CREATE,
379
+ encoding: EncodingType = EncodingType.UTF8,
380
+ validation_level: ValidationLevel = ValidationLevel.BASIC,
381
+ metadata: Optional[Dict[str, Any]] = None,
382
+ backup_comment: Optional[str] = None) -> Dict[str, Any]:
383
+ """Async version of write_document"""
384
+ return await asyncio.to_thread(
385
+ self.write_document,
386
+ target_path=target_path,
387
+ content=content,
388
+ format=format,
389
+ mode=mode,
390
+ encoding=encoding,
391
+ validation_level=validation_level,
392
+ metadata=metadata,
393
+ backup_comment=backup_comment
394
+ )
395
+
396
+ def batch_write_documents(self,
397
+ write_operations: List[Dict[str, Any]],
398
+ transaction_mode: bool = True,
399
+ rollback_on_error: bool = True) -> Dict[str, Any]:
400
+ """
401
+ Batch write multiple documents with transaction support
402
+
403
+ Args:
404
+ write_operations: List of write operation dictionaries
405
+ transaction_mode: Use transaction mode for atomicity
406
+ rollback_on_error: Rollback all operations on any error
407
+
408
+ Returns:
409
+ Dict containing batch write results
410
+ """
411
+ try:
412
+ start_time = datetime.now()
413
+ batch_id = str(uuid.uuid4())
414
+
415
+ self.logger.info(f"Starting batch write operation {batch_id}: {len(write_operations)} operations")
416
+
417
+ completed_operations = []
418
+ backup_operations = []
419
+
420
+ try:
421
+ for i, operation in enumerate(write_operations):
422
+ self.logger.info(f"Processing operation {i+1}/{len(write_operations)}")
423
+
424
+ # Execute individual write operation
425
+ result = self.write_document(**operation)
426
+ completed_operations.append({
427
+ "index": i,
428
+ "operation": operation,
429
+ "result": result,
430
+ "status": "success"
431
+ })
432
+
433
+ # Track backup info for potential rollback
434
+ if result.get("backup_info"):
435
+ backup_operations.append(result["backup_info"])
436
+
437
+ batch_result = {
438
+ "batch_id": batch_id,
439
+ "total_operations": len(write_operations),
440
+ "successful_operations": len(completed_operations),
441
+ "failed_operations": 0,
442
+ "operations": completed_operations,
443
+ "transaction_mode": transaction_mode,
444
+ "batch_metadata": {
445
+ "start_time": start_time.isoformat(),
446
+ "end_time": datetime.now().isoformat(),
447
+ "duration": (datetime.now() - start_time).total_seconds()
448
+ }
449
+ }
450
+
451
+ self.logger.info(f"Batch write operation {batch_id} completed successfully")
452
+ return batch_result
453
+
454
+ except Exception as e:
455
+ self.logger.error(f"Batch write operation {batch_id} failed: {str(e)}")
456
+
457
+ if rollback_on_error and transaction_mode:
458
+ self.logger.info(f"Rolling back batch operation {batch_id}")
459
+ self._rollback_batch_operations(completed_operations, backup_operations)
460
+
461
+ # Create failure result
462
+ batch_result = {
463
+ "batch_id": batch_id,
464
+ "total_operations": len(write_operations),
465
+ "successful_operations": len(completed_operations),
466
+ "failed_operations": len(write_operations) - len(completed_operations),
467
+ "operations": completed_operations,
468
+ "error": str(e),
469
+ "transaction_mode": transaction_mode,
470
+ "rollback_performed": rollback_on_error and transaction_mode
471
+ }
472
+
473
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
474
+
475
+ except Exception as e:
476
+ raise DocumentWriterError(f"Batch write operation failed: {str(e)}")
477
+
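# A hedged sketch of batch usage with the tool instance from the earlier
# example (illustrative values; each dict mirrors the write_document keyword
# arguments, and transaction_mode enables the rollback path below):
#
#   batch = tool.batch_write_documents(
#       write_operations=[
#           {"target_path": "/tmp/a.json", "content": {"k": 1},
#            "format": DocumentFormat.JSON, "mode": WriteMode.OVERWRITE},
#           {"target_path": "/tmp/b.txt", "content": "hello",
#            "format": DocumentFormat.TXT},
#       ],
#       transaction_mode=True,
#       rollback_on_error=True,
#   )
#   assert batch["failed_operations"] == 0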
478
+ def _validate_write_inputs(self, target_path: str, content: Any, format: DocumentFormat, mode: WriteMode):
479
+ """Validate write operation inputs"""
480
+ # Path validation
481
+ if not target_path or not isinstance(target_path, str):
482
+ raise ValueError("Invalid target path")
483
+
484
+ # Content validation
485
+ if content is None:
486
+ raise ValueError("Content cannot be None")
487
+
488
+ # Size validation
489
+ content_size = self._calculate_content_size(content)
490
+ if content_size > self.settings.max_file_size:
491
+ raise ValueError(f"Content size {content_size} exceeds maximum {self.settings.max_file_size}")
492
+
493
+ # Permission validation
494
+ if not self._check_write_permission(target_path, mode):
495
+ raise WritePermissionError(f"No write permission for {target_path}")
496
+
497
+ def _prepare_content(self, content: Any, format: DocumentFormat,
498
+ encoding: EncodingType, validation_level: ValidationLevel) -> Tuple[Union[str, bytes], Dict]:
499
+ """Prepare and validate content for writing"""
500
+
501
+ # Content conversion based on format
502
+ if format == DocumentFormat.JSON:
503
+ if isinstance(content, (dict, list)):
504
+ processed_content = json.dumps(content, ensure_ascii=False, indent=2)
505
+ else:
506
+ processed_content = str(content)
507
+ elif format == DocumentFormat.CSV:
508
+ processed_content = self._convert_to_csv(content)
509
+ elif format == DocumentFormat.XML:
510
+ processed_content = self._convert_to_xml(content)
511
+ elif format == DocumentFormat.YAML:
512
+ processed_content = self._convert_to_yaml(content)
513
+ elif format == DocumentFormat.HTML:
514
+ processed_content = self._convert_to_html(content)
515
+ elif format == DocumentFormat.MARKDOWN:
516
+ processed_content = self._convert_to_markdown(content)
517
+ elif format == DocumentFormat.BINARY:
518
+ if isinstance(content, bytes):
519
+ processed_content = content
520
+ else:
521
+ processed_content = str(content).encode(encoding.value)
522
+ else:
523
+ processed_content = str(content)
524
+
525
+ # Content validation
526
+ if self.settings.enable_content_validation:
527
+ self._validate_content(processed_content, format, validation_level)
528
+
529
+ # Calculate metadata
530
+ content_metadata = {
531
+ "original_type": type(content).__name__,
532
+ "processed_size": len(processed_content) if isinstance(processed_content, (str, bytes)) else 0,
533
+ "format": format,
534
+ "encoding": encoding,
535
+ "checksum": self._calculate_checksum(processed_content),
536
+ "validation_level": validation_level,
537
+ "timestamp": datetime.now().isoformat()
538
+ }
539
+
540
+ return processed_content, content_metadata
541
+
542
+ def _plan_write_operation(self, target_path: str, mode: WriteMode, metadata: Optional[Dict]) -> Dict:
543
+ """Plan the write operation based on mode and target"""
544
+
545
+ plan = {
546
+ "target_path": target_path,
547
+ "mode": mode,
548
+ "file_exists": self._file_exists(target_path),
549
+ "is_cloud_path": self._is_cloud_storage_path(target_path),
550
+ "requires_backup": False,
551
+ "requires_versioning": False,
552
+ "atomic_operation": self.settings.atomic_write
553
+ }
554
+
555
+ if mode == WriteMode.CREATE and plan["file_exists"]:
556
+ raise DocumentWriterError(f"File already exists: {target_path}")
557
+
558
+ if mode in [WriteMode.OVERWRITE, WriteMode.UPDATE] and plan["file_exists"]:
559
+ plan["requires_backup"] = self.settings.enable_backup
560
+ plan["requires_versioning"] = self.settings.enable_versioning
561
+
562
+ if mode == WriteMode.APPEND and not plan["file_exists"]:
563
+ # Convert to CREATE mode
564
+ plan["mode"] = WriteMode.CREATE
565
+
566
+ return plan
567
+
568
+ def _create_backup(self, target_path: str, comment: Optional[str] = None) -> Dict:
569
+ """Create backup of existing file"""
570
+ if not self._file_exists(target_path):
571
+ return None
572
+
573
+ try:
574
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
575
+ file_stem = Path(target_path).stem
576
+ file_suffix = Path(target_path).suffix
577
+
578
+ backup_filename = f"{file_stem}_backup_{timestamp}{file_suffix}"
579
+ backup_path = os.path.join(self.settings.backup_dir, backup_filename)
580
+
581
+ # Copy file to backup location
582
+ if self._is_cloud_storage_path(target_path):
583
+ backup_path = self._backup_cloud_file(target_path, backup_path)
584
+ else:
585
+ shutil.copy2(target_path, backup_path)
586
+
587
+ backup_info = {
588
+ "original_path": target_path,
589
+ "backup_path": backup_path,
590
+ "timestamp": timestamp,
591
+ "comment": comment,
592
+ "checksum": self._calculate_file_checksum(target_path)
593
+ }
594
+
595
+ self.logger.info(f"Created backup: {backup_path}")
596
+ return backup_info
597
+
598
+ except Exception as e:
599
+ self.logger.error(f"Failed to create backup for {target_path}: {e}")
600
+ raise StorageError(f"Backup creation failed: {e}")
601
+
602
+ def _execute_atomic_write(self, target_path: str, content: Union[str, bytes],
603
+ format: DocumentFormat, encoding: EncodingType, plan: Dict) -> Dict:
604
+ """Execute atomic write operation"""
605
+
606
+ if plan["is_cloud_path"]:
607
+ return self._write_to_cloud_storage(target_path, content, format, encoding, plan)
608
+ else:
609
+ return self._write_to_local_file(target_path, content, format, encoding, plan)
610
+
611
+ def _write_to_local_file(self, target_path: str, content: Union[str, bytes],
612
+ format: DocumentFormat, encoding: EncodingType, plan: Dict) -> Dict:
613
+ """Write to local file system with atomic operation"""
614
+
615
+ try:
616
+ # Create parent directories
617
+ os.makedirs(os.path.dirname(target_path), exist_ok=True)
618
+
619
+ if plan["atomic_operation"]:
620
+ # Atomic write using temporary file
621
+ temp_path = f"{target_path}.tmp.{uuid.uuid4().hex}"
622
+
623
+ try:
624
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
625
+ # Read existing content first
626
+ with open(target_path, 'rb') as f:
627
+ existing_content = f.read()
628
+
629
+ if isinstance(content, str):
630
+ content = existing_content.decode(encoding.value) + content
631
+ else:
632
+ content = existing_content + content
633
+
634
+ # Write to temporary file
635
+ if isinstance(content, bytes):
636
+ with open(temp_path, 'wb') as f:
637
+ f.write(content)
638
+ else:
639
+ # Handle both EncodingType enum and string
640
+ enc_value = encoding.value if hasattr(encoding, 'value') else str(encoding)
641
+ with open(temp_path, 'w', encoding=enc_value) as f:
642
+ f.write(content)
643
+
644
+ # Atomic move
645
+ shutil.move(temp_path, target_path)
646
+
647
+ finally:
648
+ # Cleanup temp file if it still exists
649
+ if os.path.exists(temp_path):
650
+ os.unlink(temp_path)
651
+ else:
652
+ # Direct write
653
+ mode_map = {
654
+ WriteMode.CREATE: 'w',
655
+ WriteMode.OVERWRITE: 'w',
656
+ WriteMode.APPEND: 'a',
657
+ WriteMode.UPDATE: 'w'
658
+ }
659
+
660
+ file_mode = mode_map.get(plan["mode"], 'w')
661
+ if isinstance(content, bytes):
662
+ file_mode += 'b'
663
+
664
+ # Handle both EncodingType enum and string
665
+ enc_value = None if isinstance(content, bytes) else (encoding.value if hasattr(encoding, 'value') else str(encoding))
666
+ with open(target_path, file_mode, encoding=enc_value) as f:
667
+ f.write(content)
668
+
669
+ # Get file stats
670
+ stat = os.stat(target_path)
671
+
672
+ return {
673
+ "path": target_path,
674
+ "size": stat.st_size,
675
+ "checksum": self._calculate_file_checksum(target_path),
676
+ "modified_time": datetime.fromtimestamp(stat.st_mtime).isoformat(),
677
+ "atomic_write": plan["atomic_operation"]
678
+ }
679
+
680
+ except Exception as e:
681
+ raise StorageError(f"Local file write failed: {e}")
682
+
683
+ async def _write_to_cloud_storage(self, target_path: str, content: Union[str, bytes],
684
+ format: DocumentFormat, encoding: EncodingType, plan: Dict) -> Dict:
685
+ """Write to cloud storage"""
686
+
687
+ if not self.file_storage:
688
+ raise StorageError("Cloud storage not available")
689
+
690
+ try:
691
+ storage_path = self._parse_cloud_storage_path(target_path)
692
+
693
+ # Handle append mode for cloud storage
694
+ if plan["mode"] == WriteMode.APPEND and plan["file_exists"]:
695
+ existing_content = await self.file_storage.retrieve(storage_path)
696
+ if isinstance(content, str) and isinstance(existing_content, str):
697
+ content = existing_content + content
698
+ elif isinstance(content, bytes) and isinstance(existing_content, bytes):
699
+ content = existing_content + content
700
+
701
+ # Store in cloud storage
702
+ await self.file_storage.store(storage_path, content)
703
+
704
+ return {
705
+ "path": target_path,
706
+ "storage_path": storage_path,
707
+ "size": len(content) if isinstance(content, (str, bytes)) else 0,
708
+ "checksum": self._calculate_checksum(content),
709
+ "cloud_storage": True
710
+ }
711
+
712
+ except Exception as e:
713
+ raise StorageError(f"Cloud storage write failed: {e}")
714
+
715
+ def _handle_versioning(self, target_path: str, content_metadata: Dict, metadata: Optional[Dict]) -> Optional[Dict]:
716
+ """Handle document versioning"""
717
+
718
+ if not self.settings.enable_versioning:
719
+ return None
720
+
721
+ try:
722
+ version_info = {
723
+ "path": target_path,
724
+ "version": self._get_next_version(target_path),
725
+ "timestamp": datetime.now().isoformat(),
726
+ "content_metadata": content_metadata,
727
+ "user_metadata": metadata or {}
728
+ }
729
+
730
+ # Store version info
731
+ version_file = f"{target_path}.versions.json"
732
+ versions = self._load_version_history(version_file)
733
+ versions.append(version_info)
734
+
735
+ # Keep only recent versions
736
+ if len(versions) > self.settings.max_backup_versions:
737
+ versions = versions[-self.settings.max_backup_versions:]
738
+
739
+ self._save_version_history(version_file, versions)
740
+
741
+ return version_info
742
+
743
+ except Exception as e:
744
+ self.logger.warning(f"Versioning failed for {target_path}: {e}")
745
+ return None
746
+
747
+ def _validate_content(self, content: Union[str, bytes], format: DocumentFormat,
748
+ validation_level: ValidationLevel):
749
+ """Validate content based on format and validation level"""
750
+
751
+ if validation_level == ValidationLevel.NONE:
752
+ return
753
+
754
+ try:
755
+ # Format-specific validation
756
+ if format in self.validators:
757
+ self.validators[format](content, validation_level)
758
+
759
+ # Security validation for enterprise level
760
+ if validation_level == ValidationLevel.ENTERPRISE:
761
+ self._security_scan_content(content)
762
+
763
+ except Exception as e:
764
+ raise ContentValidationError(f"Content validation failed: {e}")
765
+
766
+ def _validate_json_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
767
+ """Validate JSON content"""
768
+ try:
769
+ if isinstance(content, bytes):
770
+ content = content.decode('utf-8')
771
+ json.loads(content)
772
+ except json.JSONDecodeError as e:
773
+ raise ContentValidationError(f"Invalid JSON: {e}")
774
+
775
+ def _validate_xml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
776
+ """Validate XML content"""
777
+ try:
778
+ import xml.etree.ElementTree as ET
779
+ if isinstance(content, bytes):
780
+ content = content.decode('utf-8')
781
+ ET.fromstring(content)
782
+ except ET.ParseError as e:
783
+ raise ContentValidationError(f"Invalid XML: {e}")
784
+
785
+ def _validate_csv_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
786
+ """Validate CSV content"""
787
+ try:
788
+ import csv
789
+ import io
790
+ if isinstance(content, bytes):
791
+ content = content.decode('utf-8')
792
+ csv.reader(io.StringIO(content))
793
+ except Exception as e:
794
+ raise ContentValidationError(f"Invalid CSV: {e}")
795
+
796
+ def _validate_yaml_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
797
+ """Validate YAML content"""
798
+ try:
799
+ import yaml
800
+ if isinstance(content, bytes):
801
+ content = content.decode('utf-8')
802
+ yaml.safe_load(content)
803
+ except yaml.YAMLError as e:
804
+ raise ContentValidationError(f"Invalid YAML: {e}")
805
+
806
+ def _validate_html_content(self, content: Union[str, bytes], validation_level: ValidationLevel):
807
+ """Validate HTML content"""
808
+ try:
809
+ from bs4 import BeautifulSoup
810
+ if isinstance(content, bytes):
811
+ content = content.decode('utf-8')
812
+ BeautifulSoup(content, 'html.parser')
813
+ except Exception as e:
814
+ raise ContentValidationError(f"Invalid HTML: {e}")
815
+
816
+ def _security_scan_content(self, content: Union[str, bytes]):
817
+ """Perform security scan on content"""
818
+ if isinstance(content, bytes):
819
+ content = content.decode('utf-8', errors='ignore')
820
+
821
+ # Check for suspicious patterns
822
+ suspicious_patterns = [
823
+ r'<script[^>]*>', # JavaScript
824
+ r'javascript:', # JavaScript URLs
825
+ r'vbscript:', # VBScript URLs
826
+ r'data:.*base64', # Base64 data URLs
827
+ r'eval\s*\(', # eval() calls
828
+ r'exec\s*\(', # exec() calls
829
+ ]
830
+
831
+ import re
832
+ for pattern in suspicious_patterns:
833
+ if re.search(pattern, content, re.IGNORECASE):
834
+ raise ContentValidationError(f"Security scan failed: suspicious pattern detected")
835
+
836
+ # Helper methods
837
+ def _calculate_content_size(self, content: Any) -> int:
838
+ """Calculate content size in bytes"""
839
+ if isinstance(content, bytes):
840
+ return len(content)
841
+ elif isinstance(content, str):
842
+ return len(content.encode('utf-8'))
843
+ else:
844
+ return len(str(content).encode('utf-8'))
845
+
846
+ def _calculate_checksum(self, content: Union[str, bytes]) -> str:
847
+ """Calculate content checksum"""
848
+ if isinstance(content, str):
849
+ content = content.encode('utf-8')
850
+ return hashlib.sha256(content).hexdigest()
851
+
852
+ def _calculate_file_checksum(self, file_path: str) -> str:
853
+ """Calculate file checksum"""
854
+ hash_sha256 = hashlib.sha256()
855
+ with open(file_path, "rb") as f:
856
+ for chunk in iter(lambda: f.read(4096), b""):
857
+ hash_sha256.update(chunk)
858
+ return hash_sha256.hexdigest()
859
+
860
+ def _check_write_permission(self, target_path: str, mode: WriteMode) -> bool:
861
+ """Check write permission for target path"""
862
+ try:
863
+ if self._is_cloud_storage_path(target_path):
864
+ return self.file_storage is not None
865
+
866
+ parent_dir = os.path.dirname(target_path)
867
+ if not os.path.exists(parent_dir):
868
+ # Check if we can create the directory
869
+ return os.access(os.path.dirname(parent_dir), os.W_OK)
870
+
871
+ if os.path.exists(target_path):
872
+ return os.access(target_path, os.W_OK)
873
+ else:
874
+ return os.access(parent_dir, os.W_OK)
875
+
876
+ except Exception:
877
+ return False
878
+
879
+ def _file_exists(self, file_path: str) -> bool:
880
+ """Check if file exists (local or cloud)"""
881
+ if self._is_cloud_storage_path(file_path):
882
+ # For cloud storage, we'd need to implement exists check
883
+ return False # Simplified for now
884
+ else:
885
+ return os.path.exists(file_path)
886
+
887
+ def _is_cloud_storage_path(self, source: str) -> bool:
888
+ """Check if source is a cloud storage path"""
889
+ cloud_schemes = ['gs', 's3', 'azure', 'cloud']
890
+ try:
891
+ from urllib.parse import urlparse
892
+ parsed = urlparse(source)
893
+ return parsed.scheme in cloud_schemes
894
+ except:
895
+ return False
896
+
897
+ def _parse_cloud_storage_path(self, source: str) -> str:
898
+ """Parse cloud storage path to get storage key"""
899
+ try:
900
+ from urllib.parse import urlparse
901
+ parsed = urlparse(source)
902
+ return parsed.path.lstrip('/')
903
+ except Exception:
904
+ return source
905
+
906
+ # Content conversion methods
907
+ def _convert_to_csv(self, content: Any) -> str:
908
+ """Convert content to CSV format"""
909
+ import csv
910
+ import io
911
+
912
+ output = io.StringIO()
913
+ writer = csv.writer(output)
914
+
915
+ if isinstance(content, list):
916
+ for row in content:
917
+ if isinstance(row, (list, tuple)):
918
+ writer.writerow(row)
919
+ else:
920
+ writer.writerow([row])
921
+ elif isinstance(content, dict):
922
+ # Convert dict to CSV with headers
923
+ if content:
924
+ headers = list(content.keys())
925
+ writer.writerow(headers)
926
+ writer.writerow([content[h] for h in headers])
927
+ else:
928
+ writer.writerow([str(content)])
929
+
930
+ return output.getvalue()
931
+
932
+ def _convert_to_xml(self, content: Any) -> str:
933
+ """Convert content to XML format"""
934
+ import xml.etree.ElementTree as ET
935
+
936
+ if isinstance(content, dict):
937
+ root = ET.Element("document")
938
+ for key, value in content.items():
939
+ elem = ET.SubElement(root, str(key))
940
+ elem.text = str(value)
941
+ return ET.tostring(root, encoding='unicode')
942
+ else:
943
+ root = ET.Element("document")
944
+ root.text = str(content)
945
+ return ET.tostring(root, encoding='unicode')
946
+
947
+ def _convert_to_yaml(self, content: Any) -> str:
948
+ """Convert content to YAML format"""
949
+ try:
950
+ import yaml
951
+ return yaml.dump(content, default_flow_style=False, allow_unicode=True)
952
+ except ImportError:
953
+ # Fallback to simple string representation
954
+ return str(content)
955
+
956
+ def _convert_to_html(self, content: Any) -> str:
957
+ """Convert content to HTML format"""
958
+ if isinstance(content, dict):
959
+ html = "<html><body>\n"
960
+ for key, value in content.items():
961
+ html += f"<h3>{key}</h3>\n<p>{value}</p>\n"
962
+ html += "</body></html>"
963
+ return html
964
+ else:
965
+ return f"<html><body><pre>{str(content)}</pre></body></html>"
966
+
967
+ def _convert_to_markdown(self, content: Any) -> str:
968
+ """Convert content to Markdown format"""
969
+ if isinstance(content, dict):
970
+ md = ""
971
+ for key, value in content.items():
972
+ md += f"## {key}\n\n{value}\n\n"
973
+ return md
974
+ else:
975
+ return str(content)
976
+
977
+ # Versioning methods
978
+ def _get_next_version(self, file_path: str) -> int:
979
+ """Get next version number for file"""
980
+ version_file = f"{file_path}.versions.json"
981
+ versions = self._load_version_history(version_file)
982
+ return len(versions) + 1
983
+
984
+ def _load_version_history(self, version_file: str) -> List[Dict]:
985
+ """Load version history from file"""
986
+ try:
987
+ if os.path.exists(version_file):
988
+ with open(version_file, 'r') as f:
989
+ return json.load(f)
990
+ except Exception:
991
+ pass
992
+ return []
993
+
994
+ def _save_version_history(self, version_file: str, versions: List[Dict]):
995
+ """Save version history to file"""
996
+ try:
997
+ with open(version_file, 'w') as f:
998
+ json.dump(versions, f, indent=2)
999
+ except Exception as e:
1000
+ self.logger.warning(f"Failed to save version history: {e}")
1001
+
1002
+ # Backup and rollback methods
1003
+ def _backup_cloud_file(self, source_path: str, backup_path: str) -> str:
1004
+ """Backup cloud file"""
1005
+ # Simplified implementation
1006
+ return backup_path
1007
+
1008
+ def _rollback_from_backup(self, target_path: str, backup_info: Dict):
1009
+ """Rollback file from backup"""
1010
+ try:
1011
+ if backup_info and os.path.exists(backup_info["backup_path"]):
1012
+ shutil.copy2(backup_info["backup_path"], target_path)
1013
+ self.logger.info(f"Rolled back {target_path} from backup")
1014
+ except Exception as e:
1015
+ self.logger.error(f"Rollback failed: {e}")
1016
+
1017
+ def _rollback_batch_operations(self, completed_operations: List[Dict], backup_operations: List[Dict]):
1018
+ """Rollback batch operations"""
1019
+ for op in reversed(completed_operations):
1020
+ try:
1021
+ result = op.get("result", {})
1022
+ backup_info = result.get("backup_info")
1023
+ if backup_info:
1024
+ self._rollback_from_backup(
1025
+ result["write_result"]["path"],
1026
+ backup_info
1027
+ )
1028
+ except Exception as e:
1029
+ self.logger.error(f"Batch rollback failed for operation: {e}")
1030
+
1031
+ def _log_write_operation(self, operation_id: str, target_path: str, mode: WriteMode,
1032
+ write_result: Dict, backup_info: Optional[Dict]) -> Dict:
1033
+ """Log write operation for audit"""
1034
+ audit_info = {
1035
+ "operation_id": operation_id,
1036
+ "timestamp": datetime.now().isoformat(),
1037
+ "target_path": target_path,
1038
+ "mode": mode,
1039
+ "success": True,
1040
+ "file_size": write_result.get("size", 0),
1041
+ "checksum": write_result.get("checksum"),
1042
+ "backup_created": backup_info is not None
1043
+ }
1044
+
1045
+ # Log to audit file
1046
+ try:
1047
+ audit_file = os.path.join(self.settings.temp_dir, "write_audit.log")
1048
+ with open(audit_file, "a") as f:
1049
+ f.write(json.dumps(audit_info) + "\n")
1050
+ except Exception as e:
1051
+ self.logger.warning(f"Audit logging failed: {e}")
1052
+
1053
+ return audit_info
1054
+
1055
+ def edit_document(self,
1056
+ target_path: str,
1057
+ operation: EditOperation,
1058
+ content: Optional[str] = None,
1059
+ position: Optional[Dict[str, Any]] = None,
1060
+ selection: Optional[Dict[str, Any]] = None,
1061
+ format_options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
1062
+ """
1063
+ Perform advanced editing operations on documents
1064
+
1065
+ Args:
1066
+ target_path: Target file path
1067
+ operation: Edit operation to perform
1068
+ content: Content for the operation (if applicable)
1069
+ position: Position info (line, column, offset)
1070
+ selection: Text selection range
1071
+ format_options: Additional format options
1072
+
1073
+ Returns:
1074
+ Dict containing edit results
1075
+ """
1076
+ try:
1077
+ start_time = datetime.now()
1078
+ operation_id = str(uuid.uuid4())
1079
+
1080
+ self.logger.info(f"Starting edit operation {operation_id}: {operation} on {target_path}")
1081
+
1082
+ # Read current document content
1083
+ current_content = self._read_document_content(target_path)
1084
+
1085
+ # Perform the specific edit operation
1086
+ if operation == EditOperation.INSERT_TEXT:
1087
+ edited_content = self._insert_text(current_content, content, position)
1088
+ elif operation == EditOperation.DELETE_TEXT:
1089
+ edited_content = self._delete_text(current_content, selection)
1090
+ elif operation == EditOperation.REPLACE_TEXT:
1091
+ edited_content = self._replace_text(current_content, selection, content)
1092
+ elif operation == EditOperation.BOLD:
1093
+ edited_content = self._format_text_bold(current_content, selection, format_options)
1094
+ elif operation == EditOperation.ITALIC:
1095
+ edited_content = self._format_text_italic(current_content, selection, format_options)
1096
+ elif operation == EditOperation.UNDERLINE:
1097
+ edited_content = self._format_text_underline(current_content, selection, format_options)
1098
+ elif operation == EditOperation.STRIKETHROUGH:
1099
+ edited_content = self._format_text_strikethrough(current_content, selection, format_options)
1100
+ elif operation == EditOperation.HIGHLIGHT:
1101
+ edited_content = self._format_text_highlight(current_content, selection, format_options)
1102
+ elif operation == EditOperation.INSERT_LINE:
1103
+ edited_content = self._insert_line(current_content, position, content)
1104
+ elif operation == EditOperation.DELETE_LINE:
1105
+ edited_content = self._delete_line(current_content, position)
1106
+ elif operation == EditOperation.MOVE_LINE:
1107
+ edited_content = self._move_line(current_content, position, format_options)
1108
+ elif operation == EditOperation.COPY_TEXT:
1109
+ return self._copy_text(current_content, selection)
1110
+ elif operation == EditOperation.CUT_TEXT:
1111
+ edited_content, cut_content = self._cut_text(current_content, selection)
1112
+ # Store cut content in clipboard
1113
+ self._store_clipboard_content(cut_content)
1114
+ elif operation == EditOperation.PASTE_TEXT:
1115
+ clipboard_content = self._get_clipboard_content()
1116
+ edited_content = self._paste_text(current_content, position, clipboard_content)
1117
+ else:
1118
+ raise ValueError(f"Unsupported edit operation: {operation}")
1119
+
1120
+ # Write the edited content back to file
1121
+ file_format = self._detect_file_format(target_path)
1122
+ write_result = self.write_document(
1123
+ target_path=target_path,
1124
+ content=edited_content,
1125
+ format=file_format,
1126
+ mode="backup_write", # Always backup before editing
1127
+ backup_comment=f"Edit operation: {operation}"
1128
+ )
1129
+
1130
+ result = {
1131
+ "operation_id": operation_id,
1132
+ "target_path": target_path,
1133
+ "operation": operation,
1134
+ "edit_metadata": {
1135
+ "original_size": len(current_content),
1136
+ "edited_size": len(edited_content) if isinstance(edited_content, str) else 0,
1137
+ "position": position,
1138
+ "selection": selection
1139
+ },
1140
+ "write_result": write_result,
1141
+ "processing_metadata": {
1142
+ "start_time": start_time.isoformat(),
1143
+ "end_time": datetime.now().isoformat(),
1144
+ "duration": (datetime.now() - start_time).total_seconds()
1145
+ }
1146
+ }
1147
+
1148
+ self.logger.info(f"Edit operation {operation_id} completed successfully")
1149
+ return result
1150
+
1151
+ except Exception as e:
1152
+ raise DocumentWriterError(f"Edit operation failed: {str(e)}")
1153
+
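# A hedged sketch of a single edit on the file written in the earlier example
# (illustrative values; EditOperation is defined at the top of this module,
# and the edited content is written back via write_document):
#
#   tool.edit_document(
#       target_path="/tmp/example_report.md",
#       operation=EditOperation.INSERT_LINE,
#       content="## Appendix",
#       position={"line": 2},
#   )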
1154
+ def format_text(self,
1155
+ target_path: str,
1156
+ text_to_format: str,
1157
+ format_type: EditOperation,
1158
+ format_options: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
1159
+ """
1160
+ Apply formatting to specific text in a document
1161
+
1162
+ Args:
1163
+ target_path: Target file path
1164
+ text_to_format: Text to apply formatting to
1165
+ format_type: Type of formatting (bold, italic, etc.)
1166
+ format_options: Additional format options
1167
+
1168
+ Returns:
1169
+ Dict containing formatting results
1170
+ """
1171
+ try:
1172
+ current_content = self._read_document_content(target_path)
1173
+
1174
+ # Find all occurrences of the text
1175
+ formatted_content = self._apply_text_formatting(
1176
+ current_content, text_to_format, format_type, format_options
1177
+ )
1178
+
1179
+ # Write back to file
1180
+ file_format = self._detect_file_format(target_path)
1181
+ write_result = self.write_document(
1182
+ target_path=target_path,
1183
+ content=formatted_content,
1184
+ format=file_format,
1185
+ mode="backup_write"
1186
+ )
1187
+
1188
+ return {
1189
+ "target_path": target_path,
1190
+ "text_formatted": text_to_format,
1191
+ "format_type": format_type,
1192
+ "write_result": write_result
1193
+ }
1194
+
1195
+ except Exception as e:
1196
+ raise DocumentWriterError(f"Text formatting failed: {str(e)}")
1197
+
1198
+ def find_replace(self,
1199
+ target_path: str,
1200
+ find_text: str,
1201
+ replace_text: str,
1202
+ replace_all: bool = False,
1203
+ case_sensitive: bool = True,
1204
+ regex_mode: bool = False) -> Dict[str, Any]:
1205
+ """
1206
+ Find and replace text in a document
1207
+
1208
+ Args:
1209
+ target_path: Target file path
1210
+ find_text: Text to find
1211
+ replace_text: Text to replace with
1212
+ replace_all: Replace all occurrences
1213
+ case_sensitive: Case sensitive search
1214
+ regex_mode: Use regex for find/replace
1215
+
1216
+ Returns:
1217
+ Dict containing find/replace results
1218
+ """
1219
+ try:
1220
+ current_content = self._read_document_content(target_path)
1221
+
1222
+ # Perform find and replace
1223
+ new_content, replacements = self._perform_find_replace(
1224
+ current_content, find_text, replace_text,
1225
+ replace_all, case_sensitive, regex_mode
1226
+ )
1227
+
1228
+ if replacements > 0:
1229
+ # Write back to file
1230
+ file_format = self._detect_file_format(target_path)
1231
+ write_result = self.write_document(
1232
+ target_path=target_path,
1233
+ content=new_content,
1234
+ format=file_format,
1235
+ mode="backup_write",
1236
+ backup_comment=f"Find/Replace: '{find_text}' -> '{replace_text}'"
1237
+ )
1238
+
1239
+ return {
1240
+ "target_path": target_path,
1241
+ "find_text": find_text,
1242
+ "replace_text": replace_text,
1243
+ "replacements_made": replacements,
1244
+ "write_result": write_result
1245
+ }
1246
+ else:
1247
+ return {
1248
+ "target_path": target_path,
1249
+ "find_text": find_text,
1250
+ "replace_text": replace_text,
1251
+ "replacements_made": 0,
1252
+ "message": "No matches found"
1253
+ }
1254
+
1255
+ except Exception as e:
1256
+ raise DocumentWriterError(f"Find/replace operation failed: {str(e)}")
1257
+
1258
+ # Helper methods for editing operations
1259
+ def _read_document_content(self, file_path: str) -> str:
1260
+ """Read document content for editing"""
1261
+ try:
1262
+ with open(file_path, 'r', encoding='utf-8') as f:
1263
+ return f.read()
1264
+ except UnicodeDecodeError:
1265
+ # Try with different encodings
1266
+ for encoding in ['gbk', 'latin1', 'cp1252']:
1267
+ try:
1268
+ with open(file_path, 'r', encoding=encoding) as f:
1269
+ return f.read()
1270
+ except:
1271
+ continue
1272
+ raise DocumentWriterError(f"Cannot decode file: {file_path}")
1273
+ except Exception as e:
1274
+ raise DocumentWriterError(f"Cannot read file {file_path}: {str(e)}")
1275
+
1276
+ def _detect_file_format(self, file_path: str) -> str:
1277
+ """Detect file format from extension"""
1278
+ ext = os.path.splitext(file_path)[1].lower()
1279
+ format_map = {
1280
+ '.txt': 'txt', '.json': 'json', '.csv': 'csv',
1281
+ '.xml': 'xml', '.html': 'html', '.htm': 'html',
1282
+ '.md': 'markdown', '.markdown': 'markdown',
1283
+ '.yaml': 'yaml', '.yml': 'yaml'
1284
+ }
1285
+ return format_map.get(ext, 'txt')
1286
+
1287
+ def _insert_text(self, content: str, text: str, position: Optional[Dict[str, Any]]) -> str:
1288
+ """Insert text at specified position"""
1289
+ if not position:
1290
+ return content + text
1291
+
1292
+ if 'offset' in position:
1293
+ offset = position['offset']
1294
+ return content[:offset] + text + content[offset:]
1295
+ elif 'line' in position:
1296
+ lines = content.split('\n')
1297
+ line_num = position.get('line', 0)
1298
+ column = position.get('column', 0)
1299
+
1300
+ if line_num < len(lines):
1301
+ line = lines[line_num]
1302
+ lines[line_num] = line[:column] + text + line[column:]
1303
+ else:
1304
+ lines.append(text)
1305
+ return '\n'.join(lines)
1306
+ else:
1307
+ return content + text
1308
+
1309
+ def _delete_text(self, content: str, selection: Optional[Dict[str, Any]]) -> str:
1310
+ """Delete text in specified selection"""
1311
+ if not selection:
1312
+ return content
1313
+
1314
+ if 'start_offset' in selection and 'end_offset' in selection:
1315
+ start = selection['start_offset']
1316
+ end = selection['end_offset']
1317
+ return content[:start] + content[end:]
1318
+ elif 'start_line' in selection and 'end_line' in selection:
1319
+ lines = content.split('\n')
1320
+ start_line = selection['start_line']
1321
+ end_line = selection['end_line']
1322
+ start_col = selection.get('start_column', 0)
1323
+ end_col = selection.get('end_column', len(lines[end_line]) if end_line < len(lines) else 0)
1324
+
1325
+ if start_line == end_line:
1326
+ # Same line deletion
1327
+ line = lines[start_line]
1328
+ lines[start_line] = line[:start_col] + line[end_col:]
1329
+ else:
1330
+ # Multi-line deletion
1331
+ lines[start_line] = lines[start_line][:start_col]
1332
+ if end_line < len(lines):
1333
+ lines[start_line] += lines[end_line][end_col:]
1334
+ del lines[start_line + 1:end_line + 1]
1335
+
1336
+ return '\n'.join(lines)
1337
+
1338
+ return content
1339
+
1340
+ def _replace_text(self, content: str, selection: Optional[Dict[str, Any]], replacement: str) -> str:
1341
+ """Replace text in specified selection"""
1342
+ if not selection:
1343
+ return content
1344
+
1345
+ # First delete the selected text, then insert replacement
1346
+ content_after_delete = self._delete_text(content, selection)
1347
+
1348
+ # Calculate new insertion position after deletion
1349
+ if 'start_offset' in selection:
1350
+ insert_pos = {'offset': selection['start_offset']}
1351
+ elif 'start_line' in selection:
1352
+ insert_pos = {
1353
+ 'line': selection['start_line'],
1354
+ 'column': selection.get('start_column', 0)
1355
+ }
1356
+ else:
1357
+ insert_pos = None
1358
+
1359
+ return self._insert_text(content_after_delete, replacement, insert_pos)
1360
+
1361
+ def _format_text_bold(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1362
+ """Apply bold formatting to selected text"""
1363
+ if not selection:
1364
+ return content
1365
+
1366
+ format_type = options.get('format_type', 'markdown') if options else 'markdown'
1367
+
1368
+ if format_type == 'markdown':
1369
+ return self._apply_markdown_formatting(content, selection, '**', '**')
1370
+ elif format_type == 'html':
1371
+ return self._apply_html_formatting(content, selection, '<strong>', '</strong>')
1372
+ else:
1373
+ return content
1374
+
1375
+ def _format_text_italic(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1376
+ """Apply italic formatting to selected text"""
1377
+ if not selection:
1378
+ return content
1379
+
1380
+ format_type = options.get('format_type', 'markdown') if options else 'markdown'
1381
+
1382
+ if format_type == 'markdown':
1383
+ return self._apply_markdown_formatting(content, selection, '*', '*')
1384
+ elif format_type == 'html':
1385
+ return self._apply_html_formatting(content, selection, '<em>', '</em>')
1386
+ else:
1387
+ return content
1388
+
1389
+ def _format_text_underline(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1390
+ """Apply underline formatting to selected text"""
1391
+ if not selection:
1392
+ return content
1393
+
1394
+ format_type = options.get('format_type', 'html') if options else 'html'
1395
+
1396
+ if format_type == 'html':
1397
+ return self._apply_html_formatting(content, selection, '<u>', '</u>')
1398
+ else:
1399
+ return content
1400
+
1401
+ def _format_text_strikethrough(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1402
+ """Apply strikethrough formatting to selected text"""
1403
+ if not selection:
1404
+ return content
1405
+
1406
+ format_type = options.get('format_type', 'markdown') if options else 'markdown'
1407
+
1408
+ if format_type == 'markdown':
1409
+ return self._apply_markdown_formatting(content, selection, '~~', '~~')
1410
+ elif format_type == 'html':
1411
+ return self._apply_html_formatting(content, selection, '<del>', '</del>')
1412
+ else:
1413
+ return content
1414
+
1415
+ def _format_text_highlight(self, content: str, selection: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1416
+ """Apply highlight formatting to selected text"""
1417
+ if not selection:
1418
+ return content
1419
+
1420
+ format_type = options.get('format_type', 'html') if options else 'html'
1421
+ color = options.get('color', 'yellow') if options else 'yellow'
1422
+
1423
+ if format_type == 'html':
1424
+ return self._apply_html_formatting(content, selection, f'<mark style="background-color: {color}">', '</mark>')
1425
+ elif format_type == 'markdown':
1426
+ return self._apply_markdown_formatting(content, selection, '==', '==')
1427
+ else:
1428
+ return content
1429
+
1430
+ def _apply_markdown_formatting(self, content: str, selection: Dict[str, Any], start_marker: str, end_marker: str) -> str:
1431
+ """Apply markdown formatting to selected text"""
1432
+ selected_text = self._extract_selected_text(content, selection)
1433
+ formatted_text = start_marker + selected_text + end_marker
1434
+ return self._replace_text(content, selection, formatted_text)
1435
+
1436
+ def _apply_html_formatting(self, content: str, selection: Dict[str, Any], start_tag: str, end_tag: str) -> str:
1437
+ """Apply HTML formatting to selected text"""
1438
+ selected_text = self._extract_selected_text(content, selection)
1439
+ formatted_text = start_tag + selected_text + end_tag
1440
+ return self._replace_text(content, selection, formatted_text)
1441
+
1442
+ def _extract_selected_text(self, content: str, selection: Dict[str, Any]) -> str:
1443
+ """Extract text from selection"""
1444
+ if 'start_offset' in selection and 'end_offset' in selection:
1445
+ return content[selection['start_offset']:selection['end_offset']]
1446
+ elif 'start_line' in selection and 'end_line' in selection:
1447
+ lines = content.split('\n')
1448
+ start_line = selection['start_line']
1449
+ end_line = selection['end_line']
1450
+ start_col = selection.get('start_column', 0)
1451
+ end_col = selection.get('end_column', len(lines[end_line]) if end_line < len(lines) else 0)
1452
+
1453
+ if start_line == end_line:
1454
+ return lines[start_line][start_col:end_col]
1455
+ else:
1456
+ result = [lines[start_line][start_col:]]
1457
+ result.extend(lines[start_line + 1:end_line])
1458
+ if end_line < len(lines):
1459
+ result.append(lines[end_line][:end_col])
1460
+ return '\n'.join(result)
1461
+ return ""
1462
+
1463
+ def _insert_line(self, content: str, position: Optional[Dict[str, Any]], line_content: str) -> str:
1464
+ """Insert a new line at specified position"""
1465
+ lines = content.split('\n')
1466
+ line_num = position.get('line', len(lines)) if position else len(lines)
1467
+
1468
+ lines.insert(line_num, line_content)
1469
+ return '\n'.join(lines)
1470
+
1471
+ def _delete_line(self, content: str, position: Optional[Dict[str, Any]]) -> str:
1472
+ """Delete line at specified position"""
1473
+ lines = content.split('\n')
1474
+ line_num = position.get('line', 0) if position else 0
1475
+
1476
+ if 0 <= line_num < len(lines):
1477
+ del lines[line_num]
1478
+
1479
+ return '\n'.join(lines)
1480
+
1481
+ def _move_line(self, content: str, position: Optional[Dict[str, Any]], options: Optional[Dict[str, Any]]) -> str:
1482
+ """Move line to different position"""
1483
+ lines = content.split('\n')
1484
+ from_line = position.get('line', 0) if position else 0
1485
+ to_line = options.get('to_line', 0) if options else 0
1486
+
1487
+ if 0 <= from_line < len(lines) and 0 <= to_line < len(lines):
1488
+ line_content = lines.pop(from_line)
1489
+ lines.insert(to_line, line_content)
1490
+
1491
+ return '\n'.join(lines)
1492
+
1493
+ def _copy_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Dict[str, Any]:
1494
+ """Copy selected text to clipboard"""
1495
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1496
+ self._store_clipboard_content(selected_text)
1497
+
1498
+ return {
1499
+ "operation": "copy",
1500
+ "copied_text": selected_text,
1501
+ "copied_length": len(selected_text)
1502
+ }
1503
+
1504
+ def _cut_text(self, content: str, selection: Optional[Dict[str, Any]]) -> Tuple[str, str]:
1505
+ """Cut selected text (copy and delete)"""
1506
+ selected_text = self._extract_selected_text(content, selection) if selection else content
1507
+ new_content = self._delete_text(content, selection) if selection else ""
1508
+
1509
+ return new_content, selected_text
1510
+
1511
+ def _paste_text(self, content: str, position: Optional[Dict[str, Any]], clipboard_content: str) -> str:
1512
+ """Paste text from clipboard"""
1513
+ return self._insert_text(content, clipboard_content, position)
1514
+
1515
+ def _store_clipboard_content(self, content: str):
1516
+ """Store content in clipboard (simplified implementation)"""
1517
+ clipboard_file = os.path.join(self.settings.temp_dir, "clipboard.txt")
1518
+ try:
1519
+ with open(clipboard_file, 'w', encoding='utf-8') as f:
1520
+ f.write(content)
1521
+ except Exception as e:
1522
+ self.logger.warning(f"Failed to store clipboard content: {e}")
1523
+
1524
+ def _get_clipboard_content(self) -> str:
1525
+ """Get content from clipboard"""
1526
+ clipboard_file = os.path.join(self.settings.temp_dir, "clipboard.txt")
1527
+ try:
1528
+ with open(clipboard_file, 'r', encoding='utf-8') as f:
1529
+ return f.read()
1530
+ except Exception:
1531
+ return ""
1532
+
1533
+ def _apply_text_formatting(self, content: str, text_to_format: str, format_type: EditOperation, options: Optional[Dict[str, Any]]) -> str:
1534
+ """Apply formatting to all occurrences of specific text"""
1535
+ if format_type == EditOperation.BOLD:
1536
+ replacement = f"**{text_to_format}**"
1537
+ elif format_type == EditOperation.ITALIC:
1538
+ replacement = f"*{text_to_format}*"
1539
+ elif format_type == EditOperation.UNDERLINE:
1540
+ replacement = f"<u>{text_to_format}</u>"
1541
+ elif format_type == EditOperation.STRIKETHROUGH:
1542
+ replacement = f"~~{text_to_format}~~"
1543
+ elif format_type == EditOperation.HIGHLIGHT:
1544
+ color = options.get('color', 'yellow') if options else 'yellow'
1545
+ replacement = f'<mark style="background-color: {color}">{text_to_format}</mark>'
1546
+ else:
1547
+ replacement = text_to_format
1548
+
1549
+ return content.replace(text_to_format, replacement)
1550
+
1551
+ def _perform_find_replace(self, content: str, find_text: str, replace_text: str,
1552
+ replace_all: bool, case_sensitive: bool, regex_mode: bool) -> Tuple[str, int]:
1553
+ """Perform find and replace operation"""
1554
+ import re
1555
+
1556
+ replacements = 0
1557
+
1558
+ if regex_mode:
1559
+ flags = 0 if case_sensitive else re.IGNORECASE
1560
+ if replace_all:
1561
+ new_content, replacements = re.subn(find_text, replace_text, content, flags=flags)
1562
+ else:
1563
+ new_content = re.sub(find_text, replace_text, content, count=1, flags=flags)
1564
+ replacements = 1 if new_content != content else 0
1565
+ else:
1566
+ if case_sensitive:
1567
+ if replace_all:
1568
+ replacements = content.count(find_text)
1569
+ new_content = content.replace(find_text, replace_text)
1570
+ else:
1571
+ new_content = content.replace(find_text, replace_text, 1)
1572
+ replacements = 1 if new_content != content else 0
1573
+ else:
1574
+ # Case insensitive replacement
1575
+ import re
1576
+ pattern = re.escape(find_text)
1577
+ if replace_all:
1578
+ new_content, replacements = re.subn(pattern, replace_text, content, flags=re.IGNORECASE)
1579
+ else:
1580
+ new_content = re.sub(pattern, replace_text, content, count=1, flags=re.IGNORECASE)
1581
+ replacements = 1 if new_content != content else 0
1582
+
1583
+ return new_content, replacements
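
For completeness, a hedged sketch of the find/replace entry point added above (the path and strings are illustrative):

    from aiecs.tools.docs.document_writer_tool import DocumentWriterTool

    tool = DocumentWriterTool()
    changes = tool.find_replace(
        target_path="/tmp/example_report.md",   # hypothetical path
        find_text="stable",
        replace_text="flat",
        replace_all=True,
        case_sensitive=False,                   # handled via re.IGNORECASE above
    )
    print(changes["replacements_made"])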