cloudflare-images-migrator 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloudflare_images_migrator-1.0.0.dist-info/METADATA +474 -0
- cloudflare_images_migrator-1.0.0.dist-info/RECORD +17 -0
- cloudflare_images_migrator-1.0.0.dist-info/WHEEL +5 -0
- cloudflare_images_migrator-1.0.0.dist-info/entry_points.txt +3 -0
- cloudflare_images_migrator-1.0.0.dist-info/licenses/LICENSE +21 -0
- cloudflare_images_migrator-1.0.0.dist-info/top_level.txt +1 -0
- src/__init__.py +1 -0
- src/audit.py +620 -0
- src/cloudflare_client.py +746 -0
- src/config.py +161 -0
- src/image_tracker.py +405 -0
- src/logger.py +160 -0
- src/migrator.py +491 -0
- src/parsers.py +609 -0
- src/quality.py +558 -0
- src/security.py +528 -0
- src/utils.py +355 -0
src/logger.py
ADDED
@@ -0,0 +1,160 @@
|
|
1
|
+
"""
|
2
|
+
Logging configuration for Cloudflare Images Migration Tool
|
3
|
+
"""
|
4
|
+
|
5
|
+
import logging
|
6
|
+
import sys
|
7
|
+
from datetime import datetime
|
8
|
+
from pathlib import Path
|
9
|
+
from colorama import init, Fore, Style, Back
|
10
|
+
|
11
|
+
|
12
|
+
# Initialize colorama for cross-platform colored output
|
13
|
+
init(autoreset=True)
|
14
|
+
|
15
|
+
|
16
|
+
class ColoredFormatter(logging.Formatter):
    """Formatter that colours the level name for console output.

    The colour prefix is looked up in ``COLORS`` by the record's level name.
    Level names without an entry get no colour prefix, but are still followed
    by ``Style.RESET_ALL``.
    """

    # Level name -> colorama escape sequence placed in front of the level name.
    COLORS = {
        'DEBUG': Fore.BLUE,
        'INFO': Fore.GREEN,
        'WARNING': Fore.YELLOW,
        'ERROR': Fore.RED,
        'CRITICAL': Fore.RED + Back.WHITE,
    }

    def format(self, record):
        """Format ``record`` with a coloured level name.

        Args:
            record: The ``logging.LogRecord`` to format.

        Returns:
            The formatted message string produced by ``logging.Formatter``.
        """
        plain_levelname = record.levelname
        color = self.COLORS.get(plain_levelname, '')
        record.levelname = f"{color}{plain_levelname}{Style.RESET_ALL}"
        try:
            return super().format(record)
        finally:
            # The same LogRecord object is passed to every handler attached to
            # the logger. Restore the plain level name so handlers that run
            # after this one (e.g. a file handler) do not receive ANSI codes.
            record.levelname = plain_levelname
|
31
|
+
|
32
|
+
|
33
|
+
def setup_logger(verbose: bool = False, log_file: str = None) -> logging.Logger:
    """
    Set up the logger with console and file handlers.

    Any handlers already attached to the ``cloudflare_images_migrator`` logger
    are closed and removed first, so the function can be called repeatedly
    without duplicating output or leaking open log files.

    Args:
        verbose: Enable verbose (DEBUG) logging
        log_file: Path to log file (optional). When omitted, a timestamped
            ``migration_YYYYmmdd_HHMMSS.log`` name is used.

    Returns:
        Configured logger instance
    """

    # Create logger
    logger = logging.getLogger('cloudflare_images_migrator')
    level = logging.DEBUG if verbose else logging.INFO
    # NOTE(review): the logger's own level is INFO unless verbose, so DEBUG
    # records are filtered out before they reach the file handler below even
    # though that handler is set to DEBUG - confirm whether the file log is
    # meant to capture DEBUG output unconditionally.
    logger.setLevel(level)

    # Remove any existing handlers, closing each one so that a previously
    # opened log file is released instead of being leaked.
    for stale_handler in list(logger.handlers):
        logger.removeHandler(stale_handler)
        stale_handler.close()

    # Console handler with colors
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)

    console_format = ColoredFormatter(
        fmt='%(asctime)s - %(levelname)s - %(message)s',
        datefmt='%H:%M:%S'
    )
    console_handler.setFormatter(console_format)
    logger.addHandler(console_handler)

    # File handler (if specified or default)
    if log_file is None:
        log_file = f"migration_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"

    try:
        # Create the log file's parent directory if it doesn't exist
        log_path = Path(log_file)
        log_path.parent.mkdir(parents=True, exist_ok=True)

        file_handler = logging.FileHandler(log_file, encoding='utf-8')
        file_handler.setLevel(logging.DEBUG)

        file_format = logging.Formatter(
            fmt='%(asctime)s - %(name)s - %(levelname)s - %(funcName)s:%(lineno)d - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        file_handler.setFormatter(file_format)
        logger.addHandler(file_handler)

        logger.info(f"Logging to file: {log_file}")

    except Exception as e:
        # File logging is best-effort: keep the console handler and carry on.
        logger.warning(f"Could not set up file logging: {e}")

    return logger
|
88
|
+
|
89
|
+
|
90
|
+
class ProgressLogger:
    """Helper class for logging progress with statistics.

    Each ``log_*`` method updates the counters in ``self.stats`` and writes a
    message to the wrapped logger.
    """

    def __init__(self, logger: logging.Logger):
        """Store the logger, zero all counters and record the start time.

        Args:
            logger: Logger that receives every progress message.
        """
        self.logger = logger
        self.stats = {
            'files_processed': 0,
            'images_found': 0,
            'images_uploaded': 0,
            'images_failed': 0,
            'files_modified': 0,
            'errors': []
        }
        self.start_time = datetime.now()

    def log_file_processed(self, file_path: str, images_found: int):
        """Log that a file has been processed."""
        self.stats['files_processed'] += 1
        self.stats['images_found'] += images_found
        self.logger.info(f"Processed {file_path}: found {images_found} images")

    def log_image_uploaded(self, image_path: str, cloudflare_id: str):
        """Log successful image upload."""
        self.stats['images_uploaded'] += 1
        self.logger.info(f"✓ Uploaded {image_path} -> {cloudflare_id}")

    def log_image_failed(self, image_path: str, error: str):
        """Log failed image upload and remember the error for the summary."""
        self.stats['images_failed'] += 1
        self.stats['errors'].append(f"{image_path}: {error}")
        self.logger.error(f"✗ Failed to upload {image_path}: {error}")

    def log_file_modified(self, file_path: str, replacements: int):
        """Log file modification."""
        self.stats['files_modified'] += 1
        self.logger.info(f"✓ Modified {file_path}: {replacements} replacements")

    def log_progress_summary(self, dry_run: bool = False):
        """Log progress summary.

        Args:
            dry_run: When true, report what would be uploaded/modified instead
                of the upload and modification counters.
        """
        elapsed = datetime.now() - self.start_time
        separator = '=' * 50

        self.logger.info(f"\n{separator}")
        self.logger.info(f"Migration {'Preview' if dry_run else 'Summary'}")
        self.logger.info(separator)
        self.logger.info(f"Files processed: {self.stats['files_processed']}")
        self.logger.info(f"Images found: {self.stats['images_found']}")

        if not dry_run:
            self.logger.info(f"Images uploaded: {self.stats['images_uploaded']}")
            self.logger.info(f"Images failed: {self.stats['images_failed']}")
            self.logger.info(f"Files modified: {self.stats['files_modified']}")
        else:
            self.logger.info(f"Images to upload: {self.stats['images_found']}")
            # 'files_to_modify' is not set by this class; if nothing else has
            # added it to self.stats the count reported here is 0.
            self.logger.info(f"Files to modify: {len(set(self.stats.get('files_to_modify', [])))}")

        self.logger.info(f"Time elapsed: {elapsed}")

        errors = self.stats['errors']
        if errors:
            self.logger.warning("\nErrors encountered:")
            for error in errors[:10]:  # Show first 10 errors
                self.logger.warning(f"  - {error}")
            if len(errors) > 10:
                self.logger.warning(f"  ... and {len(errors) - 10} more errors")

        self.logger.info(separator)

    def get_stats(self) -> dict:
        """Get a snapshot of the current statistics.

        List values (such as ``errors``) are copied as well, so mutating the
        returned dictionary never changes the tracker's own state.
        """
        return {
            key: list(value) if isinstance(value, list) else value
            for key, value in self.stats.items()
        }
|
src/migrator.py
ADDED
@@ -0,0 +1,491 @@
|
|
1
|
+
"""
|
2
|
+
Main image migration orchestrator
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import re
|
7
|
+
import shutil
|
8
|
+
from pathlib import Path
|
9
|
+
from typing import List, Dict, Set, Tuple, Optional
|
10
|
+
from tqdm import tqdm
|
11
|
+
from urllib.parse import urljoin
|
12
|
+
import tempfile
|
13
|
+
|
14
|
+
from .config import Config
|
15
|
+
from .logger import ProgressLogger
|
16
|
+
from .utils import (
|
17
|
+
validate_path, is_zip_file, extract_zip, create_backup,
|
18
|
+
find_files_by_extension, is_url, safe_read_file, safe_write_file,
|
19
|
+
get_relative_path, is_image_file
|
20
|
+
)
|
21
|
+
from .parsers import ParserRegistry, ImageReference
|
22
|
+
from .cloudflare_client import CloudflareImagesClient, ImageUploadResult
|
23
|
+
|
24
|
+
|
25
|
+
class MigrationResult:
    """Mutable record of what a single migration run did."""

    def __init__(self):
        # Overall outcome flag; starts out as "not successful".
        self.success = False

        # Counters, all starting at zero.
        self.files_processed = 0
        self.images_found = 0
        self.images_uploaded = 0
        self.images_failed = 0
        self.files_modified = 0

        # Error messages collected through add_error().
        self.errors = []

        # Optional locations; None until something assigns them.
        self.backup_path = None
        self.temp_dir = None

    def add_error(self, error: str):
        """Append one error message to the collected errors."""
        self.errors.append(error)

    def get_summary(self) -> Dict:
        """Return the counters, error count and backup path as a dictionary."""
        summary = {'success': self.success}

        counter_names = (
            'files_processed',
            'images_found',
            'images_uploaded',
            'images_failed',
            'files_modified',
        )
        for counter_name in counter_names:
            summary[counter_name] = getattr(self, counter_name)

        summary['error_count'] = len(self.errors)

        if self.backup_path:
            summary['backup_path'] = str(self.backup_path)
        else:
            summary['backup_path'] = None

        return summary
|
55
|
+
|
56
|
+
|
57
|
+
class ImageMigrator:
    """Main class for migrating images to Cloudflare Images.

    A migration scans the source tree for image references, uploads every
    unique image, rewrites the references to the returned delivery URLs and,
    for zip sources, copies the rewritten tree to an output directory.

    Uploaded and failed images are tracked under a *normalized image key*
    (see ``_resolve_image_key``) so that the same image referenced with
    different relative paths from different files is handled once and every
    reference to it is rewritten.
    """

    def __init__(self, config: "Config", logger):
        """Create the migrator and its collaborators.

        Args:
            config: Migration settings. This class reads ``dry_run``,
                ``backup``, ``file_types``, ``exclude_dirs``,
                ``supported_image_formats`` and ``output_dir`` from it.
            logger: Logger used for all progress and error messages.
        """
        self.config = config
        self.logger = logger
        self.progress_logger = ProgressLogger(logger)
        self.parser_registry = ParserRegistry()
        self.cf_client = CloudflareImagesClient(config, logger)

        # Tracking
        self.processed_files = set()
        self.image_mapping = {}  # normalized image key -> cloudflare_url
        self.failed_images = set()  # normalized image keys whose upload failed

    def migrate(self, source_path: Path) -> bool:
        """
        Main migration function.

        Args:
            source_path: Path to source directory or zip file

        Returns:
            True if migration was successful
        """
        result = MigrationResult()

        try:
            return self._run_migration(source_path, result)
        except Exception as e:
            self.logger.error(f"Migration failed: {str(e)}")
            result.add_error(str(e))
            return False
        finally:
            # Runs on every exit path (including early returns) so an
            # extracted zip never leaves its temporary directory behind.
            self._cleanup(result)

    def _run_migration(self, source_path: Path, result: "MigrationResult") -> bool:
        """Run all migration phases; temp-dir cleanup is left to ``migrate``."""
        self.logger.info(f"Starting migration of: {source_path}")

        # Test Cloudflare connection
        if not self.config.dry_run:
            self.logger.info("Testing Cloudflare Images API connection...")
            if not self.cf_client.test_connection():
                error_msg = "Failed to connect to Cloudflare Images API"
                self.logger.error(error_msg)
                result.add_error(error_msg)
                return False
            self.logger.info("✓ Successfully connected to Cloudflare Images API")

        # Prepare working directory
        working_dir = self._prepare_working_directory(source_path, result)
        if not working_dir:
            return False

        self.logger.info(f"Working directory: {working_dir}")

        # Create backup if requested
        if self.config.backup and not self.config.dry_run:
            result.backup_path = self._create_backup(source_path)
            # _create_backup returns None (after logging a warning) on failure.
            if result.backup_path:
                self.logger.info(f"Created backup: {result.backup_path}")

        # Phase 1: Scan and collect all image references
        self.logger.info("Phase 1: Scanning for image references...")
        image_references = self._scan_for_images(working_dir, result)

        if not image_references:
            self.logger.info("No image references found.")
            return True

        self.logger.info(f"Found {len(image_references)} image references")

        # Phase 2: Process and upload images
        self.logger.info("Phase 2: Processing and uploading images...")
        if not self.config.dry_run:
            if not self._process_images(image_references, working_dir, result):
                return False
        else:
            self.logger.info("Dry run mode - skipping uploads")
            # Count unique images, matching what _process_images records.
            result.images_found = len(self._collect_unique_images(image_references, working_dir))

        # Phase 3: Replace references in code
        self.logger.info("Phase 3: Replacing image references in code...")
        self._replace_image_references(image_references, working_dir, result)

        # Phase 4: Copy results back if working with zip
        if result.temp_dir and not self.config.dry_run:
            self._copy_results_back(working_dir, source_path, result)

        # Final summary
        result.success = True
        self.progress_logger.log_progress_summary(self.config.dry_run)

        return True

    def _prepare_working_directory(self, source_path: Path, result: "MigrationResult") -> Optional[Path]:
        """Prepare the working directory for migration.

        A zip file is extracted into a fresh temporary directory (recorded in
        ``result.temp_dir``); a directory is used in place.

        Returns:
            The directory to work in, or None when the source is neither a
            zip file nor a directory or preparation raised. The reason is
            logged and added to ``result``.
        """
        try:
            if source_path.is_file() and is_zip_file(source_path):
                # Extract zip to temporary directory
                self.logger.info(f"Extracting zip file: {source_path}")
                temp_dir = Path(tempfile.mkdtemp(prefix="cf_images_migration_"))
                # Record the directory before extracting so that it is still
                # cleaned up if extraction itself fails.
                result.temp_dir = temp_dir
                return extract_zip(source_path, temp_dir)

            if source_path.is_dir():
                # Use directory directly
                return source_path

            error_msg = f"Invalid source path: {source_path}"
        except Exception as e:
            error_msg = f"Failed to prepare working directory: {str(e)}"

        self.logger.error(error_msg)
        result.add_error(error_msg)
        return None

    def _create_backup(self, source_path: Path) -> Optional[Path]:
        """Create a backup of the source; return None (with a warning) on failure."""
        try:
            return create_backup(source_path)
        except Exception as e:
            self.logger.warning(f"Failed to create backup: {str(e)}")
            return None

    def _scan_for_images(self, working_dir: Path, result: "MigrationResult") -> List[Tuple[Path, List["ImageReference"]]]:
        """Scan the working directory for image references.

        Returns:
            One ``(file_path, references)`` tuple for every scanned file in
            which the parser registry found at least one reference.
        """
        image_references = []

        # Find files to process
        files_to_process = find_files_by_extension(
            working_dir,
            self.config.file_types,
            self.config.exclude_dirs
        )

        self.logger.info(f"Scanning {len(files_to_process)} files...")

        # Process files with progress bar
        with tqdm(files_to_process, desc="Scanning files", disable=not self.logger) as pbar:
            for file_path in pbar:
                try:
                    pbar.set_description(f"Scanning {file_path.name}")

                    # Parse file for image references
                    refs = self.parser_registry.parse_file(file_path)

                    if refs:
                        image_references.append((file_path, refs))
                        self.progress_logger.log_file_processed(
                            get_relative_path(file_path, working_dir),
                            len(refs)
                        )

                    result.files_processed += 1

                except Exception as e:
                    # One unreadable file must not abort the whole scan.
                    error_msg = f"Error scanning {file_path}: {str(e)}"
                    self.logger.warning(error_msg)
                    result.add_error(error_msg)

        return image_references

    def _process_images(self, image_references: List[Tuple[Path, List["ImageReference"]]],
                        working_dir: Path, result: "MigrationResult") -> bool:
        """Process and upload images to Cloudflare.

        Successful uploads are stored in ``self.image_mapping`` and failures
        in ``self.failed_images``, both under the normalized image key.

        Returns:
            True when at least one image was uploaded or there was nothing to
            upload; False when every upload failed.
        """

        # Collect all unique images
        unique_images = self._collect_unique_images(image_references, working_dir)
        result.images_found = len(unique_images)

        if not unique_images:
            self.logger.info("No images to upload.")
            return True

        self.logger.info(f"Found {len(unique_images)} unique images to upload")

        # Upload images with progress bar
        with tqdm(unique_images, desc="Uploading images", disable=not self.logger) as pbar:
            for image_info in pbar:
                original_path = image_info['original_path']
                pbar.set_description(f"Uploading {Path(original_path).name}")

                try:
                    if image_info['is_url']:
                        # Upload from URL
                        upload_result = self.cf_client.upload_image_from_url(original_path)
                    else:
                        # Upload local file
                        local_path = working_dir / original_path if not Path(original_path).is_absolute() else Path(original_path)
                        if local_path.exists():
                            upload_result = self.cf_client.upload_image_from_path(local_path)
                        else:
                            upload_result = None

                    if upload_result and upload_result.success:
                        self.image_mapping[original_path] = upload_result.delivery_url
                        self.progress_logger.log_image_uploaded(original_path, upload_result.image_id)
                        result.images_uploaded += 1
                    else:
                        error_msg = upload_result.error if upload_result else "Upload failed"
                        self.failed_images.add(original_path)
                        self.progress_logger.log_image_failed(original_path, error_msg)
                        result.images_failed += 1
                        result.add_error(f"Upload failed for {original_path}: {error_msg}")

                except Exception as e:
                    error_msg = f"Exception uploading {original_path}: {str(e)}"
                    self.failed_images.add(original_path)
                    # Go through the progress logger so the failure is also
                    # counted in the summary statistics.
                    self.progress_logger.log_image_failed(original_path, f"Exception: {str(e)}")
                    result.add_error(error_msg)
                    result.images_failed += 1

        success_rate = result.images_uploaded / result.images_found if result.images_found > 0 else 0
        self.logger.info(f"Upload complete: {result.images_uploaded}/{result.images_found} images uploaded ({success_rate:.1%} success rate)")

        return result.images_uploaded > 0 or result.images_found == 0

    def _resolve_image_key(self, file_path: Path, image_path: str, working_dir: Path) -> Optional[str]:
        """Return the normalized key identifying the image a reference points to.

        URLs and absolute paths are their own key. A relative path is resolved
        against the directory of the referencing file and expressed relative
        to the working directory when it lies inside it, otherwise as an
        absolute path.

        Returns:
            The key, or None when the path cannot be resolved.
        """
        if is_url(image_path):
            return image_path

        try:
            if Path(image_path).is_absolute():
                return image_path

            full_path = (file_path.parent / image_path).resolve()
            try:
                # Resolve the working directory too: a relative or symlinked
                # working_dir would otherwise never be a prefix of full_path.
                return str(full_path.relative_to(working_dir.resolve()))
            except ValueError:
                return str(full_path)
        except (OSError, ValueError):
            return None

    def _collect_unique_images(self, image_references: List[Tuple[Path, List["ImageReference"]]],
                               working_dir: Path) -> List[Dict]:
        """Collect all unique images that need to be uploaded.

        Data URLs, images that already failed, and local paths that do not
        exist or are not a supported image format are skipped.

        Returns:
            One dict per unique image with the keys ``original_path`` (the
            normalized image key), ``is_url`` and ``references``.
        """
        unique_images = {}

        for file_path, refs in image_references:
            for ref in refs:
                image_path = ref.path

                # Skip data URLs
                if image_path.startswith('data:'):
                    continue

                image_key = self._resolve_image_key(file_path, image_path, working_dir)

                # Skip if unresolvable, already collected or failed
                if image_key is None or image_key in unique_images or image_key in self.failed_images:
                    continue

                if is_url(image_path):
                    unique_images[image_key] = {
                        'original_path': image_key,
                        'is_url': True,
                        'references': []
                    }
                    continue

                # Local file: check that it exists and is an image
                local_path = Path(image_key) if Path(image_key).is_absolute() else working_dir / image_key
                if local_path.exists() and is_image_file(local_path, self.config.supported_image_formats):
                    unique_images[image_key] = {
                        'original_path': image_key,
                        'is_url': False,
                        'references': []
                    }

        return list(unique_images.values())

    def _replace_image_references(self, image_references: List[Tuple[Path, List["ImageReference"]]],
                                  working_dir: Path, result: "MigrationResult"):
        """Replace image references in files with Cloudflare URLs.

        Only references whose normalized image key has an entry in
        ``self.image_mapping`` are rewritten. In dry-run mode the files are
        left untouched and the planned change is only logged.
        """

        files_to_modify = {}  # file_path -> list of replacements

        # Collect all replacements needed
        for file_path, refs in image_references:
            replacements = []

            for ref in refs:
                if ref.path.startswith('data:'):
                    continue

                # Look the reference up under the same normalized key that
                # _process_images stored the upload result under.
                image_key = self._resolve_image_key(file_path, ref.path, working_dir)

                # Skip if upload failed or no mapping
                if image_key is None or image_key in self.failed_images:
                    continue

                cloudflare_url = self.image_mapping.get(image_key)
                if cloudflare_url is None:
                    continue

                replacements.append((ref, cloudflare_url))

            if replacements:
                files_to_modify[file_path] = replacements

        if not files_to_modify:
            self.logger.info("No files need modification.")
            return

        self.logger.info(f"Modifying {len(files_to_modify)} files...")

        # Apply replacements
        with tqdm(files_to_modify.items(), desc="Modifying files", disable=not self.logger) as pbar:
            for file_path, replacements in pbar:
                pbar.set_description(f"Modifying {file_path.name}")

                try:
                    if not self.config.dry_run:
                        success = self._modify_file(file_path, replacements)
                        if success:
                            result.files_modified += 1
                            self.progress_logger.log_file_modified(
                                get_relative_path(file_path, working_dir),
                                len(replacements)
                            )
                    else:
                        # Dry run - just log what would be changed
                        rel_path = get_relative_path(file_path, working_dir)
                        self.logger.info(f"Would modify {rel_path}: {len(replacements)} replacements")
                        result.files_modified += 1

                except Exception as e:
                    error_msg = f"Error modifying {file_path}: {str(e)}"
                    self.logger.error(error_msg)
                    result.add_error(error_msg)

    def _modify_file(self, file_path: Path, replacements: List[Tuple["ImageReference", str]]) -> bool:
        """Modify a file by replacing image references.

        Args:
            file_path: File to rewrite in place.
            replacements: ``(reference, cloudflare_url)`` pairs to apply.

        Returns:
            True when at least one replacement was applied and the file was
            written; False otherwise.
        """
        try:
            # Read file content
            content = safe_read_file(file_path)
            if content is None:
                self.logger.error(f"Could not read file: {file_path}")
                return False

            # Apply replacements
            modified_content = content
            replacement_count = 0

            # Sort replacements by line number and column (descending)
            sorted_replacements = sorted(replacements, key=lambda x: (x[0].line_number, x[0].column), reverse=True)

            for ref, cloudflare_url in sorted_replacements:
                old_text = ref.original_text

                # Same text with the image path swapped for the Cloudflare URL
                new_text = old_text.replace(ref.path, cloudflare_url)

                if old_text in modified_content:
                    modified_content = modified_content.replace(old_text, new_text, 1)
                    replacement_count += 1
                    continue

                # Exact text not found: try fuzzy matching and keep the
                # rewritten content it produces.
                fuzzy_content = self._fuzzy_substitute(ref, cloudflare_url, modified_content)
                if fuzzy_content is not None:
                    modified_content = fuzzy_content
                    replacement_count += 1

            # Write modified content back
            if replacement_count > 0:
                success = safe_write_file(file_path, modified_content)
                if success:
                    self.logger.debug(f"Applied {replacement_count} replacements to {file_path}")
                    return True
                else:
                    self.logger.error(f"Failed to write modified content to {file_path}")
                    return False
            else:
                self.logger.warning(f"No replacements applied to {file_path}")
                return False

        except Exception as e:
            self.logger.error(f"Exception modifying file {file_path}: {str(e)}")
            return False

    def _fuzzy_substitute(self, ref: "ImageReference", cloudflare_url: str, content: str) -> Optional[str]:
        """Rewrite the first fuzzy match of ``ref.path`` in ``content``.

        The path is searched for (case-insensitively) inside a quoted string,
        a ``src=`` attribute, a CSS ``url(...)`` and a Markdown image link, in
        that order. Only the path portion of the first match is replaced.

        Returns:
            The rewritten content, or None when no pattern matched.
        """
        try:
            escaped_path = re.escape(ref.path)
            templates = (
                r'''["']({path})["']''',
                r'''src\s*=\s*["']({path})["']''',
                r'''url\s*\(\s*["']?({path})["']?\s*\)''',
                r'''\!\[.*?\]\(({path})\)''',
            )

            def swap_path(match):
                # Replace exactly the captured path span. A plain str.replace
                # would miss matches that differ from ref.path in letter case.
                whole = match.group(0)
                offset = match.start(0)
                return whole[:match.start(1) - offset] + cloudflare_url + whole[match.end(1) - offset:]

            for template in templates:
                pattern = template.replace('{path}', escaped_path)
                new_content, hits = re.subn(pattern, swap_path, content, count=1, flags=re.IGNORECASE)
                if hits:
                    return new_content

            return None
        except (re.error, TypeError):
            return None

    def _fuzzy_replace(self, ref: "ImageReference", cloudflare_url: str, content: str) -> bool:
        """Report whether a fuzzy replacement of ``ref.path`` is possible.

        Strings are immutable, so this cannot change the caller's ``content``;
        use ``_fuzzy_substitute`` to obtain the rewritten text.
        """
        return self._fuzzy_substitute(ref, cloudflare_url, content) is not None

    def _copy_results_back(self, working_dir: Path, original_path: Path, result: "MigrationResult"):
        """Copy results back when working with extracted zip.

        The destination is ``config.output_dir`` when set, otherwise a
        ``<zip stem>_migrated`` directory next to the original zip file.
        """
        try:
            if self.config.output_dir:
                output_path = Path(self.config.output_dir)
            else:
                output_path = original_path.parent / f"{original_path.stem}_migrated"

            output_path.mkdir(parents=True, exist_ok=True)

            # Copy all files from working directory to output
            for item in working_dir.iterdir():
                if item.is_dir():
                    shutil.copytree(item, output_path / item.name, dirs_exist_ok=True)
                else:
                    shutil.copy2(item, output_path / item.name)

            self.logger.info(f"Results copied to: {output_path}")

        except Exception as e:
            self.logger.error(f"Failed to copy results: {str(e)}")
            result.add_error(f"Failed to copy results: {str(e)}")

    def _cleanup(self, result: "MigrationResult"):
        """Clean up temporary files; failures are logged, never raised."""
        if result.temp_dir and result.temp_dir.exists():
            try:
                shutil.rmtree(result.temp_dir)
                self.logger.debug(f"Cleaned up temporary directory: {result.temp_dir}")
            except Exception as e:
                self.logger.warning(f"Failed to clean up temporary directory: {str(e)}")

    def get_migration_stats(self) -> Dict:
        """Get statistics about the migration process."""
        cf_stats = self.cf_client.get_upload_stats()
        progress_stats = self.progress_logger.get_stats()

        return {
            'cloudflare_uploads': cf_stats,
            'progress': progress_stats,
            'image_mappings': len(self.image_mapping),
            'failed_images': len(self.failed_images)
        }
|