cloudflare-images-migrator 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cloudflare_images_migrator-1.0.0.dist-info/METADATA +474 -0
- cloudflare_images_migrator-1.0.0.dist-info/RECORD +17 -0
- cloudflare_images_migrator-1.0.0.dist-info/WHEEL +5 -0
- cloudflare_images_migrator-1.0.0.dist-info/entry_points.txt +3 -0
- cloudflare_images_migrator-1.0.0.dist-info/licenses/LICENSE +21 -0
- cloudflare_images_migrator-1.0.0.dist-info/top_level.txt +1 -0
- src/__init__.py +1 -0
- src/audit.py +620 -0
- src/cloudflare_client.py +746 -0
- src/config.py +161 -0
- src/image_tracker.py +405 -0
- src/logger.py +160 -0
- src/migrator.py +491 -0
- src/parsers.py +609 -0
- src/quality.py +558 -0
- src/security.py +528 -0
- src/utils.py +355 -0
src/utils.py
ADDED
@@ -0,0 +1,355 @@
|
|
1
|
+
"""
|
2
|
+
Utility functions for Cloudflare Images Migration Tool
|
3
|
+
"""
|
4
|
+
|
5
|
+
import os
|
6
|
+
import re
|
7
|
+
import shutil
|
8
|
+
import zipfile
|
9
|
+
import tempfile
|
10
|
+
import hashlib
|
11
|
+
import validators
|
12
|
+
from pathlib import Path
|
13
|
+
from typing import List, Optional, Tuple, Set
|
14
|
+
from urllib.parse import urlparse, urljoin
|
15
|
+
|
16
|
+
|
17
|
+
def validate_path(path: str) -> Path:
    """
    Resolve a file or directory path and confirm that it exists.

    Args:
        path: Path to check

    Returns:
        The resolved Path object

    Raises:
        ValueError: If nothing exists at the resolved location
    """
    resolved = Path(path).resolve()
    if resolved.exists():
        return resolved

    # The message reports the caller's original input, not the resolved form.
    raise ValueError(f"Path does not exist: {path}")
|
36
|
+
|
37
|
+
|
38
|
+
def is_zip_file(path: Path) -> bool:
    """
    Check if a file is a valid zip archive.

    Args:
        path: Path to the candidate file

    Returns:
        True only if the archive opens and every member passes its
        integrity check; False for missing paths, directories, unreadable
        files, non-zip files and archives containing a damaged member
    """
    try:
        with zipfile.ZipFile(path, 'r') as zip_file:
            # testzip() returns the name of the first damaged member, or
            # None when every member is intact, so the result must be checked.
            return zip_file.testzip() is None
    except (zipfile.BadZipFile, OSError):
        # OSError covers FileNotFoundError as well as directories and
        # permission problems, which would otherwise escape to the caller.
        return False
|
46
|
+
|
47
|
+
|
48
|
+
def extract_zip(zip_path: Path, extract_to: Optional[Path] = None) -> Path:
    """
    Extract a zip file to a temporary or specified directory.

    Args:
        zip_path: Path to zip file
        extract_to: Directory to extract to (optional). When omitted, a new
            temporary directory prefixed with "cf_images_" is created.

    Returns:
        Path to extracted directory

    Raises:
        zipfile.BadZipFile: If zip_path is not a readable zip archive
        OSError: If the archive or the target directory cannot be accessed
    """
    owns_target = extract_to is None
    if owns_target:
        extract_to = Path(tempfile.mkdtemp(prefix="cf_images_"))

    extract_to.mkdir(parents=True, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_file:
            zip_file.extractall(extract_to)
    except Exception:
        # Only remove a directory this call created; a caller-supplied
        # target may hold unrelated content and is left untouched.
        if owns_target:
            shutil.rmtree(extract_to, ignore_errors=True)
        raise

    return extract_to
|
68
|
+
|
69
|
+
|
70
|
+
def create_backup(source_path: Path, backup_suffix: str = "_backup") -> Path:
    """
    Copy a file or directory to a sibling location named after the source.

    Args:
        source_path: Path to backup
        backup_suffix: Suffix appended to the source name

    Returns:
        Path of the newly created backup
    """
    parent = source_path.parent
    base_name = f"{source_path.name}{backup_suffix}"

    # Probe "<name><suffix>", then "<name><suffix>_1", "_2", ... until a
    # name is found that is not already taken.
    destination = parent / base_name
    attempt = 1
    while destination.exists():
        destination = parent / f"{base_name}_{attempt}"
        attempt += 1

    # Directories are copied recursively; single files keep their metadata.
    copier = shutil.copytree if source_path.is_dir() else shutil.copy2
    copier(source_path, destination)
    return destination
|
95
|
+
|
96
|
+
|
97
|
+
def get_file_hash(file_path: Path) -> str:
    """
    Compute the MD5 digest of a file's contents.

    Args:
        file_path: Path to file

    Returns:
        Hex-encoded MD5 digest, or an empty string if the file could not
        be read
    """
    digest = hashlib.md5()
    try:
        with open(file_path, "rb") as stream:
            # Feed the hash in 4 KiB pieces so the whole file is never
            # held in memory at once.
            while True:
                block = stream.read(4096)
                if not block:
                    break
                digest.update(block)
    except Exception:
        return ""
    return digest.hexdigest()
|
115
|
+
|
116
|
+
|
117
|
+
def is_image_file(file_path: Path, supported_formats: List[str]) -> bool:
    """
    Decide from the file extension alone whether a path names an image.

    Args:
        file_path: Path to file
        supported_formats: Accepted extensions; they are compared against
            the path's suffix, which includes its leading dot

    Returns:
        True if the lower-cased suffix matches one of the lower-cased
        supported formats
    """
    accepted = {fmt.lower() for fmt in supported_formats}
    return file_path.suffix.lower() in accepted
|
129
|
+
|
130
|
+
|
131
|
+
def is_url(string: str) -> bool:
    """
    Report whether a string passes URL validation.

    Args:
        string: String to check

    Returns:
        True if the validators package accepts the string as a URL
    """
    verdict = validators.url(string)
    # Only an exact True counts; any other result is treated as "not a URL".
    return verdict is True
|
142
|
+
|
143
|
+
|
144
|
+
def normalize_path(path: str, base_path: Optional[Path] = None) -> str:
    """
    Normalize a file path for consistent handling.

    URLs are returned without any filesystem resolution. Anything else has
    surrounding whitespace and quotes removed and is resolved to an
    absolute path.

    Args:
        path: Path or URL to normalize
        base_path: Base path for relative paths

    Returns:
        Normalized path string, or the URL when the input is a URL
    """
    if is_url(path):
        return path

    # Remove quotes and whitespace
    cleaned = path.strip().strip('"\'')

    # A URL wrapped in quotes or padding only validates once the wrapping
    # is removed; it must not fall through and be resolved as a local path.
    if cleaned != path and is_url(cleaned):
        return cleaned

    # Handle relative paths
    if base_path and not os.path.isabs(cleaned):
        return str((base_path / cleaned).resolve())
    return str(Path(cleaned).resolve())
|
168
|
+
|
169
|
+
|
170
|
+
def get_relative_path(file_path: Path, base_path: Path) -> str:
    """
    Express a file path relative to a base directory.

    Args:
        file_path: Target file path
        base_path: Base directory path

    Returns:
        The relative path as a string, or the file path unchanged (as a
        string) when it is not located under base_path
    """
    try:
        relative = file_path.relative_to(base_path)
    except ValueError:
        # Not located under base_path: hand the path back as given.
        return str(file_path)
    return str(relative)
|
185
|
+
|
186
|
+
|
187
|
+
def find_files_by_extension(directory: Path, extensions: List[str],
                            exclude_dirs: List[str] = None) -> List[Path]:
    """
    Walk a directory tree and collect the files whose suffix matches.

    Matching of both extensions and excluded directory names is
    case-insensitive.

    Args:
        directory: Directory to search
        extensions: List of file extensions to match
        exclude_dirs: List of directory names to exclude

    Returns:
        List of matching file paths
    """
    skipped = [name.lower() for name in (exclude_dirs or [])]
    wanted = [ext.lower() for ext in extensions]

    matches = []
    for current, subdirs, filenames in os.walk(directory):
        # Editing the list in place makes os.walk skip excluded directories.
        subdirs[:] = [sub for sub in subdirs if sub.lower() not in skipped]

        candidates = (Path(current) / filename for filename in filenames)
        matches.extend(
            candidate for candidate in candidates
            if candidate.suffix.lower() in wanted
        )

    return matches
|
218
|
+
|
219
|
+
|
220
|
+
def extract_domain(url: str) -> str:
    """
    Pull the network-location part out of a URL.

    Args:
        url: URL string

    Returns:
        The netloc component as parsed by urlparse, or an empty string if
        parsing raises
    """
    try:
        pieces = urlparse(url)
    except Exception:
        return ""
    else:
        return pieces.netloc
|
235
|
+
|
236
|
+
|
237
|
+
def make_cloudflare_url(image_id: str, variant: str = "public",
                        account_hash: Optional[str] = None) -> str:
    """
    Generate Cloudflare Images delivery URL.

    Cloudflare documents the delivery URL as
    ``https://imagedelivery.net/<ACCOUNT_HASH>/<IMAGE_ID>/<VARIANT_NAME>``.
    Pass ``account_hash`` to produce that full form.

    Args:
        image_id: Cloudflare image ID
        variant: Image variant name
        account_hash: Account hash used as the first path segment
            (optional). When omitted, the URL contains only the image ID
            and the variant, exactly as this function produced before the
            parameter existed.

    Returns:
        Cloudflare delivery URL
    """
    if account_hash:
        return f"https://imagedelivery.net/{account_hash}/{image_id}/{variant}"
    return f"https://imagedelivery.net/{image_id}/{variant}"
|
249
|
+
|
250
|
+
|
251
|
+
def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename for safe use across different file systems.

    Args:
        filename: Original filename

    Returns:
        Sanitized filename, at most 255 characters long. The result can be
        an empty string when the input consists only of spaces and dots.
    """
    max_length = 255

    # Replace characters that are invalid in file names, including ASCII
    # control characters (NUL, newline, tab, ...).
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    # Remove leading/trailing whitespace and dots
    sanitized = sanitized.strip(' .')

    # Limit length, keeping the extension whenever it fits
    if len(sanitized) > max_length:
        name, ext = os.path.splitext(sanitized)
        if len(ext) <= max_length:
            sanitized = name[:max_length - len(ext)] + ext
        else:
            # An extension longer than the limit would make the slice index
            # negative and leave the result over-long; cut the string instead.
            sanitized = sanitized[:max_length]

    return sanitized
|
273
|
+
|
274
|
+
|
275
|
+
def get_file_size_mb(file_path: Path) -> float:
    """
    Report a file's size in megabytes (1 MB = 1024 * 1024 bytes).

    Args:
        file_path: Path to file

    Returns:
        File size in MB, or 0.0 if the size could not be determined
    """
    bytes_per_mb = 1024 * 1024
    try:
        return file_path.stat().st_size / bytes_per_mb
    except Exception:
        return 0.0
|
290
|
+
|
291
|
+
|
292
|
+
def is_binary_file(file_path: Path) -> bool:
    """
    Guess whether a file is binary by looking for a NUL byte.

    Only the first 1024 bytes of the file are inspected.

    Args:
        file_path: Path to file

    Returns:
        True if a NUL byte is found in the inspected prefix, or if the
        file cannot be read
    """
    try:
        with open(file_path, 'rb') as stream:
            head = stream.read(1024)
    except Exception:
        return True  # Assume binary if can't read
    return head.find(b'\0') != -1
|
308
|
+
|
309
|
+
|
310
|
+
def safe_read_file(file_path: Path, encoding: str = 'utf-8') -> Optional[str]:
    """
    Safely read a text file with fallback encodings.

    The requested encoding is tried first, then utf-8, cp1252 and finally
    latin-1.

    Args:
        file_path: Path to file
        encoding: Primary encoding to try

    Returns:
        File content or None if reading fails
    """
    # latin-1 accepts every byte value, so it has to be the last resort:
    # listed ahead of cp1252 it would make cp1252 unreachable.
    candidates = []
    for enc in (encoding, 'utf-8', 'cp1252', 'latin-1'):
        if enc not in candidates:  # do not retry the same encoding twice
            candidates.append(enc)

    for enc in candidates:
        try:
            with open(file_path, 'r', encoding=enc) as f:
                return f.read()
        except (UnicodeError, LookupError):
            # Wrong guess, or an unknown codec name: try the next encoding.
            continue
        except Exception:
            # Missing or unreadable file: another encoding cannot help.
            break

    return None
|
333
|
+
|
334
|
+
|
335
|
+
def safe_write_file(file_path: Path, content: str, encoding: str = 'utf-8') -> bool:
    """
    Write text to a file, reporting failure instead of raising.

    Args:
        file_path: Path to file
        content: Content to write
        encoding: Encoding to use

    Returns:
        True if the content was written, False if any step failed
    """
    try:
        # Missing parent directories are created before the file is opened.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        with open(file_path, 'w', encoding=encoding) as handle:
            handle.write(content)
    except Exception:
        return False
    return True
|