cloudflare-images-migrator 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/utils.py ADDED
@@ -0,0 +1,355 @@
1
+ """
2
+ Utility functions for Cloudflare Images Migration Tool
3
+ """
4
+
5
+ import os
6
+ import re
7
+ import shutil
8
+ import zipfile
9
+ import tempfile
10
+ import hashlib
11
+ import validators
12
+ from pathlib import Path
13
+ from typing import List, Optional, Tuple, Set
14
+ from urllib.parse import urlparse, urljoin
15
+
16
+
17
def validate_path(path: str) -> Path:
    """
    Resolve a file or directory path and confirm that it exists.

    Args:
        path: Path string to check

    Returns:
        The resolved path as a Path object

    Raises:
        ValueError: If nothing exists at the resolved location
    """
    resolved = Path(path).resolve()

    # Happy path first; the error is raised only when the lookup fails.
    if resolved.exists():
        return resolved

    raise ValueError(f"Path does not exist: {path}")
36
+
37
+
38
def is_zip_file(path: Path) -> bool:
    """
    Check if a file is a valid, uncorrupted zip archive.

    Args:
        path: Path to the candidate file

    Returns:
        True only if the archive opens and every member passes its
        integrity check; False for non-zip files, corrupt archives,
        missing files, directories and unreadable paths
    """
    try:
        with zipfile.ZipFile(path, 'r') as zip_file:
            # testzip() reports corruption through its return value (the name
            # of the first bad member, or None when all members are intact),
            # so the result has to be inspected rather than discarded.
            return zip_file.testzip() is None
    except (zipfile.BadZipFile, OSError):
        # OSError covers FileNotFoundError as well as directories and
        # permission problems, which previously escaped as exceptions.
        return False
46
+
47
+
48
def extract_zip(zip_path: Path, extract_to: Optional[Path] = None) -> Path:
    """
    Extract a zip file to a temporary or specified directory.

    Args:
        zip_path: Path to zip file
        extract_to: Directory to extract to (optional). When omitted, a new
            temporary directory with the prefix "cf_images_" is created.

    Returns:
        Path to extracted directory

    Raises:
        zipfile.BadZipFile: If zip_path is not a readable zip archive
        OSError: If the archive or the destination cannot be accessed
    """
    owns_directory = extract_to is None
    if owns_directory:
        extract_to = Path(tempfile.mkdtemp(prefix="cf_images_"))
    else:
        extract_to.mkdir(parents=True, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_file:
            zip_file.extractall(extract_to)
    except BaseException:
        # Only remove a directory this function created itself; a
        # caller-supplied destination may hold unrelated content.
        if owns_directory:
            shutil.rmtree(extract_to, ignore_errors=True)
        raise

    return extract_to
68
+
69
+
70
def create_backup(source_path: Path, backup_suffix: str = "_backup") -> Path:
    """
    Copy a file or directory to a sibling location used as a backup.

    Args:
        source_path: File or directory to copy
        backup_suffix: Text appended to the source name to form the backup name

    Returns:
        Path of the newly created backup
    """
    parent_dir = source_path.parent
    base_name = f"{source_path.name}{backup_suffix}"
    target = parent_dir / base_name

    # Append _1, _2, ... until the candidate name is not already taken.
    attempt = 1
    while target.exists():
        target = parent_dir / f"{base_name}_{attempt}"
        attempt += 1

    # Directories are copied recursively; files keep their metadata via copy2.
    copier = shutil.copytree if source_path.is_dir() else shutil.copy2
    copier(source_path, target)

    return target
95
+
96
+
97
def get_file_hash(file_path: Path) -> str:
    """
    Compute the MD5 digest of a file's contents.

    Args:
        file_path: Path to the file to hash

    Returns:
        Hex-encoded MD5 digest, or an empty string if the file
        could not be read
    """
    digest = hashlib.md5()
    try:
        with open(file_path, "rb") as handle:
            # Read in 4 KiB blocks so large files are never fully in memory.
            while True:
                block = handle.read(4096)
                if not block:
                    break
                digest.update(block)
    except Exception:
        return ""
    return digest.hexdigest()
115
+
116
+
117
def is_image_file(file_path: Path, supported_formats: List[str]) -> bool:
    """
    Decide whether a file counts as an image, judging only by its extension.

    Args:
        file_path: Path to the file being classified
        supported_formats: Extensions (as returned by Path.suffix, e.g. ".png")
            that should be treated as images; compared case-insensitively

    Returns:
        True if the file's extension is one of the supported formats
    """
    suffix = file_path.suffix.lower()
    accepted = tuple(fmt.lower() for fmt in supported_formats)
    return suffix in accepted
129
+
130
+
131
def is_url(string: str) -> bool:
    """
    Report whether a string passes the validators library's URL check.

    Args:
        string: Candidate text

    Returns:
        True only when validators.url() returns exactly True; any other
        result is treated as "not a URL"
    """
    outcome = validators.url(string)
    # Identity comparison on purpose: only a literal True counts as valid.
    return outcome is True
142
+
143
+
144
def normalize_path(path: str, base_path: Optional[Path] = None) -> str:
    """
    Normalize a file path for consistent handling.

    URLs are returned without being resolved against the filesystem. Any
    other value has surrounding whitespace and quote characters removed and
    is then resolved to an absolute path.

    Args:
        path: Path or URL to normalize
        base_path: Base path for relative paths (optional). When omitted,
            relative paths are resolved by Path.resolve().

    Returns:
        Normalized path string, or the URL itself for URL input
    """
    if is_url(path):
        return path

    # Remove quotes and whitespace
    cleaned = path.strip().strip('"\'')

    # A reference copied with its quotes or padding only becomes a valid URL
    # after cleaning, so check again before treating it as a filesystem path.
    if cleaned != path and is_url(cleaned):
        return cleaned

    # Handle relative paths
    if base_path and not os.path.isabs(cleaned):
        return str((base_path / cleaned).resolve())

    return str(Path(cleaned).resolve())
168
+
169
+
170
def get_relative_path(file_path: Path, base_path: Path) -> str:
    """
    Express a file path relative to a base directory.

    Args:
        file_path: Path to express relatively
        base_path: Directory the result should be relative to

    Returns:
        The relative path as a string, or the unchanged file path as a
        string when it does not lie under base_path
    """
    try:
        relative = file_path.relative_to(base_path)
    except ValueError:
        # relative_to() raises when file_path is not inside base_path.
        return str(file_path)
    return str(relative)
185
+
186
+
187
def find_files_by_extension(directory: Path, extensions: List[str],
                            exclude_dirs: List[str] = None) -> List[Path]:
    """
    Walk a directory tree and collect the files whose extension matches.

    Args:
        directory: Root of the tree to walk
        extensions: Extensions to accept (as returned by Path.suffix, e.g.
            ".html"); compared case-insensitively
        exclude_dirs: Directory names to skip entirely, compared
            case-insensitively (optional)

    Returns:
        Matching file paths, in the order os.walk yields them
    """
    skipped = {name.lower() for name in (exclude_dirs or [])}
    wanted = {ext.lower() for ext in extensions}

    matches = []
    for root, dirs, files in os.walk(directory):
        # Pruning dirs in place stops os.walk from descending into them.
        dirs[:] = [name for name in dirs if name.lower() not in skipped]

        root_path = Path(root)
        candidates = (root_path / name for name in files)
        matches.extend(
            candidate for candidate in candidates
            if candidate.suffix.lower() in wanted
        )

    return matches
218
+
219
+
220
def extract_domain(url: str) -> str:
    """
    Return the network-location part of a URL.

    Args:
        url: URL to inspect

    Returns:
        The netloc component as parsed by urlparse (it may include
        credentials and a port), or an empty string if parsing fails
    """
    try:
        components = urlparse(url)
    except Exception:
        return ""
    return components.netloc
235
+
236
+
237
def make_cloudflare_url(image_id: str, variant: str = "public",
                        account_hash: Optional[str] = None) -> str:
    """
    Generate Cloudflare Images delivery URL.

    Cloudflare's documented delivery format is
    https://imagedelivery.net/<ACCOUNT_HASH>/<IMAGE_ID>/<VARIANT_NAME>.
    Pass account_hash to produce that form. When it is omitted the URL is
    built from image_id and variant only, as before, so image_id must then
    already carry whatever prefix the caller needs.

    Args:
        image_id: Cloudflare image ID
        variant: Image variant name
        account_hash: Cloudflare Images account hash (optional)

    Returns:
        Cloudflare delivery URL
    """
    if account_hash:
        return f"https://imagedelivery.net/{account_hash}/{image_id}/{variant}"
    return f"https://imagedelivery.net/{image_id}/{variant}"
249
+
250
+
251
def sanitize_filename(filename: str) -> str:
    """
    Sanitize filename for safe use across different file systems.

    Args:
        filename: Original filename

    Returns:
        Sanitized filename of at most 255 characters. The result can be an
        empty string when the input consists only of spaces and dots.
    """
    max_length = 255

    # Replace reserved punctuation plus NUL and the other ASCII control
    # characters, none of which are usable in file names on every platform.
    sanitized = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '_', filename)

    # Remove leading/trailing whitespace and dots
    sanitized = sanitized.strip(' .')

    # Limit length
    if len(sanitized) > max_length:
        name, ext = os.path.splitext(sanitized)
        if len(ext) < max_length:
            # Trim the stem so the extension survives intact.
            sanitized = name[:max_length - len(ext)] + ext
        else:
            # The "extension" alone exceeds the limit; slicing the stem with
            # a negative bound would leave the result too long, so cut the
            # whole string instead.
            sanitized = sanitized[:max_length].rstrip(' .')

    return sanitized
273
+
274
+
275
def get_file_size_mb(file_path: Path) -> float:
    """
    Report the size of a file in megabytes (1 MB = 1024 * 1024 bytes).

    Args:
        file_path: Path to the file to measure

    Returns:
        Size in MB, or 0.0 if the file cannot be inspected
    """
    bytes_per_mb = 1024 * 1024
    try:
        info = file_path.stat()
    except Exception:
        return 0.0
    return info.st_size / bytes_per_mb
290
+
291
+
292
def is_binary_file(file_path: Path) -> bool:
    """
    Guess whether a file is binary by looking for a NUL byte near its start.

    Args:
        file_path: Path to the file to inspect

    Returns:
        True if the first 1024 bytes contain a NUL byte, or if the file
        cannot be read at all; False otherwise
    """
    try:
        with open(file_path, 'rb') as handle:
            head = handle.read(1024)
    except Exception:
        # Unreadable files are treated as binary.
        return True
    return head.find(b'\0') != -1
308
+
309
+
310
def safe_read_file(file_path: Path, encoding: str = 'utf-8') -> Optional[str]:
    """
    Safely read a text file with fallback encodings.

    The requested encoding is tried first, followed by utf-8, cp1252 and
    latin-1, skipping any duplicates.

    Args:
        file_path: Path to file
        encoding: Primary encoding to try

    Returns:
        File content or None if reading fails
    """
    # cp1252 must come before latin-1: latin-1 accepts every byte sequence,
    # so anything listed after it can never be reached.
    candidates = []
    for enc in (encoding, 'utf-8', 'cp1252', 'latin-1'):
        if enc not in candidates:
            candidates.append(enc)

    for enc in candidates:
        try:
            with open(file_path, 'r', encoding=enc) as f:
                return f.read()
        except (UnicodeError, LookupError):
            # Wrong or unknown encoding (LookupError is raised for an
            # unrecognised encoding name): move on to the next candidate
            # instead of giving up.
            continue
        except Exception:
            # Deliberate best-effort: any other failure (missing file,
            # permissions, ...) will not be cured by another encoding.
            return None

    return None
333
+
334
+
335
def safe_write_file(file_path: Path, content: str, encoding: str = 'utf-8') -> bool:
    """
    Write text to a file, reporting failure instead of raising.

    Args:
        file_path: Destination file; missing parent directories are created
        content: Text to write
        encoding: Text encoding used for the output

    Returns:
        True when the write completed, False if any step raised
    """
    try:
        # Make sure the destination folder exists before opening the file.
        file_path.parent.mkdir(parents=True, exist_ok=True)

        with file_path.open('w', encoding=encoding) as handle:
            handle.write(content)
    except Exception:
        return False
    return True