cloudflare-images-migrator 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
src/config.py ADDED
@@ -0,0 +1,161 @@
1
+ """
2
+ Configuration management for Cloudflare Images Migration Tool
3
+ """
4
+
5
+ import os
6
+ import yaml
7
+ from pathlib import Path
8
+ from dotenv import load_dotenv
9
+ from typing import List, Optional, Dict, Any
10
+
11
+
12
class Config:
    """Configuration manager for the migration tool."""

    def __init__(self, config_file: Optional[str] = None, **kwargs):
        """
        Initialize configuration from various sources.

        Priority order (highest wins):
        1. Command line arguments (kwargs)
        2. Configuration file
        3. Environment variables
        4. Default values
        """

        # Make variables from a local .env file visible to os.getenv().
        load_dotenv()

        # Baseline values; _load_from_env() backfills any key that the
        # file / kwargs / environment loaders did not set.
        self.defaults = {
            'account_id': None,
            'api_token': None,
            'output_dir': None,
            'dry_run': False,
            'backup': True,
            'file_types': [
                '.html', '.htm', '.css', '.js', '.jsx', '.ts', '.tsx',
                '.md', '.json', '.xml', '.yaml', '.yml', '.scss', '.sass', '.less'
            ],
            'exclude_dirs': [
                'node_modules', '.git', '.vscode', '.idea', '__pycache__',
                'venv', 'env', '.env', 'dist', 'build', 'target'
            ],
            'supported_image_formats': ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg'],
            'max_file_size_mb': 10,
            'batch_size': 10,
            'retry_count': 3,
            'timeout': 30
        }

        # Lower-priority sources load first so later ones can override.
        if config_file:
            self._load_config_file(config_file)

        # Command line arguments override the config file.
        self._load_from_kwargs(kwargs)

        # Must run last: it only fills gaps and then applies defaults.
        self._load_from_env()

    def _load_config_file(self, config_file: str):
        """Load configuration from a YAML file; unknown keys are ignored."""
        config_path = Path(config_file)
        if config_path.exists():
            try:
                with open(config_path, 'r') as f:
                    file_config = yaml.safe_load(f)
                    if file_config:
                        for key, value in file_config.items():
                            # Only accept keys we know about.
                            if key in self.defaults:
                                setattr(self, key, value)
            except Exception as e:
                # A broken config file is not fatal; other sources still apply.
                print(f"Warning: Could not load config file {config_file}: {e}")

    def _load_from_kwargs(self, kwargs: Dict[str, Any]):
        """Apply command line overrides; comma-separated strings become lists."""
        for key, value in kwargs.items():
            if value is not None:
                if key == 'file_types' and isinstance(value, str):
                    value = [ext.strip() for ext in value.split(',')]
                elif key == 'exclude_dirs' and isinstance(value, str):
                    value = [name.strip() for name in value.split(',')]
                setattr(self, key, value)

    def _load_from_env(self):
        """Load credentials from environment variables, then backfill defaults.

        Environment variables never override values already set by the
        config file or command line (the hasattr guard enforces priority).
        """
        env_mappings = {
            'CLOUDFLARE_ACCOUNT_ID': 'account_id',
            'CLOUDFLARE_API_TOKEN': 'api_token',
            'CF_ACCOUNT_ID': 'account_id',
            'CF_API_TOKEN': 'api_token'
        }

        for env_var, config_key in env_mappings.items():
            env_value = os.getenv(env_var)
            if env_value and not hasattr(self, config_key):
                setattr(self, config_key, env_value)

        # Backfill: anything still unset falls back to its default.
        for key, default_value in self.defaults.items():
            if not hasattr(self, key):
                setattr(self, key, default_value)

    def validate(self) -> bool:
        """Validate configuration, print any problems, and return True if valid."""
        errors = []

        if not self.account_id:
            errors.append("Cloudflare Account ID is required")

        if not self.api_token:
            errors.append("Cloudflare API Token is required")

        # Only validate an explicit parent directory. A bare relative name
        # (e.g. "output") has an empty dirname, and os.path.exists("") is
        # False, so the old unconditional check wrongly rejected it.
        if self.output_dir:
            parent = os.path.dirname(self.output_dir)
            if parent and not os.path.exists(parent):
                errors.append(f"Output directory parent does not exist: {self.output_dir}")

        if errors:
            print("Configuration errors:")
            for error in errors:
                print(f"  - {error}")
            return False

        return True

    def get_cloudflare_api_url(self) -> str:
        """Get the Cloudflare Images API URL for the configured account."""
        return f"https://api.cloudflare.com/client/v4/accounts/{self.account_id}/images/v1"

    def get_headers(self) -> Dict[str, str]:
        """Get HTTP headers (bearer auth) for Cloudflare API requests."""
        return {
            'Authorization': f'Bearer {self.api_token}',
            'User-Agent': 'Cloudflare-Images-Migration-Tool/1.0'
        }

    def to_dict(self) -> Dict[str, Any]:
        """Convert the effective configuration to a dictionary."""
        return {key: getattr(self, key, self.defaults[key]) for key in self.defaults}

    def save_config_template(self, file_path: str):
        """Save a starter configuration template file for the user to edit."""
        template_config = {
            'account_id': 'your_cloudflare_account_id',
            'api_token': 'your_cloudflare_api_token',
            'file_types': self.defaults['file_types'],
            'exclude_dirs': self.defaults['exclude_dirs'],
            'dry_run': False,
            'backup': True,
            'batch_size': 10,
            'retry_count': 3,
            'timeout': 30
        }

        with open(file_path, 'w') as f:
            yaml.dump(template_config, f, default_flow_style=False, indent=2)

        print(f"Configuration template saved to: {file_path}")
        print("Please edit the file with your Cloudflare credentials.")
src/image_tracker.py ADDED
@@ -0,0 +1,405 @@
1
+ """
2
+ Enterprise Image Tracking System
3
+
4
+ Provides persistent duplicate detection, comprehensive tracking,
5
+ and CSV/database management for uploaded images.
6
+ """
7
+
8
+ import sqlite3
9
+ import csv
10
+ import json
11
+ import hashlib
12
+ import time
13
+ from pathlib import Path
14
+ from typing import Dict, List, Optional, Tuple, Set
15
+ from dataclasses import dataclass, asdict
16
+ from datetime import datetime
17
+
18
+
19
@dataclass
class ImageRecord:
    """One tracked upload: where the image came from, where it lives on
    Cloudflare, and everything recorded about the transfer."""

    # --- Core identification ------------------------------------------
    original_path: str                # original file path or URL
    cloudflare_id: str                # Cloudflare image ID
    cloudflare_url: str               # Cloudflare delivery URL

    # --- Hashing / deduplication --------------------------------------
    file_hash: Optional[str] = None   # SHA256 of the file content
    url_hash: Optional[str] = None    # MD5 of the source URL

    # --- Metadata ------------------------------------------------------
    original_filename: str = ""       # original filename
    file_size_bytes: int = 0          # file size in bytes
    mime_type: str = ""               # MIME type
    upload_timestamp: float = 0       # Unix timestamp of the upload
    upload_date: str = ""             # human-readable date string

    # --- Source tracking ------------------------------------------------
    source_project: str = ""          # project name/path
    source_file: str = ""             # file that referenced this image
    migration_session: str = ""       # migration session ID

    # --- Quality / optimization ----------------------------------------
    was_optimized: bool = False       # whether the image was optimized
    original_size_bytes: int = 0      # size before optimization
    compression_ratio: float = 0.0    # compression ratio achieved
    quality_score: float = 0.0        # quality analysis score

    # --- Security --------------------------------------------------------
    security_level: str = ""          # security validation level
    security_issues: str = ""         # JSON string of issues found

    def to_dict(self) -> Dict:
        """Return a plain-dict view of every field, for JSON/CSV export."""
        return asdict(self)
57
+
58
+
59
class ImageTracker:
    """Enterprise image tracking system with persistent storage.

    Persists every uploaded image in a SQLite database, mirrors the
    records in memory for O(1) duplicate lookups (by content hash, by
    source URL hash, and by Cloudflare ID), and can export the full
    ledger to CSV.
    """

    def __init__(self, database_path: str = "cloudflare_images.db",
                 csv_export_path: str = "cloudflare_images.csv"):
        """Open (or create) the tracking database and warm the caches."""
        self.database_path = Path(database_path)
        self.csv_export_path = Path(csv_export_path)
        self.session_id = self._generate_session_id()

        # In-memory caches for fast duplicate lookups.
        self._hash_cache: Dict[str, "ImageRecord"] = {}  # keyed by file SHA256
        self._url_cache: Dict[str, "ImageRecord"] = {}   # keyed by MD5 of URL
        self._id_cache: Dict[str, "ImageRecord"] = {}    # keyed by Cloudflare ID

        # Initialize database, then load existing records into the caches.
        self._init_database()
        self._load_cache()

    def _generate_session_id(self) -> str:
        """Generate a unique session ID for this migration run.

        MD5 is used only to derive a short, readable suffix from the
        timestamp — it is not security-relevant.
        """
        timestamp = int(time.time())
        return f"migration_{timestamp}_{hashlib.md5(str(timestamp).encode()).hexdigest()[:8]}"

    @staticmethod
    def _record_from_row(row) -> "ImageRecord":
        """Build an ImageRecord from a sqlite3.Row, normalizing NULLs.

        Centralizes the row->record mapping that was previously
        duplicated across _load_cache, check_duplicate_by_path and
        get_all_records.
        """
        return ImageRecord(
            original_path=row['original_path'],
            cloudflare_id=row['cloudflare_id'],
            cloudflare_url=row['cloudflare_url'],
            file_hash=row['file_hash'],
            url_hash=row['url_hash'],
            original_filename=row['original_filename'] or "",
            file_size_bytes=row['file_size_bytes'] or 0,
            mime_type=row['mime_type'] or "",
            upload_timestamp=row['upload_timestamp'],
            upload_date=row['upload_date'],
            source_project=row['source_project'] or "",
            source_file=row['source_file'] or "",
            migration_session=row['migration_session'] or "",
            was_optimized=bool(row['was_optimized']),
            original_size_bytes=row['original_size_bytes'] or 0,
            compression_ratio=row['compression_ratio'] or 0.0,
            quality_score=row['quality_score'] or 0.0,
            security_level=row['security_level'] or "",
            security_issues=row['security_issues'] or ""
        )

    def _cache_record(self, record: "ImageRecord"):
        """Index a record in all in-memory caches that apply to it."""
        if record.file_hash:
            self._hash_cache[record.file_hash] = record
        if record.url_hash:
            self._url_cache[record.url_hash] = record
        self._id_cache[record.cloudflare_id] = record

    def _init_database(self):
        """Initialize the SQLite database with schema and lookup indexes."""
        self.database_path.parent.mkdir(parents=True, exist_ok=True)

        with sqlite3.connect(self.database_path) as conn:
            conn.execute("""
                CREATE TABLE IF NOT EXISTS uploaded_images (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,

                    -- Core identification
                    original_path TEXT NOT NULL,
                    cloudflare_id TEXT UNIQUE NOT NULL,
                    cloudflare_url TEXT NOT NULL,

                    -- Hashing and deduplication
                    file_hash TEXT,
                    url_hash TEXT,

                    -- Metadata
                    original_filename TEXT,
                    file_size_bytes INTEGER DEFAULT 0,
                    mime_type TEXT,
                    upload_timestamp REAL NOT NULL,
                    upload_date TEXT NOT NULL,

                    -- Source tracking
                    source_project TEXT,
                    source_file TEXT,
                    migration_session TEXT,

                    -- Quality and optimization
                    was_optimized BOOLEAN DEFAULT FALSE,
                    original_size_bytes INTEGER DEFAULT 0,
                    compression_ratio REAL DEFAULT 0.0,
                    quality_score REAL DEFAULT 0.0,

                    -- Security
                    security_level TEXT,
                    security_issues TEXT,

                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                )
            """)

            # Indexes backing the duplicate-detection and session queries.
            conn.execute("CREATE INDEX IF NOT EXISTS idx_file_hash ON uploaded_images(file_hash)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_url_hash ON uploaded_images(url_hash)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_cloudflare_id ON uploaded_images(cloudflare_id)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_original_path ON uploaded_images(original_path)")
            conn.execute("CREATE INDEX IF NOT EXISTS idx_session ON uploaded_images(migration_session)")

            conn.commit()

    def _load_cache(self):
        """Load all existing records into the in-memory caches."""
        with sqlite3.connect(self.database_path) as conn:
            conn.row_factory = sqlite3.Row
            for row in conn.execute("SELECT * FROM uploaded_images"):
                self._cache_record(self._record_from_row(row))

    def check_duplicate_by_hash(self, file_hash: str) -> Optional["ImageRecord"]:
        """Return the existing record with this content hash, or None."""
        return self._hash_cache.get(file_hash)

    def check_duplicate_by_url(self, url: str) -> Optional["ImageRecord"]:
        """Return the existing record fetched from this URL, or None."""
        url_hash = hashlib.md5(url.encode()).hexdigest()
        return self._url_cache.get(url_hash)

    def check_duplicate_by_path(self, path: str) -> Optional["ImageRecord"]:
        """Return the existing record with this exact original path, or None."""
        with sqlite3.connect(self.database_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute(
                "SELECT * FROM uploaded_images WHERE original_path = ? LIMIT 1",
                (path,)
            )
            row = cursor.fetchone()
            return self._record_from_row(row) if row else None

    def add_image_record(self, record: "ImageRecord") -> bool:
        """Persist a new image record and index it in the caches.

        Missing timestamp/date/session fields are filled in before the
        write. Returns True on success, False on failure.
        """
        try:
            # Ensure bookkeeping fields are set.
            if not record.upload_timestamp:
                record.upload_timestamp = time.time()
            if not record.upload_date:
                record.upload_date = datetime.fromtimestamp(record.upload_timestamp).isoformat()
            if not record.migration_session:
                record.migration_session = self.session_id

            with sqlite3.connect(self.database_path) as conn:
                # INSERT OR REPLACE: re-uploading the same cloudflare_id
                # updates the existing row (cloudflare_id is UNIQUE).
                conn.execute("""
                    INSERT OR REPLACE INTO uploaded_images (
                        original_path, cloudflare_id, cloudflare_url,
                        file_hash, url_hash, original_filename, file_size_bytes,
                        mime_type, upload_timestamp, upload_date,
                        source_project, source_file, migration_session,
                        was_optimized, original_size_bytes, compression_ratio,
                        quality_score, security_level, security_issues
                    ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, (
                    record.original_path, record.cloudflare_id, record.cloudflare_url,
                    record.file_hash, record.url_hash, record.original_filename,
                    record.file_size_bytes, record.mime_type, record.upload_timestamp,
                    record.upload_date, record.source_project, record.source_file,
                    record.migration_session, record.was_optimized, record.original_size_bytes,
                    record.compression_ratio, record.quality_score, record.security_level,
                    record.security_issues
                ))
                conn.commit()

            self._cache_record(record)
            return True

        except Exception as e:
            print(f"Error adding image record: {e}")
            return False

    def get_all_records(self) -> List["ImageRecord"]:
        """Return every tracked record, newest upload first."""
        with sqlite3.connect(self.database_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("SELECT * FROM uploaded_images ORDER BY upload_timestamp DESC")
            return [self._record_from_row(row) for row in cursor]

    def export_to_csv(self, include_session_only: bool = False) -> bool:
        """Export image records to the configured CSV file.

        If include_session_only is True, only records from the current
        migration session are exported. Returns True on success (also
        when there is nothing to export).
        """
        try:
            records = self.get_all_records()

            if include_session_only:
                records = [r for r in records if r.migration_session == self.session_id]

            if not records:
                return True

            # Ensure parent directory exists
            self.csv_export_path.parent.mkdir(parents=True, exist_ok=True)

            with open(self.csv_export_path, 'w', newline='', encoding='utf-8') as csvfile:
                fieldnames = [
                    'original_path', 'cloudflare_id', 'cloudflare_url',
                    'file_hash', 'url_hash', 'original_filename',
                    'file_size_bytes', 'mime_type', 'upload_timestamp',
                    'upload_date', 'source_project', 'source_file',
                    'migration_session', 'was_optimized', 'original_size_bytes',
                    'compression_ratio', 'quality_score', 'security_level',
                    'security_issues'
                ]

                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                writer.writeheader()
                for record in records:
                    writer.writerow(record.to_dict())

            return True

        except Exception as e:
            print(f"Error exporting to CSV: {e}")
            return False

    def get_statistics(self) -> Dict:
        """Get comprehensive statistics about tracked images."""
        with sqlite3.connect(self.database_path) as conn:
            cursor = conn.cursor()

            # Basic counts
            cursor.execute("SELECT COUNT(*) FROM uploaded_images")
            total_images = cursor.fetchone()[0]

            cursor.execute("SELECT COUNT(*) FROM uploaded_images WHERE migration_session = ?", (self.session_id,))
            session_images = cursor.fetchone()[0]

            cursor.execute("SELECT COUNT(DISTINCT migration_session) FROM uploaded_images")
            total_sessions = cursor.fetchone()[0]

            cursor.execute("SELECT COUNT(*) FROM uploaded_images WHERE was_optimized = 1")
            optimized_images = cursor.fetchone()[0]

            # Size statistics (SUM/AVG are NULL on an empty table)
            cursor.execute("SELECT SUM(file_size_bytes), AVG(file_size_bytes) FROM uploaded_images")
            size_stats = cursor.fetchone()
            total_size = size_stats[0] or 0
            avg_size = size_stats[1] or 0

            # Compression statistics
            cursor.execute("SELECT AVG(compression_ratio) FROM uploaded_images WHERE compression_ratio > 0")
            avg_compression = cursor.fetchone()[0] or 0

            # Recent activity: uploads in the last 24 hours
            cursor.execute("""
                SELECT COUNT(*) FROM uploaded_images
                WHERE upload_timestamp > ?
            """, (time.time() - 86400,))
            recent_uploads = cursor.fetchone()[0]

            return {
                'total_images': total_images,
                'session_images': session_images,
                'total_sessions': total_sessions,
                'optimized_images': optimized_images,
                'total_size_mb': round(total_size / (1024 * 1024), 2),
                'average_size_mb': round(avg_size / (1024 * 1024), 2),
                'average_compression_ratio': round(avg_compression, 3),
                'recent_uploads_24h': recent_uploads,
                'current_session_id': self.session_id
            }

    def cleanup_old_sessions(self, keep_sessions: int = 10):
        """Delete data from old migration sessions, keeping the newest ones.

        Sessions are ranked by their most recent upload timestamp; only
        the `keep_sessions` newest are retained. The in-memory caches are
        rebuilt afterwards so they stay consistent with the database.
        """
        with sqlite3.connect(self.database_path) as conn:
            # One row per session, newest activity first. The previous
            # query used DISTINCT with a bare aggregate in ORDER BY and
            # no GROUP BY, which collapses to a single row in SQLite —
            # GROUP BY makes MAX(upload_timestamp) per-session.
            cursor = conn.execute("""
                SELECT migration_session
                FROM uploaded_images
                GROUP BY migration_session
                ORDER BY MAX(upload_timestamp) DESC
            """)
            sessions = [row[0] for row in cursor.fetchall()]

            if len(sessions) > keep_sessions:
                sessions_to_delete = sessions[keep_sessions:]

                # Delete all rows belonging to the stale sessions.
                placeholders = ','.join(['?'] * len(sessions_to_delete))
                conn.execute(f"""
                    DELETE FROM uploaded_images
                    WHERE migration_session IN ({placeholders})
                """, sessions_to_delete)

                conn.commit()

                # Rebuild caches from the pruned database.
                self._hash_cache.clear()
                self._url_cache.clear()
                self._id_cache.clear()
                self._load_cache()