auto-coder 0.1.396-py3-none-any.whl → 0.1.398-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of auto-coder might be problematic.
- {auto_coder-0.1.396.dist-info → auto_coder-0.1.398.dist-info}/METADATA +2 -2
- {auto_coder-0.1.396.dist-info → auto_coder-0.1.398.dist-info}/RECORD +31 -12
- autocoder/auto_coder_rag.py +1 -0
- autocoder/chat_auto_coder.py +3 -0
- autocoder/common/conversations/__init__.py +84 -39
- autocoder/common/conversations/backup/__init__.py +14 -0
- autocoder/common/conversations/backup/backup_manager.py +564 -0
- autocoder/common/conversations/backup/restore_manager.py +546 -0
- autocoder/common/conversations/cache/__init__.py +16 -0
- autocoder/common/conversations/cache/base_cache.py +89 -0
- autocoder/common/conversations/cache/cache_manager.py +368 -0
- autocoder/common/conversations/cache/memory_cache.py +224 -0
- autocoder/common/conversations/config.py +195 -0
- autocoder/common/conversations/exceptions.py +72 -0
- autocoder/common/conversations/file_locker.py +145 -0
- autocoder/common/conversations/manager.py +917 -0
- autocoder/common/conversations/models.py +154 -0
- autocoder/common/conversations/search/__init__.py +15 -0
- autocoder/common/conversations/search/filter_manager.py +431 -0
- autocoder/common/conversations/search/text_searcher.py +366 -0
- autocoder/common/conversations/storage/__init__.py +16 -0
- autocoder/common/conversations/storage/base_storage.py +82 -0
- autocoder/common/conversations/storage/file_storage.py +267 -0
- autocoder/common/conversations/storage/index_manager.py +317 -0
- autocoder/common/rag_manager/rag_manager.py +16 -18
- autocoder/rags.py +74 -24
- autocoder/version.py +1 -1
- {auto_coder-0.1.396.dist-info → auto_coder-0.1.398.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.396.dist-info → auto_coder-0.1.398.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.396.dist-info → auto_coder-0.1.398.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.396.dist-info → auto_coder-0.1.398.dist-info}/top_level.txt +0 -0
autocoder/common/conversations/backup/backup_manager.py (new file)

@@ -0,0 +1,564 @@
+"""
+Backup manager for conversation data.
+
+This module provides functionality to create, manage, and schedule backups
+of conversation data, supporting both incremental and full backup strategies.
+"""
+
+import os
+import json
+import shutil
+import hashlib
+import threading
+from datetime import datetime, timedelta
+from typing import Dict, List, Optional, Set, Tuple
+from pathlib import Path
+from dataclasses import dataclass, asdict
+
+from ..exceptions import BackupError, ConversationManagerError
+from ..config import ConversationManagerConfig
+
+
+@dataclass
+class BackupMetadata:
+    """Backup metadata information."""
+    backup_id: str
+    backup_type: str  # 'full' or 'incremental'
+    timestamp: float
+    created_at: str
+    base_backup_id: Optional[str] = None  # For incremental backups
+    conversation_ids: Optional[List[str]] = None
+    file_count: int = 0
+    total_size_bytes: int = 0
+    checksum: Optional[str] = None
+    description: Optional[str] = None
+
+
+class BackupManager:
+    """
+    Manages backup operations for conversation data.
+
+    Supports both full and incremental backups, with automatic scheduling
+    and retention management.
+    """
+
+    def __init__(self, config: ConversationManagerConfig):
+        """
+        Initialize backup manager.
+
+        Args:
+            config: Configuration object containing backup settings
+        """
+        self.config = config
+        self.storage_path = Path(config.storage_path)
+        self.backup_path = self.storage_path / "backups"
+        self.temp_path = self.storage_path / "temp"
+
+        # Ensure backup directories exist
+        self.backup_path.mkdir(parents=True, exist_ok=True)
+        self.temp_path.mkdir(parents=True, exist_ok=True)
+
+        # Backup metadata file
+        self.metadata_file = self.backup_path / "backup_metadata.json"
+
+        # Thread lock for backup operations
+        self._backup_lock = threading.Lock()
+
+        # Load existing backup metadata
+        self._metadata: Dict[str, BackupMetadata] = self._load_metadata()
+
+        # Track last backup timestamps
+        self._last_full_backup: Optional[float] = None
+        self._last_incremental_backup: Optional[float] = None
+        self._update_backup_timestamps()
+
+    def create_full_backup(self, description: Optional[str] = None) -> str:
+        """
+        Create a full backup of all conversation data.
+
+        Args:
+            description: Optional description for the backup
+
+        Returns:
+            Backup ID of the created backup
+
+        Raises:
+            BackupError: If backup creation fails
+        """
+        with self._backup_lock:
+            try:
+                backup_id = self._generate_backup_id("full")
+                backup_dir = self.backup_path / backup_id
+                backup_dir.mkdir(parents=True, exist_ok=True)
+
+                # Get all conversation files
+                conversations_dir = self.storage_path / "conversations"
+                if not conversations_dir.exists():
+                    # Create empty backup if no conversations exist
+                    conversation_files = []
+                else:
+                    conversation_files = list(conversations_dir.glob("*.json"))
+
+                # Copy conversation files to backup directory
+                copied_files = []
+                total_size = 0
+
+                for conv_file in conversation_files:
+                    if conv_file.name.endswith('.lock'):
+                        continue
+
+                    dest_file = backup_dir / conv_file.name
+                    shutil.copy2(conv_file, dest_file)
+                    copied_files.append(conv_file.name)
+                    total_size += conv_file.stat().st_size
+
+                # Copy index files
+                index_dir = self.storage_path / "index"
+                if index_dir.exists():
+                    backup_index_dir = backup_dir / "index"
+                    backup_index_dir.mkdir(parents=True, exist_ok=True)
+
+                    for index_file in index_dir.glob("*.json"):
+                        dest_file = backup_index_dir / index_file.name
+                        shutil.copy2(index_file, dest_file)
+                        total_size += index_file.stat().st_size
+
+                # Calculate backup checksum
+                checksum = self._calculate_backup_checksum(backup_dir)
+
+                # Create backup metadata
+                metadata = BackupMetadata(
+                    backup_id=backup_id,
+                    backup_type="full",
+                    timestamp=datetime.now().timestamp(),
+                    created_at=datetime.now().isoformat(),
+                    conversation_ids=[f.stem for f in conversation_files],
+                    file_count=len(copied_files),
+                    total_size_bytes=total_size,
+                    checksum=checksum,
+                    description=description
+                )
+
+                # Save metadata
+                self._metadata[backup_id] = metadata
+                self._save_metadata()
+
+                # Update last backup timestamp
+                self._last_full_backup = metadata.timestamp
+
+                # Clean up old backups if necessary
+                self._cleanup_old_backups()
+
+                return backup_id
+
+            except Exception as e:
+                # Clean up partial backup on failure
+                backup_dir = self.backup_path / backup_id
+                if backup_dir.exists():
+                    shutil.rmtree(backup_dir, ignore_errors=True)
+                raise BackupError(f"Failed to create full backup: {str(e)}") from e
+
+    def create_incremental_backup(
+        self,
+        base_backup_id: Optional[str] = None,
+        description: Optional[str] = None
+    ) -> str:
+        """
+        Create an incremental backup based on changes since the last backup.
+
+        Args:
+            base_backup_id: Base backup ID to compare against. If None, uses latest full backup
+            description: Optional description for the backup
+
+        Returns:
+            Backup ID of the created incremental backup
+
+        Raises:
+            BackupError: If incremental backup creation fails
+        """
+        with self._backup_lock:
+            try:
+                # Find base backup
+                if base_backup_id is None:
+                    base_backup_id = self._get_latest_full_backup_id()
+                    if base_backup_id is None:
+                        raise BackupError("No full backup found for incremental backup")
+
+                if base_backup_id not in self._metadata:
+                    raise BackupError(f"Base backup {base_backup_id} not found")
+
+                base_metadata = self._metadata[base_backup_id]
+                base_timestamp = base_metadata.timestamp
+
+                # Find changed files since base backup
+                changed_files = self._find_changed_files_since(base_timestamp)
+
+                if not changed_files:
+                    # No changes, create empty incremental backup
+                    pass
+
+                backup_id = self._generate_backup_id("incremental")
+                backup_dir = self.backup_path / backup_id
+                backup_dir.mkdir(parents=True, exist_ok=True)
+
+                # Copy changed files
+                copied_files = []
+                total_size = 0
+
+                for file_path in changed_files:
+                    rel_path = file_path.relative_to(self.storage_path)
+                    dest_file = backup_dir / rel_path
+                    dest_file.parent.mkdir(parents=True, exist_ok=True)
+
+                    shutil.copy2(file_path, dest_file)
+                    copied_files.append(str(rel_path))
+                    total_size += file_path.stat().st_size
+
+                # Calculate backup checksum
+                checksum = self._calculate_backup_checksum(backup_dir)
+
+                # Create backup metadata
+                metadata = BackupMetadata(
+                    backup_id=backup_id,
+                    backup_type="incremental",
+                    timestamp=datetime.now().timestamp(),
+                    created_at=datetime.now().isoformat(),
+                    base_backup_id=base_backup_id,
+                    conversation_ids=self._extract_conversation_ids_from_files(copied_files),
+                    file_count=len(copied_files),
+                    total_size_bytes=total_size,
+                    checksum=checksum,
+                    description=description
+                )
+
+                # Save metadata
+                self._metadata[backup_id] = metadata
+                self._save_metadata()
+
+                # Update last backup timestamp
+                self._last_incremental_backup = metadata.timestamp
+
+                return backup_id
+
+            except Exception as e:
+                # Clean up partial backup on failure
+                try:
+                    backup_dir = self.backup_path / backup_id
+                    if backup_dir.exists():
+                        shutil.rmtree(backup_dir, ignore_errors=True)
+                except NameError:
+                    # backup_id not defined yet, no cleanup needed
+                    pass
+                raise BackupError(f"Failed to create incremental backup: {str(e)}") from e
+
+    def list_backups(self) -> List[BackupMetadata]:
+        """
+        List all available backups.
+
+        Returns:
+            List of backup metadata, sorted by timestamp (newest first)
+        """
+        backups = list(self._metadata.values())
+        return sorted(backups, key=lambda x: x.timestamp, reverse=True)
+
+    def get_backup_metadata(self, backup_id: str) -> Optional[BackupMetadata]:
+        """
+        Get metadata for a specific backup.
+
+        Args:
+            backup_id: ID of the backup
+
+        Returns:
+            Backup metadata or None if not found
+        """
+        return self._metadata.get(backup_id)
+
+    def delete_backup(self, backup_id: str) -> bool:
+        """
+        Delete a specific backup.
+
+        Args:
+            backup_id: ID of the backup to delete
+
+        Returns:
+            True if backup was deleted, False if not found
+
+        Raises:
+            BackupError: If deletion fails
+        """
+        with self._backup_lock:
+            try:
+                if backup_id not in self._metadata:
+                    return False
+
+                # Check if this backup is referenced by incremental backups
+                dependent_backups = [
+                    b for b in self._metadata.values()
+                    if b.base_backup_id == backup_id
+                ]
+
+                if dependent_backups:
+                    dependent_ids = [b.backup_id for b in dependent_backups]
+                    raise BackupError(
+                        f"Cannot delete backup {backup_id}: it is referenced by "
+                        f"incremental backups: {', '.join(dependent_ids)}"
+                    )
+
+                # Remove backup directory
+                backup_dir = self.backup_path / backup_id
+                if backup_dir.exists():
+                    shutil.rmtree(backup_dir)
+
+                # Remove from metadata
+                del self._metadata[backup_id]
+                self._save_metadata()
+
+                return True
+
+            except Exception as e:
+                raise BackupError(f"Failed to delete backup {backup_id}: {str(e)}") from e
+
+    def cleanup_old_backups(self) -> int:
+        """
+        Clean up old backups according to retention policy.
+
+        Returns:
+            Number of backups deleted
+        """
+        with self._backup_lock:
+            return self._cleanup_old_backups()
+
+    def get_backup_statistics(self) -> Dict:
+        """
+        Get backup statistics.
+
+        Returns:
+            Dictionary containing backup statistics
+        """
+        backups = list(self._metadata.values())
+        full_backups = [b for b in backups if b.backup_type == "full"]
+        incremental_backups = [b for b in backups if b.backup_type == "incremental"]
+
+        total_size = sum(b.total_size_bytes for b in backups)
+
+        return {
+            "total_backups": len(backups),
+            "full_backups": len(full_backups),
+            "incremental_backups": len(incremental_backups),
+            "total_size_bytes": total_size,
+            "total_size_mb": round(total_size / (1024 * 1024), 2),
+            "last_full_backup": self._last_full_backup,
+            "last_incremental_backup": self._last_incremental_backup,
+            "oldest_backup": min(b.timestamp for b in backups) if backups else None,
+            "newest_backup": max(b.timestamp for b in backups) if backups else None,
+        }
+
+    def verify_backup(self, backup_id: str) -> bool:
+        """
+        Verify the integrity of a backup.
+
+        Args:
+            backup_id: ID of the backup to verify
+
+        Returns:
+            True if backup is valid, False otherwise
+        """
+        try:
+            if backup_id not in self._metadata:
+                return False
+
+            metadata = self._metadata[backup_id]
+            backup_dir = self.backup_path / backup_id
+
+            if not backup_dir.exists():
+                return False
+
+            # Verify checksum
+            current_checksum = self._calculate_backup_checksum(backup_dir)
+            return current_checksum == metadata.checksum
+
+        except Exception:
+            return False
+
+    def should_create_backup(self) -> Tuple[bool, str]:
+        """
+        Check if a backup should be created based on schedule.
+
+        Returns:
+            Tuple of (should_backup, backup_type)
+        """
+        now = datetime.now().timestamp()
+
+        # Check if full backup is needed
+        if (self._last_full_backup is None or
+                now - self._last_full_backup > self.config.backup_interval):
+            return True, "full"
+
+        # Check if incremental backup is needed
+        incremental_interval = self.config.backup_interval / 4  # 4 times more frequent
+        if (self._last_incremental_backup is None or
+                now - self._last_incremental_backup > incremental_interval):
+            return True, "incremental"
+
+        return False, ""
+
+    def _generate_backup_id(self, backup_type: str) -> str:
+        """Generate a unique backup ID."""
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        return f"{backup_type}_{timestamp}"
+
+    def _load_metadata(self) -> Dict[str, BackupMetadata]:
+        """Load backup metadata from file."""
+        if not self.metadata_file.exists():
+            return {}
+
+        try:
+            with open(self.metadata_file, 'r', encoding='utf-8') as f:
+                data = json.load(f)
+
+            metadata = {}
+            for backup_id, backup_data in data.items():
+                metadata[backup_id] = BackupMetadata(**backup_data)
+
+            return metadata
+
+        except Exception as e:
+            # If metadata is corrupted, start fresh but log the error
+            backup_file = self.metadata_file.with_suffix('.corrupted')
+            if self.metadata_file.exists():
+                shutil.move(str(self.metadata_file), str(backup_file))
+            return {}
+
+    def _save_metadata(self) -> None:
+        """Save backup metadata to file."""
+        try:
+            data = {
+                backup_id: asdict(metadata)
+                for backup_id, metadata in self._metadata.items()
+            }
+
+            # Write to temporary file first
+            temp_file = self.metadata_file.with_suffix('.tmp')
+            with open(temp_file, 'w', encoding='utf-8') as f:
+                json.dump(data, f, indent=2, ensure_ascii=False)
+
+            # Atomic replace
+            temp_file.replace(self.metadata_file)
+
+        except Exception as e:
+            raise BackupError(f"Failed to save backup metadata: {str(e)}") from e
+
+    def _calculate_backup_checksum(self, backup_dir: Path) -> str:
+        """Calculate checksum for entire backup directory."""
+        hasher = hashlib.sha256()
+
+        for file_path in sorted(backup_dir.rglob("*")):
+            if file_path.is_file():
+                with open(file_path, 'rb') as f:
+                    while chunk := f.read(8192):
+                        hasher.update(chunk)
+                hasher.update(file_path.name.encode('utf-8'))
+
+        return hasher.hexdigest()
+
+    def _find_changed_files_since(self, timestamp: float) -> List[Path]:
+        """Find files modified since given timestamp."""
+        changed_files = []
+
+        # Check conversation files
+        conversations_dir = self.storage_path / "conversations"
+        if conversations_dir.exists():
+            for conv_file in conversations_dir.glob("*.json"):
+                if conv_file.stat().st_mtime > timestamp:
+                    changed_files.append(conv_file)
+
+        # Check index files
+        index_dir = self.storage_path / "index"
+        if index_dir.exists():
+            for index_file in index_dir.glob("*.json"):
+                if index_file.stat().st_mtime > timestamp:
+                    changed_files.append(index_file)
+
+        return changed_files
+
+    def _extract_conversation_ids_from_files(self, file_paths: List[str]) -> List[str]:
+        """Extract conversation IDs from file paths."""
+        conversation_ids = []
+        for file_path in file_paths:
+            if file_path.startswith("conversations/") and file_path.endswith(".json"):
+                conv_id = Path(file_path).stem
+                conversation_ids.append(conv_id)
+        return conversation_ids
+
+    def _get_latest_full_backup_id(self) -> Optional[str]:
+        """Get the ID of the latest full backup."""
+        full_backups = [
+            b for b in self._metadata.values()
+            if b.backup_type == "full"
+        ]
+
+        if not full_backups:
+            return None
+
+        latest = max(full_backups, key=lambda x: x.timestamp)
+        return latest.backup_id
+
+    def _update_backup_timestamps(self) -> None:
+        """Update last backup timestamps from metadata."""
+        full_backups = [
+            b for b in self._metadata.values()
+            if b.backup_type == "full"
+        ]
+        incremental_backups = [
+            b for b in self._metadata.values()
+            if b.backup_type == "incremental"
+        ]
+
+        if full_backups:
+            self._last_full_backup = max(b.timestamp for b in full_backups)
+
+        if incremental_backups:
+            self._last_incremental_backup = max(b.timestamp for b in incremental_backups)
+
+    def _cleanup_old_backups(self) -> int:
+        """Clean up old backups according to retention policy."""
+        if not self.config.backup_enabled or self.config.max_backups <= 0:
+            return 0
+
+        backups = sorted(
+            self._metadata.values(),
+            key=lambda x: x.timestamp,
+            reverse=True
+        )
+
+        if len(backups) <= self.config.max_backups:
+            return 0
+
+        # Keep the most recent backups
+        backups_to_delete = backups[self.config.max_backups:]
+        deleted_count = 0
+
+        for backup in backups_to_delete:
+            try:
+                # Don't delete if it's referenced by other backups
+                dependent_backups = [
+                    b for b in self._metadata.values()
+                    if b.base_backup_id == backup.backup_id
+                ]
+
+                if not dependent_backups:
+                    backup_dir = self.backup_path / backup.backup_id
+                    if backup_dir.exists():
+                        shutil.rmtree(backup_dir)
+
+                    del self._metadata[backup.backup_id]
+                    deleted_count += 1
+
+            except Exception:
+                # Continue with other backups if one fails
+                continue
+
+        if deleted_count > 0:
+            self._save_metadata()
+
+        return deleted_count
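For orientation, here is a minimal usage sketch of the new BackupManager, not taken from the package docs. The `ConversationManagerConfig` field names (`storage_path`, `backup_enabled`, `max_backups`, `backup_interval`) are assumptions inferred from the attributes this module reads; the actual constructor lives in the `config.py` added in the same release and may differ.

```python
# Hypothetical sketch: drives the BackupManager added in 0.1.398.
# Assumes ConversationManagerConfig exposes storage_path, backup_enabled,
# max_backups, and backup_interval, as read by backup_manager.py.
from autocoder.common.conversations.config import ConversationManagerConfig
from autocoder.common.conversations.backup.backup_manager import BackupManager

config = ConversationManagerConfig(storage_path="./conversation_store")
manager = BackupManager(config)

# Take a full snapshot, then check it against its stored checksum.
backup_id = manager.create_full_backup(description="before upgrading auto-coder")
print(manager.verify_backup(backup_id))  # True if the copied files are intact

# Later, let the built-in schedule decide whether a backup is due and of which type.
due, backup_type = manager.should_create_backup()
if due and backup_type == "incremental":
    manager.create_incremental_backup(description="scheduled incremental")

print(manager.get_backup_statistics())
```

Incremental backups reference a base full backup via `base_backup_id`, so `delete_backup` and the retention cleanup both refuse to remove a full backup that still has dependents.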