lsync 1.2.5 → 2.0.2

@@ -1,22 +0,0 @@
-
- class Hash
-   def keys_matching(p, &block)
-     s = {}
-
-     self.each do |k,v|
-       next unless k.match(p)
-
-       v = yield(v, k) if block_given?
-
-       s[k] = v
-     end
-
-     return s
-   end
-
-   def collect_values
-     each do |k,v|
-       self[k] = yield v
-     end
-   end
- end
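
For reference, a minimal sketch of how the removed Hash helpers behaved (the example hash and values are hypothetical, not taken from the gem): keys_matching returns a new hash containing only the entries whose keys match a pattern, optionally transforming each value through the block, which receives (value, key); collect_values rewrites every value in place.

servers = { "server.example.com" => 1, "backup.example.com" => 2 }

# Keep only keys matching the pattern; the block receives (value, key).
subset = servers.keys_matching(/^server/) { |v, k| v * 10 }
# => {"server.example.com"=>10}

# Replace every value in place with the block's result.
servers.collect_values { |v| v + 1 }
# => {"server.example.com"=>2, "backup.example.com"=>3}
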
data/lib/lsync/lb.py DELETED
@@ -1,1304 +0,0 @@
- #!/usr/bin/env python
- # -*- Mode: Python; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
-
- """Link-Backup
- Copyright (c) 2004 Scott Ludwig
- http://www.scottlu.com
-
- Link-Backup is a backup utility that creates hard links between a series
- of backed-up trees, and intelligently handles renames, moves, and
- duplicate files without additional storage or transfer.
-
- Transfer occurs over standard i/o locally or remotely between a client and
- server instance of this script. Remote backups rely on the secure remote
- shell program ssh.
-
- viewlb.cgi, a simple web based viewer of backups made by link-backup, is
- also available from the link-backup page.
-
- http://www.scottlu.com/Content/Link-Backup.html
-
- Usage:
-
-   lb [options] srcdir dstdir
-   lb [options] user@host:srcdir dstdir
-   lb [options] srcdir user@host:dstdir
-
- Source or dest can be remote. Backups are dated with the following entries:
-
-   dstdir/YYYY.MM.DD-HH.MM.SS/tree/  backed up file tree
-   dstdir/YYYY.MM.DD-HH.MM.SS/log    logfile
-
- Options:
-
-   --verify                Run rsync with --dry-run to cross-verify
-   --numeric-ids           Keep uid/gid values instead of mapping; requires root
-   --minutes <mins>        Only run for <mins> minutes. Incremental backup.
-   --showfiles             Don't backup, only list relative path files needing
-                           backup
-   --catalogonly           Update catalog only
-   --filelist <- or file>  Specify filelist. Files relative to srcdir.
-   --lock                  Ensure only one backup to a given dest will run at a time
-   --verbose               Show what is happening
-
- Comments:
-
- Link-Backup tracks unique file instances in a tree and creates a backup that,
- while identical in structure, ensures that no file is duplicated unnecessarily.
- Files that are moved, renamed, or duplicated won't cause additional storage or
- transfer. dstdir/.catalog is a catalog of all unique file instances; backup
- trees hard-link to the catalog. If a backup tree would be identical to the
- previous backup tree, it won't be needlessly created.
-
- How it works:
-
- The src sends a file list to the dst. First dst updates the catalog by checking
- to see if it knows about each file. If not, the file is retrieved from the src
- and a new catalog entry is made:
-
- For each file:
-   1. Check to see if the file path + file stat is present in the last tree.
-   2. If not, ask for md5sum from the src. See if md5sum+stat is in the
-      catalog.
-   3. If not, see if md5sum only is in the catalog. If so, copy the catalog
-      entry and rename with md5sum+new stat.
-   4. If not, request file from src, make new catalog entry.
-
- Catalog files are named by md5sum+stats and stored in flat directories. Once
- complete, a tree is created that mirrors the src by hardlinking to the catalog.
-
- Example 1:
-
-   python lb.py pictures pictures-backup
-
- Makes a new backup of pictures in pictures-backup.
-
- Example 2:
-
-   python lb.py pictures me@fluffy:~/pictures-backup
-
- Backs up on remote machine fluffy instead of locally.
-
- Example 3:
-
-   python lb.py --minutes 240 pictures me@remote:~/pictures-backup
-
- Same as above except for 240 minutes only. This is useful if backing up over
- the internet only during specific times (at night for example). Does what it
- can in 240 minutes. If the catalog update completes, a tree is created
- hardlinked to the catalog.
-
- Example 4:
-   python lb.py --showfiles pictures pictures-backup | \
-   python lb.py --filelist - pictures pictures-backup
-
- Same as example #1.
-
- Example 5:
-
-   1)
-   python lb.py --showfiles pictures me@remote:~/pictures-backup | \
-   python lb.py --filelist - pictures me@laptop:~/pictures-transfer
-
-   2)
-   python lb.py --catalogonly pictures-transfer me@remote:~/pictures-backup
-
-   3)
-   python lb.py pictures me@remote:~/pictures-backup
-
- If the difference between pictures and pictures-backup (for example) is too
- large for internet backup, the steps above can be used. Step 1 transfers only
- the differences to a laptop. Step 2 is at the location of machine "remote" and
- is initiated from the laptop to machine "remote". Step 3 is back at the source
- and will do a backup and notice all the files are present in the remote catalog,
- and will build the tree.
-
- Note the source in step 2 could be more precisely specified as the backup tree
- created underneath the pictures-transfer directory, although that is not necessary
- since only the catalog is being updated (however, it would be a speedup).
-
- History:
-
- v 0.83 17/Apr/2009 Samuel Williams http://www.oriontransfer.co.nz/
-   - Collaboration with Scott to fix a bug that caused a crash
-     when a file changed (stat -> fstat)
-
- v 0.82 20/Oct/2008 Samuel Williams http://www.oriontransfer.co.nz/
-   - Removed --ssh-(x) options in favor of rsync style -e '...' options;
-     this makes the command compatible with rsync style syntax.
-
- v 0.81 6/Sep/2008 Samuel Williams http://www.oriontransfer.co.nz/
-   - Added mode-line and #! line
-   - Fixed parsing of command line arguments that contain spaces to match rsync
-     (shlex parsing)
-   - Fixed escaping of ssh strings so that they get passed correctly
-
- v 0.8 12/23/2006 scottlu
-   - allow backups to occur while files are changing
-   - fixed minor --verify command bug
-   - added --verbose logging to tree building
-
- v 0.7 09/02/2006 scottlu
-   - Ignore pipe, socket, and device file types
-   - Added --ssh-i to select ssh id file to use (see ssh -i) (Damien Mascord)
-   - Added --ssh-C to perform ssh compression (see ssh -C) (David Precious)
-   - Added --ssh-p to specify remote port (see ssh -p) (David Precious)
-
- v 0.6 06/17/2006 scottlu
-   - Ignore broken symlinks and other failed stats during filelist creation
-     (David Precious)
-   - Added --lock, which ensures only one backup to a given dest can occur
-     at a time (Joe Beda)
-
- v 0.5 04/15/2006 scottlu
-   - Added 'latest' link from Joe Beda http://eightypercent.net (thanks Joe!)
-   - Fixed --verify. It wasn't specifying the remote machine (I rarely use
-     verify but sometimes it is nice to sanity check backups)
-
- v 0.4 11/14/2004 scottlu
-   - Changed to a central catalog design with trees hardlinking to the catalog.
-     This way catalog updating can be incremental.
-   - Removed filemaps - not required any longer
-   - Add catalog logging as well as backup logging.
-   - Added incremental backup feature --minutes <minutes>
-   - Make md5hash calculation incremental so a timeout doesn't waste time
-   - Created 0.3-0.4.py for 0.3 to 0.4 upgrading
-   - Added --showfiles, shows differences between src and dst
-   - Added --catalogonly, updates catalog only, doesn't create tree
-   - Added --filelist, specifies file list to use instead of tree
-   - Removed --rmempty
-   - Added --verbose
-
- v 0.3 9/10/2004 scottlu
-   - Added backup stat query methods
-   - Changed log file format
-   - Added viewlb.cgi, a web interface for viewing backups
-   - added gzip compression of filemap
-   - added --numeric-ids
-
- v 0.2 8/28/2004 scottlu
-   - filemap format change
-   - added --rmempty
-   - added --verify to run rsync in verify mode
-   - added uid/gid mapping by default unless --numeric-ids is specified
-
- v 0.1 8/19/2004 scottlu
-   - Fully working backup, hardlinking between trees
-
- License:
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
- """
-
- import os
- import sys
- import cPickle
- from os.path import join
- import time
- import stat
- import md5
- import shutil
- import tempfile
- import struct
- import re
- import glob
- import fcntl
- import shlex
-
- fd_send = None
- fd_recv = None
- pickler = None
- unpickler = None
- date_format = '%Y.%m.%d-%H.%M.%S'
-
- MODE = 0
- SIZE = 1
- MTIME = 2
- UID = 3
- GID = 4
- CHMOD_BITS = int('6777', 8)
-
- def send_object(object):
-     global pickler, fd_send
-     pickler.dump(object)
-     fd_send.flush()
-
- def recv_object():
-     global unpickler
-     return unpickler.load()
-
- def init_io(send, recv):
-     global fd_send, fd_recv, pickler, unpickler
-     fd_send = send
-     fd_recv = recv
-     pickler = cPickle.Pickler(fd_send, 1)
-     unpickler = cPickle.Unpickler(fd_recv)
-
- def verbose_log(s):
-     if have_option('--verbose'):
-         sys.stderr.write('%s\n' % s)
-
- class Log:
-     def __init__(self, logfile, mode):
-         self.mode = mode
-         try:
-             self.logfile = file(os.path.abspath(logfile), self.mode)
-         except:
-             self.logfile = None
-         self.re = re.compile(r'^(?P<time>....\...\...\-..\...\...)\: (?P<message>.*)$')
-
-     def __del__(self):
-         if self.logfile:
-             self.logfile.close()
-
-     def write(self, message):
-         if not self.logfile or self.mode == 'rt':
-             return
-
-         try:
-             strtime = time.strftime(date_format, time.localtime())
-             self.logfile.write('%s: %s\n' % (strtime, message))
-             self.logfile.flush()
-         except:
-             pass
-
-     def nextline(self):
-         if not self.logfile or self.mode == 'at':
-             return
-
-         line = self.logfile.readline()
-         if len(line) == 0:
-             return None
-         m = self.re.match(line)
-         return (time.strptime(m.group('time'), date_format), m.group('message'))
-
- class Catalog:
-     """Central store for files of different hash/stat combinations
-     Backup trees hard link to the catalog. The catalog can be updated
-     incrementally. A backup tree is not created until the catalog is
-     up to date.
-     """
-     def __init__(self, path):
-         self.path = os.path.abspath(path)
-         self.lenbase = len('%s%s' % (self.path, os.sep))
-         self.logpath = join(self.path, 'log')
-         if not os.path.exists(self.path):
-             os.mkdir(self.path)
-             os.mkdir(self.logpath)
-             for n in xrange(256):
-                 os.mkdir(join(self.path, '%03d' % n))
-
-     def get_logfiles(self):
-         list = []
-         for item in os.listdir(self.logpath):
-             s = os.stat(join(self.logpath, item))
-             if stat.S_ISDIR(s.st_mode):
-                 continue
-             try:
-                 datestr = item.rstrip('.log')
-                 time.strptime(datestr, date_format)
-                 list.append(datestr)
-             except:
-                 pass
-         list.sort()
-         return [(time.strptime(datestr, date_format), join(self.logpath, '%s.log' % datestr)) for datestr in list]
-
-     def parse_log(self, logpath_abs):
-         log = Log(logpath_abs, 'rt')
-         parse = []
-         while True:
-             line = log.nextline()
-             if line == None:
-                 break
-             elif line[1].startswith('+++'):
-                 continue
-             elif line[1].startswith('copy from: '):
-                 tT = line[0]
-                 fromT = line[1][11:]
-                 forT = ''
-                 line = log.nextline()
-                 if line[1].startswith('copy for: '):
-                     forT = line[1][10:]
-                 parse.append(('copy', tT, fromT, forT))
-             elif line[1].startswith('new from: '):
-                 tT = line[0]
-                 fromT = line[1][10:]
-                 forT = ''
-                 line = log.nextline()
-                 if line[1].startswith('new for: '):
-                     forT = line[1][9:]
-                 parse.append(('new', tT, fromT, forT))
-         return parse
-
-     def file_from_hash(self, md5):
-         subdir = join(self.path, '%03d' % (hash(md5) & 255))
-         files = glob.glob1(subdir, '%s*' % md5)
-         if len(files) > 0:
-             return join(subdir, files[0])
-         return None
-
-     def file_from_hashstat(self, md5, s):
-         filepath_abs = self.getfilepath(md5, s)
-         if os.path.exists(filepath_abs):
-             return filepath_abs
-         return None
-
-     def getfilepath(self, md5, s):
-         mdate = time.strftime(date_format, time.localtime(s[MTIME]))
-         fn = '%s-%s-%08x-%05x-%04d-%04d' % (md5, mdate, s[SIZE], s[MODE] & CHMOD_BITS, s[UID], s[GID])
-         return join(join(self.path, '%03d' % (hash(md5) & 255)), fn)
-
-     def update(self, filelist, treepath_last, end_time):
-         # This is the slow (and expensive!) bandwidth eating portion
-         # of link-backup. If --minutes is specified, don't go beyond
-         # the minutes specified.
-
-         # For each file see if exists in the catalog; if not copy it
-         # if the md5 exists or download it
-
-         datestr = time.strftime(date_format, time.localtime())
-         log = Log(join(self.logpath, '%s.log' % datestr), 'wt')
-         dl_seconds = 0
-         dl_size = 0
-         md5hashes = [None for n in xrange(len(filelist))]
-         log.write('+++begin+++')
-         for n in xrange(len(filelist)):
-             # Only files
-
-             filepath_rel, s = filelist[n]
-             if stat.S_ISDIR(s[MODE]):
-                 continue
-
-             # If stat equal we don't need a hash for this file
-
-             if treepath_last and is_stat_equal(join(treepath_last, filepath_rel), s):
-                 verbose_log('dst: found file %s' % filelist[n][0])
-                 continue
-
-             # Get the md5hash for this file
-
-             verbose_log('dst: request hash for %s' % filelist[n][0])
-             send_object(n)
-             md5hashes[n] = recv_object()
-             if not md5hashes[n]:
-                 verbose_log('dst: did not receive hash?')
-                 send_object(False)
-                 continue
-
-             # File already present? Skip.
-             if self.file_from_hashstat(md5hashes[n], s):
-                 verbose_log('dst: file present already %s' % filelist[n][0])
-                 send_object(False)
-                 continue
-
-             # File not present. Copy locally or from the source
-             fd, tmpfilepath_abs = tempfile.mkstemp(dir=self.path)
-             filepath_abs = self.getfilepath(md5hashes[n], s)
-             try:
-                 copyfile_abs = self.file_from_hash(md5hashes[n])
-                 if copyfile_abs:
-                     # Found same file with different stats. Requires a copy
-                     verbose_log('dst: using file with same hash %s' % filelist[n][0])
-                     send_object(False)
-                     shutil.copyfile(copyfile_abs, tmpfilepath_abs)
-                     log.write('copy from: %s' % filepath_abs[self.lenbase:])
-                     log.write('copy for: %s' % filepath_rel)
-                 else:
-                     # Enough time for this file?
-                     if end_time != 0 and dl_seconds != 0:
-                         est_seconds = s[SIZE] / (dl_size / dl_seconds)
-                         if time.time() + est_seconds >= end_time:
-                             verbose_log('dst: timeout')
-                             send_object(False)
-                             raise
-
-                     # Time downloads to understand average download rate and use as
-                     # an estimator of a given file's download time
-                     verbose_log('dst: requesting file %s' % filelist[n][0])
-                     dl_time_start = time.time()
-
-                     # Copy from source
-                     # The chunks are sized independent from stats for robustness
-                     # Stat is resent to have most up to date copy
-                     # Recalc the md5 hash along the way so it is right
-                     send_object(True)
-                     m = md5.new()
-                     while True:
-                         readcount = struct.unpack('!i', fd_recv.read(4))[0]
-                         if readcount == 0:
-                             break
-                         if readcount < 0:
-                             raise 'Error reading file'
-                         bytes = fd_recv.read(readcount)
-                         m.update(bytes)
-                         os.write(fd, bytes)
-
-                     # Delta accumulator
-                     dl_seconds += time.time() - dl_time_start
-                     os.fsync(fd)
-                     dl_size += os.fstat(fd).st_size
-                     # File might have changed during the update
-                     # Update hash and size and check to see if it already
-                     # exists in the catalog
-                     if md5hashes[n] != m.hexdigest():
-                         verbose_log('dst: file changed during copy %s' % filelist[n][0])
-                         md5hashes[n] = m.hexdigest()
-                         s[SIZE] = os.fstat(fd).st_size
-                         filelist[n] = (filepath_rel, s)
-                         if self.file_from_hashstat(md5hashes[n], s):
-                             verbose_log('dst: file already in catalog %s' % filelist[n][0])
-                             os.close(fd)
-                             os.remove(tmpfilepath_abs)
-                             continue
-
-                     log.write('new from: %s' % filepath_abs[self.lenbase:])
-                     log.write('new for: %s' % filepath_rel)
-
-             except:
-                 os.close(fd)
-                 os.remove(tmpfilepath_abs)
-                 send_object(-1)
-                 log.write('+++end+++')
-
-                 return False, dl_size, md5hashes
-
-             # Rename and set file stats
-
-             os.close(fd)
-             os.utime(tmpfilepath_abs, (s[MTIME], s[MTIME]))
-             os.chown(tmpfilepath_abs, s[UID], s[GID])
-             os.rename(tmpfilepath_abs, filepath_abs)
-             os.chmod(filepath_abs, s[MODE] & CHMOD_BITS)
-
-         # Done with file requests
-
-         verbose_log('dst: catalog update done')
-         send_object(-1)
-         log.write('+++end+++')
-         return True, dl_size, md5hashes
-
-     def get_showfiles(self, filelist, treepath_last):
-
-         # Get hashes for new files. If a file doesn't exist in the old backup
-         # with the same stat, we need to ask the client for a hash
-
-         md5requests = []
-         for n in xrange(len(filelist)):
-             # Only files
-
-             filepath_rel, s = filelist[n]
-             if stat.S_ISDIR(s[MODE]):
-                 continue
-
-             # If stat equal we don't need a hash for this file
-
-             if treepath_last and is_stat_equal(join(treepath_last, filepath_rel), s):
-                 continue
-
-             # Need hash for this file
-
-             md5requests.append(n)
-
-         # Retrieve hashes
-
-         send_object(md5requests)
-         md5hashes = recv_object()
-         if len(md5hashes) != len(md5requests):
-             raise AssertionError, 'Hash count mismatch'
-
-         # Make one sorted list to eliminate duplicates
-         # Check if already present in catalog
-
-         md5sort = [(md5requests[n], md5hashes[n]) for n in xrange(len(md5hashes)) if not self.file_from_hash(md5hashes[n])]
-         def sortme(a, b):
-             if a[1] == b[1]:
-                 return 0
-             if a[1] > b[1]:
-                 return 1
-             return -1
-         md5sort.sort(sortme)
-
-         # Eliminate duplicates and return
-
-         showfiles = []
-         md5 = None
-         for n in xrange(len(md5sort)):
-             if md5 == md5sort[n][1]:
-                 continue
-             md5 = md5sort[n][1]
-             showfiles.append(md5sort[n][0])
-         return showfiles
-
- # Backup
-
- class Backup:
-     """Represents a dated backup.
-     """
-     def __init__(self, path):
-         self.path = os.path.abspath(path)
-         self.logpath_abs = join(self.path, 'log')
-         self.treepath = join(self.path, 'tree')
-         if not os.path.exists(self.treepath):
-             os.mkdir(self.treepath)
-
-     def parse_log(self):
-         log = Log(self.logpath_abs, 'rt')
-         parse = []
-         while True:
-             line = log.nextline()
-             if line == None:
-                 break
-             if line[1] == '+++end+++' or line[1] == '+++begin+++':
-                 continue
-             if line[1].startswith('new: '):
-                 parse.append(('new', line[1][5:]))
-             elif line[1].startswith('copy: '):
-                 parse.append(('copy', line[1][6:]))
-             elif line[1].startswith('link: '):
-                 parse.append(('link', line[1][6:]))
-
-         return parse
-
-     def get_date(self):
-         return time.strptime(self.get_dirname(), date_format)
-
-     def get_dirname(self):
-         return os.path.basename(self.path)
-
-     def get_treepath(self):
-         return self.treepath
-
-     def get_files_since(self, backup_last, catalog):
-         # Get files added to the catalog since last tree was built
-
-         tlast = 0
-         if backup_last:
-             tlast = time.mktime(backup_last.get_date())
-         filessince = {}
-         for tm, logfile_abs in catalog.get_logfiles():
-             if time.mktime(tm) < tlast:
-                 continue
-             for item in catalog.parse_log(logfile_abs):
-                 filessince[item[3]] = item[0]
-         return filessince
-
-     def build_tree(self, backup_last, filelist, md5hashes, catalog):
-         """All files are present and can be found either in the
-         previous backup or the catalog. Just build the structure.
-         """
-
-         treepath_last = None
-         if backup_last:
-             treepath_last = backup_last.get_treepath()
-         filessince = self.get_files_since(backup_last, catalog)
-         log = Log(self.logpath_abs, 'at')
-         log.write('+++begin+++')
-         verbose_log('dst: creating tree %s' % self.treepath)
-
-         # Create directories (they are in depth last order)
-         # Set permissions later
-         verbose_log('dst: making directories...')
-         for filepath_rel, s in filelist:
-             if stat.S_ISDIR(s[MODE]):
-                 verbose_log('dst: making dir %s' % filepath_rel)
-                 dirpath_abs = join(self.treepath, filepath_rel)
-                 os.mkdir(dirpath_abs)
-
-         # Link in files
-         verbose_log('dst: linking files...')
-         for n in xrange(len(filelist)):
-
-             # Skip dirs
-             filepath_rel, s = filelist[n]
-             if stat.S_ISDIR(s[MODE]):
-                 continue
-             verbose_log('dst: inspecting file %s' % filepath_rel)
-
-             # If there is no hash, it's in the last backup, otherwise it's
-             # in the catalog
-             if not md5hashes[n]:
-                 verbose_log('dst: found in last backup: %s' % filepath_rel)
-                 linkpath_abs = join(treepath_last, filepath_rel)
-             else:
-                 verbose_log('dst: found in catalog: %s' % filepath_rel)
-                 linkpath_abs = catalog.file_from_hashstat(md5hashes[n], s)
-
-             # Only log files new to the catalog since last tree. This
-             # ensures file renames, dups, moves etc don't show up as new
-             # in the tree log
-             if filessince.has_key(filepath_rel):
-                 log.write('%s: %s' % (filessince[filepath_rel], filepath_rel))
-             else:
-                 log.write('link: %s' % filepath_rel)
-
-             # Hard-link the file
-             verbose_log('dst: hardlinking %s to %s' % (join(self.treepath, filepath_rel), linkpath_abs))
-             os.link(linkpath_abs, join(self.treepath, filepath_rel))
-
-         # Set permissions for directories depth-first.
-         verbose_log('dst: setting permissions on directories...')
-         for n in xrange(len(filelist) - 1, -1, -1):
-             dirpath_rel, s = filelist[n]
-             if stat.S_ISDIR(s[MODE]):
-                 verbose_log('dst: setting permissions on: %s' % dirpath_rel)
-                 dirpath_abs = join(self.treepath, dirpath_rel)
-                 os.utime(dirpath_abs, (s[MTIME], s[MTIME]))
-                 os.chown(dirpath_abs, s[UID], s[GID])
-                 os.chmod(dirpath_abs, s[MODE] & CHMOD_BITS)
-
-         verbose_log('dst: done creating tree %s' % self.treepath)
-         log.write('+++end+++')
-
- # Manager
-
- class Manager:
-     """Manages Backup instances
-     """
-     def __init__(self, path):
-         self.path = os.path.abspath(path)
-         if not os.path.exists(self.path):
-             os.mkdir(self.path)
-         self.catalog = Catalog(join(self.path, '.catalog'))
-
-     def get_path(self):
-         return self.path
-
-     def new_backup(self):
-         dirpath = join(self.path, time.strftime(date_format, time.localtime()))
-         os.mkdir(dirpath)
-         return Backup(dirpath)
-
-     def delete_backup(self, backup):
-         dirpath_abs = join(self.path, backup.get_dirname())
-         if os.path.exists(dirpath_abs):
-             for root, dirs, files in os.walk(dirpath_abs, topdown=False):
-                 for name in files:
-                     os.remove(join(root, name))
-                 for name in dirs:
-                     os.rmdir(join(root, name))
-             os.rmdir(dirpath_abs)
-
-     def get_backup(self, backup):
-         return Backup(join(self.path, backup))
-
-     def get_backups(self):
-         list = []
-         for item in os.listdir(self.path):
-             s = os.stat(join(self.path, item))
-             if not stat.S_ISDIR(s.st_mode):
-                 continue
-             try:
-                 time.strptime(item, date_format)
-                 list.append(item)
-             except:
-                 pass
-         list.sort()
-         return [Backup(join(self.path, item)) for item in list]
-
- # Helpers
-
- def dump_arg(x):
-     s = '"'
-     for c in x:
-         if c in '\\$"`':
-             s = s + '\\'
-         s = s + c
-     s = s + '"'
-     return s
-
- def start_server(src, dst, is_source):
-     # Command line for server
-
-     cmd1 = "python -c 'import sys;import cPickle;exec(cPickle.Unpickler(sys.stdin).load())' --server"
-     if is_source:
-         cmd1 = "%s --source" % cmd1
-     for arg in sys.argv[1:-2]:
-         cmd1 = '%s %s' % (cmd1, arg)
-     cmd1 = "%s %s %s" % (cmd1, dump_arg(src['string']), dump_arg(dst['string']))
-
-     # Remote?
-
-     addr = dst
-     if is_source:
-         addr = src
-
-     # Add ssh and args if remote
-     if addr['remote']:
-         ssh_args = '%s %s' % (addr['remote'], dump_arg(cmd1))
-         if have_option('-e'):
-             cmd2 = '%s %s' % (get_option_value('-e'), ssh_args)
-         else:
-             cmd2 = 'ssh %s' % ssh_args
-     else:
-         cmd2 = cmd1
-
-     # Start and pass this code
-     verbose_log('command: %s' % cmd2)
-     fdin, fdout = os.popen2(cmd2, mode='b')
-     init_io(fdin, fdout)
-     f = open(sys.argv[0])
-     send_object(f.read())
-     f.close()
-
- def is_mode_ok(mode):
-     if stat.S_ISBLK(mode):
-         return False
-     if stat.S_ISCHR(mode):
-         return False
-     if stat.S_ISFIFO(mode):
-         return False
-     if stat.S_ISSOCK(mode):
-         return False
-     return True
-
- def build_filelist_from_tree(treepath):
-     class ListBuilder:
-         def __init__(self, basepath):
-             self.lenbase = len('%s%s' % (basepath, os.sep))
-
-         def callback(self, arg, dirpath, filelist):
-             for file in filelist:
-                 # Sometimes a stat may fail, like if there are broken
-                 # symlinks in the file system
-                 try:
-                     # Collect stat values instead of stat objects. It's 6
-                     # times smaller (measured) and mutable
-                     # (for uid/gid mapping at the dest)
-                     filepath = join(dirpath, file)
-                     s = os.stat(filepath)
-                     if not is_mode_ok(s.st_mode):
-                         continue
-                     arg.append((filepath[self.lenbase:], [s.st_mode, s.st_size, s.st_mtime, s.st_uid, s.st_gid]))
-                 except:
-                     pass
-
-     treepath_abs = os.path.abspath(treepath)
-     filelist = []
-     os.path.walk(treepath_abs, ListBuilder(treepath_abs).callback, filelist)
-     return filelist
-
- def build_filelist_from_file(treepath, file):
-     filelist = []
-     for line in file.readlines():
-         filepath_rel = line.rstrip('\n')
-         s = os.stat(join(treepath, filepath_rel))
-         if not is_mode_ok(s.st_mode):
-             continue
-         filelist.append((filepath_rel, [s.st_mode, s.st_size, s.st_mtime, s.st_uid, s.st_gid]))
-     return filelist
-
- def build_filelist(treepath):
-     verbose_log('building filelist...')
-     for n in xrange(len(sys.argv)):
-         if sys.argv[n] == '--filelist':
-             if sys.argv[n + 1] == '-':
-                 return build_filelist_from_file(treepath, sys.stdin)
-             else:
-                 file = open(sys.argv[n + 1])
-                 filelist = build_filelist_from_file(treepath, file)
-                 file.close()
-                 return filelist
-     return build_filelist_from_tree(treepath)
-
- def build_uidgidmap(filelist):
-     """Build a map of uid's to names and gid's to names
-     so mapping can occur at the destination
-     """
-     import pwd
-     import grp
-     uidname_map = {}
-     gidname_map = {}
-     for filepath_rel, s in filelist:
-         if not uidname_map.has_key(s[UID]):
-             try:
-                 uidname_map[s[UID]] = pwd.getpwuid(s[UID])[0]
-             except:
-                 uidname_map[s[UID]] = str(s[UID])
-         if not gidname_map.has_key(s[GID]):
-             try:
-                 gidname_map[s[GID]] = grp.getgrgid(s[GID])[0]
-             except:
-                 gidname_map[s[GID]] = str(s[GID])
-     return uidname_map, gidname_map
-
- def map_uidgid(filelist, idname_map):
-     """Fix up uid / gid to dest values
-     """
-
-     # If root and --numeric-ids specified, keep the numeric
-     # ids
-
-     if os.getuid() == 0 and have_option('--numeric-ids'):
-         return
-
-     # First build a uid->uid map. If not root, valid
-     # uid mapping is only current user. If root, attempt
-     # to map uid, if that fails keep the uid.
-
-     import pwd
-     import grp
-
-     uid_user = os.getuid()
-     uidname_map = idname_map[0]
-     uiduid_map = {}
-     for uid_source in uidname_map.keys():
-         if uid_user == 0:
-             try:
-                 uid_dest = pwd.getpwnam(uidname_map[uid_source])[2]
-                 uiduid_map[uid_source] = uid_dest
-             except:
-                 uiduid_map[uid_source] = uid_source
-         else:
-             uiduid_map[uid_source] = uid_user
-
-     # Build gid->gid map. If not root, valid gid mapping is any group
-     # this user is a part of. First build a list of valid name->gids
-     # mappings
-
-     gid_user = os.getgid()
-     gid_name = grp.getgrgid(gid_user)[0]
-     namegid_map = {}
-     for group in grp.getgrall():
-         if uid_user == 0 or gid_name in group[3]:
-             namegid_map[group[0]] = group[2]
-
-     # Now build a gid map to valid gids for this user
-
-     gidname_map = idname_map[1]
-     gidgid_map = {}
-     for gid_source in gidname_map.keys():
-         gid_sourcename = gidname_map[gid_source]
-         if namegid_map.has_key(gid_sourcename):
-             gidgid_map[gid_source] = namegid_map[gid_sourcename]
-         else:
-             gidgid_map[gid_source] = gid_user
-
-     # Now map filelist entries
-
-     for filepath_rel, s in filelist:
-         # Continue if nothing to do. Unlikely in the mapping case
-
-         if uiduid_map[s[UID]] == s[UID] and gidgid_map[s[GID]] == s[GID]:
-             continue
-
-         # Map entries
-
-         s[UID] = uiduid_map[s[UID]]
-         s[GID] = gidgid_map[s[GID]]
-
- def serve_files(treepath, filelist):
-     """Serve requested files.
-     """
-     global fd_recv
-
-     while True:
-         # Which file?
-
-         n = recv_object()
-         if n == -1:
-             break
-
-         # Calc hash and return it
-
-         verbose_log('src: calc hash for %s' % filelist[n][0])
-         filepath_rel, s = filelist[n]
-         filepath_abs = join(treepath, filepath_rel)
-         try:
-             f = open(filepath_abs)
-             m = md5.new()
-             while True:
-                 bytes = f.read(1024 * 1024)
-                 if len(bytes) == 0:
-                     break
-                 m.update(bytes)
-             f.close()
-             send_object(m.hexdigest())
-         except:
-             verbose_log('src: error calcing hash for %s' % filelist[n][0])
-             send_object(None)
-
-         # False means don't need the file
-
-         if not recv_object():
-             verbose_log('src: skipping file %s' % filelist[n][0])
-             continue
-
-         # Send size with data chunks in case the file is changing
-         # while this occurs
-
-         verbose_log('src: sending file %s' % filelist[n][0])
-         try:
-             f = open(filepath_abs)
-             while True:
-                 bytes = f.read(1024 * 1024)
-                 fd_send.write(struct.pack('!i', len(bytes)))
-                 if len(bytes) == 0:
-                     break
-                 fd_send.write(bytes)
-             fd_send.flush()
-             f.close()
-         except:
-             verbose_log('src: error sending file %s' % filelist[n][0])
-             fd_send.write(struct.pack('!i', -1))
-
-         verbose_log('src: send complete %s' % filelist[n][0])
-
- def serve_hashes(treepath, filelist):
-     """Serve requested hashes
-     """
-     hashrequests = recv_object()
-     hashlist = []
-     for n in xrange(len(hashrequests)):
-         filepath_rel, s = filelist[hashrequests[n]]
-         filepath_abs = join(treepath, filepath_rel)
-         f = open(filepath_abs)
-         m = md5.new()
-         while True:
-             bytes = f.read(1024 * 1024)
-             if len(bytes) == 0:
-                 break
-             m.update(bytes)
-         f.close()
-         hashlist.append(m.hexdigest())
-     send_object(hashlist)
-
- def is_stat_equal(filepath_abs, s):
-     try:
-         s2 = os.stat(filepath_abs)
-         if (s[MODE] & CHMOD_BITS) == (s2.st_mode & CHMOD_BITS) and s[SIZE] == s2.st_size and s[MTIME] == s2.st_mtime and s[UID] == s2.st_uid and s[GID] == s2.st_gid:
-             return True
-     except:
-         pass
-     return False
-
- def is_tree_equal(filelist, treepath_last):
-     verbose_log('checking for need to build tree...')
-     if not treepath_last:
-         verbose_log('tree not equal: no last tree!')
-         return False
-     filelist_old = build_filelist_from_tree(treepath_last)
-     if len(filelist) != len(filelist_old):
-         verbose_log('tree not equal: filelists different sizes!')
-         return False
-     dict_new = dict(filelist)
-     dict_old = dict(filelist_old)
-     for key in dict_new.keys():
-         different = False
-         if not dict_old.has_key(key):
-             different = True
-         else:
-             s_old = dict_old[key]
-             s_new = dict_new[key]
-             different = False
-             if stat.S_ISDIR(s_old[MODE]):
-                 if s_old[MODE] != s_new[MODE] or s_old[MTIME] != s_new[MTIME] or s_old[UID] != s_new[UID] or s_old[GID] != s_new[GID]:
-                     different = True
-             else:
-                 if s_old != s_new:
-                     different = True
-         if different:
-             verbose_log('tree not equal: stats different %s' % key)
-             if dict_old.has_key(key):
-                 verbose_log('old %s' % str(dict_old[key]))
-             verbose_log('new %s' % str(dict_new[key]))
-             return False
-     verbose_log('no need to build tree - it would be identical to the last tree')
-     return True
-
- def execute(src, dst, is_source):
-     if is_source:
-         # Sending side
-         # Create filelist, calc name map, send both
-
-         srcpath = os.path.abspath(os.path.expanduser(src['path']))
-         filelist = build_filelist(srcpath)
-         send_object(filelist)
-         idname_map = build_uidgidmap(filelist)
-         send_object(idname_map)
-
-         # Which command
-
-         if have_option('--showfiles'):
-             serve_hashes(srcpath, filelist)
-         else:
-             serve_files(srcpath, filelist)
-
-         results = recv_object()
-         subdir = recv_object()
-     else:
-         # Receiving side
-         # Recv filelist and name mapping, perform uid/gid mapping
-         filelist = recv_object()
-         idname_map = recv_object()
-         map_uidgid(filelist, idname_map)
-         manager = Manager(os.path.expanduser(dst['path']))
-         catalog = manager.catalog
-         backups = manager.get_backups()
-         treepath_last = None
-         backup_last = None
-         if len(backups) != 0:
-             backup_last = backups[-1]
-             treepath_last = backup_last.get_treepath()
-
-         # If --lock specified, only one receiver at a time.
-         # This temp file will get deleted before the script ends,
-         # unless the power cord is pulled. On Linux and Macs, /tmp
-         # gets cleared at boot, so backup will be unlocked. On
-         # Windows, there isn't an equivalent. Also note flock
-         # doesn't work in some filesystems such as nfs.
-         # For these reasons, locking is optional.
-
-         if have_option('--lock'):
-             lock_file = LockFile('lockfile.lb')
-             if not lock_file.lock():
-                 results = 'Attempt to lock failed.'
-                 send_object(-1)
-                 send_object(results)
-                 send_object(None)
-                 return results, None
-
-         # Command?
-
-         if have_option('--showfiles'):
-             showfiles = catalog.get_showfiles(filelist, treepath_last)
-             results = '\n'.join([filelist[n][0] for n in showfiles])
-             subdir = None
-         else:
-             # Calc when the server should stop; used for --minutes control
-
-             end_time = 0
-             for n in xrange(len(sys.argv)):
-                 if sys.argv[n] == '--minutes':
-                     end_time = int(time.time()) + int(sys.argv[n + 1]) * 60
-                     break
-
-             # Update catalog
-
-             complete, transferred, md5hashes = catalog.update(filelist, treepath_last, end_time)
-             if complete:
-                 results = 'catalog update complete, %d bytes transferred.' % transferred
-             else:
-                 results = 'catalog update not complete. %d bytes transferred.' % transferred
-
-             # Count stats
-
-             verbose_log('catalog stats:')
-             new = 0
-             copy = 0
-             for entry in catalog.parse_log(catalog.get_logfiles()[-1][1]):
-                 if entry[0] == 'copy':
-                     copy += 1
-                 elif entry[0] == 'new':
-                     new += 1
-             results += '\ncatalog: %d new %d copied.' % (new, copy)
-
-             # Create structure if complete
-             # Don't create if --catalogonly specified
-             # Don't create if new tree would be identical to old tree
-
-             subdir = None
-             if complete and not have_option('--catalogonly') and not is_tree_equal(filelist, treepath_last):
-                 backup_new = manager.new_backup()
-                 backup_new.build_tree(backup_last, filelist, md5hashes, catalog)
-                 subdir = backup_new.get_treepath()
-                 results += '\ntree created: %s' % subdir
-
-                 # 'latest' link
-                 latest_link = join(manager.get_path(), 'latest')
-                 if os.path.exists(latest_link):
-                     os.remove(latest_link)
-                 os.symlink(backup_new.get_dirname(), join(manager.get_path(), 'latest'))
-
-                 # tree stats
-
-                 new = 0
-                 copy = 0
-                 link = 0
-                 for entry in backup_new.parse_log():
-                     if entry[0] == 'copy':
-                         copy += 1
-                     elif entry[0] == 'new':
-                         new += 1
-                     elif entry[0] == 'link':
-                         link += 1
-                 results += '\ntree: %d new %d copied %d linked.' % (new, copy, link)
-             else:
-                 results += '\ntree not created.'
-
-         # Send results
-
-         send_object(results)
-         send_object(subdir)
-
-     return results, subdir
-
- def parse_address(string):
-     """Parse these formats:
-         dir
-         user@host:dir
-
-     Return dictionary:
-         remote : user@host or empty
-         path   : path portion
-         string : whole string
-     """
-
-     addr = {}
-     addr['string'] = string
-     if string.find(':') != -1:
-         addr['remote'], addr['path'] = string.split(':')
-     else:
-         addr['remote'] = ''
-         addr['path'] = string
-
-     # Check to see if we are in quotes
-     # Unicode might be an issue here..
-     addr['path'] = shlex.split(addr['path'])[0]
-
-     return addr
-
- def have_option(option):
-     for s in sys.argv:
-         if s == option:
-             return True
-     return False
-
- def get_option_value(option):
-     for n in xrange(len(sys.argv)):
-         if sys.argv[n] == option:
-             return sys.argv[n + 1]
-     return None
-
- def error(string):
-     sys.stderr.write("*** " + string + "\n")
-     sys.exit(1)
-
- class LockFile:
-     def __init__(self, file_name):
-         # /tmp gets cleared at system boot on *nix systems,
-         # so the file will get cleared if the system reboots.
-         # On Windows all bets are off.
-         self.file_name = join(tempfile.gettempdir(), file_name)
-         self.file = None
-
-     def lock(self):
-         # Fail if locked twice. No need to reference count
-         if self.file:
-             return False
-
-         # Attempt an exclusive, non-blocking lock
-         # Doesn't work on NFS
-         self.file = file(self.file_name, 'w+')
-         try:
-             fcntl.flock(self.file, fcntl.LOCK_EX | fcntl.LOCK_NB)
-         except IOError, e:
-             self.file.close()
-             self.file = None
-             return False
-         return True
-
-     def unlock(self):
-         if self.file:
-             self.file.close()
-             self.file = None
-             os.unlink(self.file_name)
-
-     def __del__(self):
-         # Gets called if script is control-c'd
-         self.unlock()
-
- # Main code
-
- if __name__ == '__main__':
-     # Print help
-
-     if len(sys.argv) == 1:
-         print __doc__
-         sys.exit(1)
-
-     if len(sys.argv) < 3:
-         error('Too few parameters.')
-
-     # Parse addresses
-
-     src = parse_address(sys.argv[-2:-1][0])
-     dst = parse_address(sys.argv[-1:][0])
-
-     if have_option('--ssh-i') or have_option('--ssh-C') or have_option('--ssh-p'):
-         error("--ssh-x style options have been deprecated in favor of -e (rsync style). Please change your command.")
-
-     # Is this the server?
-
-     if have_option('--server'):
-         init_io(sys.stdout, sys.stdin)
-         execute(src, dst, have_option('--source'))
-         sys.exit(0)
-
-     # Client starting. Only one remote allowed.
-
-     if src['remote'] and dst['remote']:
-         error('Source and Dest cannot both be remote.')
-
-     # The source generates the file list, the dest asks for new files
-     # The server can talk through stderr to the console
-
-     if not src['remote']:
-         # Client is source, server is dest
-
-         start_server(src, dst, False)
-         results, subdir = execute(src, dst, True)
-
-     else:
-         # Server is source, client is dest
-
-         start_server(src, dst, True)
-         results, subdir = execute(src, dst, False)
-
-     # Print results
-
-     print results
-
-     # Verification
-
-     if subdir != None:
-         srcpath = '%s/' % os.path.normpath(src['path'])
-         if (src['remote']):
-             srcpath = src['remote'] + ':' + repr(srcpath)
-         dstpath = os.path.normpath(join(dst['path'], subdir))
-         if (dst['remote']):
-             dstpath = dst['remote'] + ':' + repr(dstpath)
-         if os.getuid() == 0 and have_option('--numeric-ids'):
-             rsync_cmd = 'rsync -av --numeric-ids --dry-run %s %s' % (dump_arg(srcpath), dump_arg(dstpath))
-         else:
-             rsync_cmd = 'rsync -av --dry-run %s %s' % (dump_arg(srcpath), dump_arg(dstpath))
-
-         if have_option('--verify'):
-             print rsync_cmd
-             sys.stdout.flush()
-             os.system(rsync_cmd)
-         else:
-             print 'to cross-verify:'
-             print rsync_cmd
-
-     # Close server
-
-     fd_send.close()
-     fd_recv.close()
-     sys.exit(0)
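
The deleted lb.py's core technique, per its own docstring, is a content-addressed catalog that backup trees hard-link into. A minimal Ruby sketch of that idea follows; it is not part of lsync or lb.py, and the backup/srcdir/dstdir names are hypothetical. It stores each unique file once under its digest in .catalog, then hard-links dated trees to it, so renames and duplicates cost no extra storage (lb.py additionally keys entries by stat fields and streams files over ssh, which this sketch omits; Dir.glob with base: assumes Ruby 2.5+).

require 'digest'
require 'fileutils'

def backup(srcdir, dstdir)
  catalog = File.join(dstdir, ".catalog")
  tree = File.join(dstdir, Time.now.strftime("%Y.%m.%d-%H.%M.%S"), "tree")
  FileUtils.mkdir_p(catalog)

  Dir.glob("**/*", base: srcdir).each do |rel|
    src = File.join(srcdir, rel)
    dst = File.join(tree, rel)
    if File.directory?(src)
      FileUtils.mkdir_p(dst)
    else
      # One catalog entry per unique content, named by digest
      # (lb.py uses md5 plus stat fields; digest alone suffices here).
      entry = File.join(catalog, Digest::MD5.file(src).hexdigest)
      FileUtils.cp(src, entry) unless File.exist?(entry)
      FileUtils.mkdir_p(File.dirname(dst))
      File.link(entry, dst)   # hard link: no extra storage per tree
    end
  end
end

Because every tree is just directory entries pointing at catalog inodes, a file that is renamed or copied in the source shows up in the next tree as another hard link, never as a second copy, which is exactly the property the docstring's "How it works" section describes.
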