lsync 1.2.5 → 2.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +51 -0
- data/lib/lsync.rb +0 -23
- data/lib/lsync/action.rb +97 -92
- data/lib/lsync/actions/darwin/disk +5 -5
- data/lib/lsync/actions/generic/prune +29 -7
- data/lib/lsync/actions/generic/rotate +52 -40
- data/lib/lsync/actions/linux/disk +11 -11
- data/lib/lsync/actions/linux/terminal +2 -0
- data/lib/lsync/directory.rb +49 -35
- data/lib/lsync/error.rb +30 -30
- data/lib/lsync/event_handler.rb +72 -0
- data/lib/lsync/event_timer.rb +80 -0
- data/lib/lsync/method.rb +19 -185
- data/lib/lsync/methods/rsync.rb +132 -0
- data/lib/lsync/run.rb +30 -29
- data/lib/lsync/script.rb +212 -125
- data/lib/lsync/server.rb +77 -92
- data/lib/lsync/shell.rb +58 -97
- data/lib/lsync/shell_client.rb +65 -61
- data/lib/lsync/shells/ssh.rb +47 -0
- data/lib/lsync/tee_logger.rb +44 -31
- data/lib/lsync/version.rb +3 -3
- metadata +25 -58
- data/bin/lsync +0 -142
- data/lib/lsync/extensions.rb +0 -22
- data/lib/lsync/lb.py +0 -1304
- data/lib/lsync/password.rb +0 -35
- data/lib/lsync/plan.rb +0 -249
data/lib/lsync/extensions.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
|
2
|
-
class Hash
  # Returns a new Hash holding only the entries whose key matches +p+
  # (anything accepted by the key's #match). When a block is given, each
  # kept value is replaced by the block's result for (value, key).
  def keys_matching(p, &block)
    matched = {}

    each_pair do |key, value|
      if key.match(p)
        value = yield(value, key) if block_given?
        matched[key] = value
      end
    end

    matched
  end

  # Replaces every value in place with the block's result for that value.
  # Returns the receiver (the result of the iteration).
  def collect_values
    each_pair do |key, value|
      self[key] = yield value
    end
  end
end
|
data/lib/lsync/lb.py
DELETED
@@ -1,1304 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- Mode: Python; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
|
3
|
-
|
4
|
-
"""Link-Backup
|
5
|
-
Copyright (c) 2004 Scott Ludwig
|
6
|
-
http://www.scottlu.com
|
7
|
-
|
8
|
-
Link-Backup is a backup utility that creates hard links between a series
|
9
|
-
of backed-up trees, and intelligently handles renames, moves, and
|
10
|
-
duplicate files without additional storage or transfer.
|
11
|
-
|
12
|
-
Transfer occurs over standard i/o locally or remotely between a client and
|
13
|
-
server instance of this script. Remote backups rely on the secure remote
|
14
|
-
shell program ssh.
|
15
|
-
|
16
|
-
viewlb.cgi, a simple web based viewer of backups made by link-backup, is
|
17
|
-
also available from the link-backup page.
|
18
|
-
|
19
|
-
http://www.scottlu.com/Content/Link-Backup.html
|
20
|
-
|
21
|
-
Usage:
|
22
|
-
|
23
|
-
lb [options] srcdir dstdir
|
24
|
-
lb [options] user@host:srcdir dstdir
|
25
|
-
lb [options] srcdir user@host:dstdir
|
26
|
-
|
27
|
-
Source or dest can be remote. Backups are dated with the following entries:
|
28
|
-
|
29
|
-
dstdir/YYYY.MM.DD-HH.MM.SS/tree/ backed up file tree
|
30
|
-
dstdir/YYYY.MM.DD-HH.MM.SS/log logfile
|
31
|
-
|
32
|
-
Options:
|
33
|
-
|
34
|
-
--verify Run rsync with --dry-run to cross-verify
|
35
|
-
--numeric-ids Keep uid/gid values instead of mapping; requires root
|
36
|
-
--minutes <mins> Only run for <mins> minutes. Incremental backup.
|
37
|
-
--showfiles Don't backup, only list relative path files needing
|
38
|
-
backup
|
39
|
-
--catalogonly Update catalog only
|
40
|
-
--filelist <- or file> Specify filelist. Files relative to srcdir.
|
41
|
-
--lock Ensure only one backup to a given dest will run at a time
|
42
|
-
--verbose Show what is happening
|
43
|
-
|
44
|
-
Comments:
|
45
|
-
|
46
|
-
Link-Backup tracks unique file instances in a tree and creates a backup that
|
47
|
-
while identical in structure, ensures that no file is duplicated unnecessarily.
|
48
|
-
Files that are moved, renamed, or duplicated won't cause additional storage or
|
49
|
-
transfer. dstdir/.catalog is a catalog of all unique file instances; backup
|
50
|
-
trees hard-link to the catalog. If a backup tree would be identical to the
|
51
|
-
previous backup tree, it won't be needlessly created.
|
52
|
-
|
53
|
-
How it works:
|
54
|
-
|
55
|
-
The src sends a file list to the dst. First dst updates the catalog by checking
|
56
|
-
to see if it knows about each file. If not, the file is retrieved from the src
|
57
|
-
and a new catalog entry is made:
|
58
|
-
|
59
|
-
For each file:
|
60
|
-
1. Check to see if the file path + file stat is present in the last tree.
|
61
|
-
2. If not, ask for md5sum from the src. See if md5sum+stat is in the
|
62
|
-
catalog.
|
63
|
-
3. If not, see if md5sum only is in the catalog. If so copy catalog entry,
|
64
|
-
rename with md5sum+new stat
|
65
|
-
4. If not, request file from src, make new catalog entry.
|
66
|
-
|
67
|
-
Catalog files are named by md5sum+stats and stored in flat directories. Once
|
68
|
-
complete, a tree is created that mirrors the src by hardlinking to the catalog.
|
69
|
-
|
70
|
-
Example 1:
|
71
|
-
|
72
|
-
python lb.py pictures pictures-backup
|
73
|
-
|
74
|
-
Makes a new backup of pictures in pictures-backup.
|
75
|
-
|
76
|
-
Example 2:
|
77
|
-
|
78
|
-
python lb.py pictures me@fluffy:~/pictures-backup
|
79
|
-
|
80
|
-
Backs up on remote machine fluffy instead of locally.
|
81
|
-
|
82
|
-
Example 3:
|
83
|
-
|
84
|
-
python lb.py --minutes 240 pictures me@remote:~/pictures-backup
|
85
|
-
|
86
|
-
Same as above except for 240 minutes only. This is useful if backing up over
|
87
|
-
the internet only during specific times (at night for example). Does what it
|
88
|
-
can in 240 minutes. If the catalog update completes, a tree is created
|
89
|
-
hardlinked to the catalog.
|
90
|
-
|
91
|
-
Example 4:
|
92
|
-
python lb.py --showfiles pictures pictures-backup | \
|
93
|
-
python lb.py --filelist - pictures pictures-backup
|
94
|
-
|
95
|
-
Same as example #1.
|
96
|
-
|
97
|
-
Example 5:
|
98
|
-
|
99
|
-
1)
|
100
|
-
python lb.py --showfiles pictures me@remote:~/pictures-backup | \
|
101
|
-
python lb.py --filelist - pictures me@laptop:~/pictures-transfer
|
102
|
-
|
103
|
-
2)
|
104
|
-
python lb.py --catalogonly pictures-transfer me@remote:~/pictures-backup
|
105
|
-
|
106
|
-
3)
|
107
|
-
python lb.py pictures me@remote:~/pictures-backup
|
108
|
-
|
109
|
-
If the difference between pictures and pictures-backup (for example) is too
|
110
|
-
large for internet backup, the steps above can be used. Step 1 transfers only
|
111
|
-
the differences to a laptop. Step 2 is at the location of machine "remote" and
|
112
|
-
is initiated from the laptop to machine "remote". Step 3 is back at the source
|
113
|
-
and will do a backup and notice all the files are present in the remote catalog,
|
114
|
-
and will build the tree.
|
115
|
-
|
116
|
-
Note the source in step 2 could be more perfectly specified as the backup tree
|
117
|
-
created underneath the pictures-transfer directory, although it is not necessary
|
118
|
-
since only the catalog is being updated (however it would be a speedup).
|
119
|
-
|
120
|
-
History:
|
121
|
-
|
122
|
-
v 0.83 17/Apr/2009 Samuel Williams http://www.oriontransfer.co.nz/
|
123
|
-
- Collaboration with Scott to fix a bug that caused a crash
|
124
|
-
when a file changed (stat -> fstat)
|
125
|
-
|
126
|
-
v 0.82 20/Oct/2008 Samuel Williams http://www.oriontransfer.co.nz/
|
127
|
-
- Removed --ssh-(x) options in favor of rsync style -e '...' style,
|
128
|
-
this makes the command compatible with rsync style syntax.
|
129
|
-
|
130
|
-
v 0.81 6/Sep/2008 Samuel Williams http://www.oriontransfer.co.nz/
|
131
|
-
- Added mode-line and #! line
|
132
|
-
- Fixed parsing of command line arguments that contain spaces to match rsync
|
133
|
-
(shlex parsing)
|
134
|
-
- Fixed escaping of ssh strings so that they get passed correctly
|
135
|
-
|
136
|
-
v 0.8 12/23/2006 scottlu
|
137
|
-
- allow backups to occur while files are changing
|
138
|
-
- minor --verify command bug
|
139
|
-
- added --verbose logging to tree building
|
140
|
-
|
141
|
-
v 0.7 09/02/2006 scottlu
|
142
|
-
- Ignore pipe, socket, and device file types
|
143
|
-
- Added --ssh-i to select ssh id file to use (see ssh -i) (Damien Mascord)
|
144
|
-
- Added --ssh-C to perform ssh compression (see ssh -C) (David Precious)
|
145
|
-
- Added --ssh-p to specify remote port (see ssh -p) (David Precious)
|
146
|
-
|
147
|
-
v 0.6 06/17/2006 scottlu
|
148
|
-
- Ignore broken symlinks and other failed stats during filelist creation
|
149
|
-
(David Precious)
|
150
|
-
- Added --lock, which ensures only one backup to a given dest can occur
|
151
|
-
at a time (Joe Beda)
|
152
|
-
|
153
|
-
v 0.5 04/15/2006 scottlu
|
154
|
-
- Added 'latest' link from Joe Beda http://eightypercent.net (thanks Joe!)
|
155
|
-
- Fixed --verify. It wasn't specifying the remote machine (I rarely use
|
156
|
-
verify but sometimes it is nice to sanity check backups)
|
157
|
-
|
158
|
-
v 0.4 11/14/2004 scottlu
|
159
|
-
- Changed a central catalog design with trees hardlinking to the catalog.
|
160
|
-
This way catalog updating can be incremental.
|
161
|
-
- Removed filemaps - not required any longer
|
162
|
-
- Add catalog logging as well as backup logging.
|
163
|
-
- Added incremental backup feature --minutes <minutes>
|
164
|
-
- Make md5hash calculation incremental so a timeout doesn't waste time
|
165
|
-
- Created 0.3-0.4.py for 0.3 to 0.4 upgrading
|
166
|
-
- Added --showfiles, shows differences between src and dst
|
167
|
-
- Added --catalogonly, updates catalog only, doesn't create tree
|
168
|
-
- Added --filelist, specifies file list to use instead of tree
|
169
|
-
- Removed --rmempty
|
170
|
-
- Added --verbose
|
171
|
-
|
172
|
-
v 0.3 9/10/2004 scottlu
|
173
|
-
- Added backup stat query methods
|
174
|
-
- Changed log file format
|
175
|
-
- Added viewlb.cgi, a web interface for viewing backups
|
176
|
-
- added gzip compression of filemap
|
177
|
-
- added --numeric-ids
|
178
|
-
|
179
|
-
v 0.2 8/28/2004 scottlu
|
180
|
-
- filemap format change
|
181
|
-
- added --rmempty
|
182
|
-
- added --verify to run rsync in verify mode
|
183
|
-
- added uid/gid mapping by default unless --numeric-ids is specified
|
184
|
-
|
185
|
-
v 0.1 8/19/2004 scottlu
|
186
|
-
- Fully working backup, hardlinking between trees
|
187
|
-
|
188
|
-
License:
|
189
|
-
|
190
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
191
|
-
of this software and associated documentation files (the "Software"), to deal
|
192
|
-
in the Software without restriction, including without limitation the rights
|
193
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
194
|
-
copies of the Software, and to permit persons to whom the Software is
|
195
|
-
furnished to do so, subject to the following conditions:
|
196
|
-
|
197
|
-
The above copyright notice and this permission notice shall be included in all
|
198
|
-
copies or substantial portions of the Software.
|
199
|
-
|
200
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
201
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
202
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
203
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
204
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
205
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
206
|
-
SOFTWARE.
|
207
|
-
"""
|
208
|
-
|
209
|
-
import os
|
210
|
-
import sys
|
211
|
-
import cPickle
|
212
|
-
from os.path import join
|
213
|
-
import time
|
214
|
-
import stat
|
215
|
-
import md5
|
216
|
-
import shutil
|
217
|
-
import tempfile
|
218
|
-
import struct
|
219
|
-
import re
|
220
|
-
import glob
|
221
|
-
import fcntl
|
222
|
-
import shlex
|
223
|
-
|
224
|
-
# Transport state shared by send_object()/recv_object(); filled in by init_io().
fd_send = fd_recv = None
pickler = unpickler = None

# strftime/strptime pattern used for backup directory names and log timestamps.
date_format = '%Y.%m.%d-%H.%M.%S'

# Index of each field inside the compact per-file stat list
# [st_mode, st_size, st_mtime, st_uid, st_gid].
MODE, SIZE, MTIME, UID, GID = range(5)

# Mask applied to st_mode before chmod and when naming catalog entries.
CHMOD_BITS = int('6777', 8)
|
236
|
-
|
237
|
-
def send_object(object):
    """Pickle ``object`` onto the outgoing stream and flush it right away.

    Uses the module-level ``pickler`` and ``fd_send``, which are ``None``
    until ``init_io`` has been called.
    """
    # Only read access is needed, so no ``global`` declaration is required.
    pickler.dump(object)
    fd_send.flush()
|
241
|
-
|
242
|
-
def recv_object():
    """Read and return the next pickled object from the incoming stream.

    Uses the module-level ``unpickler`` created by ``init_io``.

    NOTE(review): unpickling executes whatever the peer sends, so the
    stream must come from a trusted process.
    """
    received = unpickler.load()
    return received
|
245
|
-
|
246
|
-
def init_io(send, recv):
    """Install ``send``/``recv`` as the module-wide transport.

    Stores both file objects and builds the pickler (protocol 1) and
    unpickler that ``send_object`` and ``recv_object`` use.
    """
    global fd_send, fd_recv, pickler, unpickler
    fd_send, fd_recv = send, recv
    pickler = cPickle.Pickler(send, 1)
    unpickler = cPickle.Unpickler(recv)
|
252
|
-
|
253
|
-
def verbose_log(s):
    """Write ``s`` plus a newline to stderr when ``--verbose`` was given."""
    if not have_option('--verbose'):
        return
    sys.stderr.write('%s\n' % s)
|
256
|
-
|
257
|
-
class Log:
    """Timestamped, line-oriented log file.

    Each record is written as ``<date_format timestamp>: <message>``.
    The instance is either a writer (modes such as ``'wt'``/``'at'``) or a
    reader (``'rt'``); logging is best-effort, so a file that cannot be
    opened simply turns every operation into a no-op.
    """

    def __init__(self, logfile, mode):
        """Open ``logfile`` (made absolute) with ``mode``.

        On failure ``self.logfile`` is ``None`` instead of raising.
        """
        self.mode = mode
        try:
            # open() is the portable spelling of the old file() builtin.
            self.logfile = open(os.path.abspath(logfile), self.mode)
        except (IOError, OSError):
            self.logfile = None
        self.re = re.compile(r'^(?P<time>....\...\...\-..\...\...)\: (?P<message>.*)$')

    def __del__(self):
        # getattr guards against __init__ having failed before the
        # attribute was assigned.
        logfile = getattr(self, 'logfile', None)
        if logfile:
            logfile.close()

    def write(self, message):
        """Append one timestamped record; silently skipped for readers."""
        if not self.logfile or self.mode == 'rt':
            return

        try:
            strtime = time.strftime(date_format, time.localtime())
            self.logfile.write('%s: %s\n' % (strtime, message))
            self.logfile.flush()
        except (IOError, OSError, ValueError):
            # Best-effort: a failing or already closed log must never
            # abort a backup.
            pass

    def nextline(self):
        """Return the next record as ``(struct_time, message)``.

        Returns ``None`` at end of file, when the file could not be opened,
        or when the log was opened for appending. Lines that are not valid
        records (for example the continuation of a multi-line message) are
        skipped instead of raising.
        """
        if not self.logfile or self.mode == 'at':
            return None

        while True:
            line = self.logfile.readline()
            if len(line) == 0:
                return None
            m = self.re.match(line)
            if m is None:
                continue
            try:
                stamp = time.strptime(m.group('time'), date_format)
            except ValueError:
                continue
            return (stamp, m.group('message'))
|
290
|
-
|
291
|
-
class Catalog:
    """Central store for files of different hash/stat combinations

    Backup trees hard link to the catalog. The catalog can be updated
    incrementally. A backup tree is not created until the catalog is
    up to date.

    Layout on disk: ``<path>/log`` holds one ``<timestamp>.log`` per update
    run, and ``<path>/000`` .. ``<path>/255`` hold the stored files, named
    ``<md5>-<mtime>-<size>-<mode>-<uid>-<gid>``.
    """

    def __init__(self, path):
        """Remember the catalog location, creating the layout if absent."""
        self.path = os.path.abspath(path)
        self.lenbase = len('%s%s' % (self.path, os.sep))
        self.logpath = join(self.path, 'log')
        if not os.path.exists(self.path):
            os.mkdir(self.path)
            os.mkdir(self.logpath)
            for n in range(256):
                os.mkdir(join(self.path, '%03d' % n))

    def _subdir(self, md5):
        """Return the bucket directory a given hash string is stored in."""
        return join(self.path, '%03d' % (hash(md5) & 255))

    def get_logfiles(self):
        """Return ``[(struct_time, abs_log_path), ...]`` sorted by timestamp.

        Only regular entries named ``<date_format>.log`` are reported.
        """
        suffix = '.log'
        datestrs = []
        for item in os.listdir(self.logpath):
            s = os.stat(join(self.logpath, item))
            if stat.S_ISDIR(s.st_mode):
                continue
            # str.rstrip('.log') strips a *character set*, not a suffix, and
            # would also accept names lacking the suffix; test it explicitly.
            if not item.endswith(suffix):
                continue
            datestr = item[:-len(suffix)]
            try:
                time.strptime(datestr, date_format)
            except ValueError:
                continue
            datestrs.append(datestr)
        datestrs.sort()
        return [(time.strptime(datestr, date_format), join(self.logpath, '%s.log' % datestr)) for datestr in datestrs]

    def parse_log(self, logpath_abs):
        """Parse one catalog log into ``(kind, time, from_path, for_path)``.

        ``kind`` is ``'copy'`` or ``'new'``. A "from" record is expected to
        be followed by the matching "for" record; when it is missing (or the
        log ends there) ``for_path`` is the empty string.
        """
        log = Log(logpath_abs, 'rt')
        parse = []
        while True:
            line = log.nextline()
            if line is None:
                break
            message = line[1]
            if message.startswith('+++'):
                continue
            for kind in ('copy', 'new'):
                from_tag = '%s from: ' % kind
                if not message.startswith(from_tag):
                    continue
                for_tag = '%s for: ' % kind
                forT = ''
                follow = log.nextline()
                # A truncated log yields None here; do not index into it.
                if follow is not None and follow[1].startswith(for_tag):
                    forT = follow[1][len(for_tag):]
                parse.append((kind, line[0], message[len(from_tag):], forT))
                break
        return parse

    def file_from_hash(self, md5):
        """Return any stored file whose name starts with ``md5``, or None."""
        subdir = self._subdir(md5)
        files = glob.glob1(subdir, '%s*' % md5)
        if len(files) > 0:
            return join(subdir, files[0])
        return None

    def file_from_hashstat(self, md5, s):
        """Return the stored file for this exact hash+stat, or None."""
        filepath_abs = self.getfilepath(md5, s)
        if os.path.exists(filepath_abs):
            return filepath_abs
        return None

    def getfilepath(self, md5, s):
        """Return the catalog path that encodes ``md5`` and stat list ``s``."""
        mdate = time.strftime(date_format, time.localtime(s[MTIME]))
        fn = '%s-%s-%08x-%05x-%04d-%04d' % (md5, mdate, s[SIZE], s[MODE] & CHMOD_BITS, s[UID], s[GID])
        return join(self._subdir(md5), fn)

    def update(self, filelist, treepath_last, end_time):
        """Bring the catalog up to date with ``filelist``.

        For each non-directory entry that is not unchanged in
        ``treepath_last``, the hash is requested from the source; the file
        is then reused, copied from an entry with the same hash, or
        downloaded. ``end_time`` (0 = no limit) bounds downloads using the
        measured transfer rate.

        Returns ``(complete, downloaded_bytes, md5hashes)`` where
        ``md5hashes[n]`` is the hash obtained for ``filelist[n]`` or None.
        ``complete`` is False when the run was aborted (timeout or error).
        """
        # This is the slow (and expensive!) bandwidth eating portion
        # of link-backup. If --minutes is specified, don't go beyond
        # the minutes specified.
        datestr = time.strftime(date_format, time.localtime())
        log = Log(join(self.logpath, '%s.log' % datestr), 'wt')
        dl_seconds = 0
        dl_size = 0
        md5hashes = [None] * len(filelist)
        log.write('+++begin+++')
        for n, (filepath_rel, s) in enumerate(filelist):
            # Only files
            if stat.S_ISDIR(s[MODE]):
                continue

            # If stat equal we don't need a hash for this file
            if treepath_last and is_stat_equal(join(treepath_last, filepath_rel), s):
                verbose_log('dst: found file %s' % filepath_rel)
                continue

            # Get the md5hash for this file
            verbose_log('dst: request hash for %s' % filepath_rel)
            send_object(n)
            md5hashes[n] = recv_object()
            if not md5hashes[n]:
                verbose_log('dst: did not receive hash?')
                send_object(False)
                continue

            # File already present? Skip.
            if self.file_from_hashstat(md5hashes[n], s):
                verbose_log('dst: file present already %s' % filepath_rel)
                send_object(False)
                continue

            # File not present. Copy locally or from the source
            fd, tmpfilepath_abs = tempfile.mkstemp(dir=self.path)
            filepath_abs = self.getfilepath(md5hashes[n], s)
            try:
                copyfile_abs = self.file_from_hash(md5hashes[n])
                if copyfile_abs:
                    # Found same file with different stats. Requires a copy
                    verbose_log('dst: using file with same hash %s' % filepath_rel)
                    send_object(False)
                    shutil.copyfile(copyfile_abs, tmpfilepath_abs)
                    log.write('copy from: %s' % filepath_abs[self.lenbase:])
                    log.write('copy for: %s' % filepath_rel)
                else:
                    # Enough time for this file? The estimate needs a
                    # non-zero rate, so skip it until real data has arrived.
                    if end_time != 0 and dl_seconds != 0 and dl_size != 0:
                        est_seconds = s[SIZE] / (dl_size / dl_seconds)
                        if time.time() + est_seconds >= end_time:
                            verbose_log('dst: timeout')
                            send_object(False)
                            # Handled by the abort path below.
                            raise RuntimeError('out of time')

                    # Time downloads to understand average download rate and
                    # use as an estimator of a given file's download time
                    verbose_log('dst: requesting file %s' % filepath_rel)
                    dl_time_start = time.time()

                    # Copy from source
                    # The chunks are sized independent from stats for robustness
                    # Recalc the md5 hash along the way so it is right
                    send_object(True)
                    m = md5.new()
                    while True:
                        readcount = struct.unpack('!i', fd_recv.read(4))[0]
                        if readcount == 0:
                            break
                        if readcount < 0:
                            raise IOError('Error reading file')
                        chunk = fd_recv.read(readcount)
                        m.update(chunk)
                        os.write(fd, chunk)

                    # Delta accumulator
                    dl_seconds += time.time() - dl_time_start
                    os.fsync(fd)
                    dl_size += os.fstat(fd).st_size

                    # File might have changed during the update.
                    # Update hash and size and check to see if it already
                    # exists in the catalog
                    if md5hashes[n] != m.hexdigest():
                        verbose_log('dst: file changed during copy %s' % filepath_rel)
                        md5hashes[n] = m.hexdigest()
                        s[SIZE] = os.fstat(fd).st_size
                        filelist[n] = (filepath_rel, s)
                        if self.file_from_hashstat(md5hashes[n], s):
                            verbose_log('dst: file already in catalog %s' % filepath_rel)
                            os.close(fd)
                            os.remove(tmpfilepath_abs)
                            continue
                        # The entry must be stored under the name derived
                        # from the hash/size actually received, otherwise
                        # later lookups by md5hashes[n] cannot find it.
                        filepath_abs = self.getfilepath(md5hashes[n], s)

                    log.write('new from: %s' % filepath_abs[self.lenbase:])
                    log.write('new for: %s' % filepath_rel)

            except:
                # Deliberately broad: any failure (including the timeout
                # above) discards the partial file, tells the source we are
                # done, and reports an incomplete update.
                os.close(fd)
                os.remove(tmpfilepath_abs)
                send_object(-1)
                log.write('+++end+++')
                return False, dl_size, md5hashes

            # Rename and set file stats
            os.close(fd)
            os.utime(tmpfilepath_abs, (s[MTIME], s[MTIME]))
            os.chown(tmpfilepath_abs, s[UID], s[GID])
            os.rename(tmpfilepath_abs, filepath_abs)
            os.chmod(filepath_abs, s[MODE] & CHMOD_BITS)

        # Done with file requests
        verbose_log('dst: catalog update done')
        send_object(-1)
        log.write('+++end+++')
        return True, dl_size, md5hashes

    def get_showfiles(self, filelist, treepath_last):
        """Return indexes into ``filelist`` of files the catalog lacks.

        Hashes are requested in one batch for every file that is not
        unchanged in ``treepath_last``; files whose hash is already in the
        catalog are dropped and only one index per distinct hash is kept.

        Raises AssertionError when the peer returns a different number of
        hashes than were requested.
        """
        # Get hashes for new files. If file doesn't exist in old backup with
        # same stat, we need ask the client for a hash
        md5requests = []
        for n, (filepath_rel, s) in enumerate(filelist):
            # Only files
            if stat.S_ISDIR(s[MODE]):
                continue
            # If stat equal we don't need a hash for this file
            if treepath_last and is_stat_equal(join(treepath_last, filepath_rel), s):
                continue
            # Need hash for this file
            md5requests.append(n)

        # Retrieve hashes
        send_object(md5requests)
        md5hashes = recv_object()
        if len(md5hashes) != len(md5requests):
            raise AssertionError('Hash count mismatch')

        # Make one list sorted by hash (stable, so the first requester of
        # each hash wins), keeping only hashes missing from the catalog.
        md5sort = [(md5requests[n], md5hashes[n]) for n in range(len(md5hashes)) if not self.file_from_hash(md5hashes[n])]
        md5sort.sort(key=lambda pair: pair[1])

        # Eliminate duplicates and return
        showfiles = []
        md5 = None
        for index, digest in md5sort:
            if md5 == digest:
                continue
            md5 = digest
            showfiles.append(index)
        return showfiles
|
548
|
-
|
549
|
-
# Backup
|
550
|
-
|
551
|
-
class Backup:
    """One dated backup directory.

    ``<path>/tree`` holds the hard-linked file tree and ``<path>/log`` the
    log describing how each file got there; the directory's own name is the
    backup timestamp in ``date_format``.
    """

    def __init__(self, path):
        """Bind to ``path`` (made absolute) and create ``tree`` if missing."""
        self.path = os.path.abspath(path)
        self.logpath_abs = join(self.path, 'log')
        self.treepath = join(self.path, 'tree')
        if not os.path.exists(self.treepath):
            os.mkdir(self.treepath)

    def parse_log(self):
        """Return the tree log as a list of ``(kind, relative_path)``.

        ``kind`` is ``'new'``, ``'copy'`` or ``'link'``; begin/end markers
        and unrecognised records are ignored.
        """
        entries = []
        reader = Log(self.logpath_abs, 'rt')
        record = reader.nextline()
        while record is not None:
            message = record[1]
            if message not in ('+++end+++', '+++begin+++'):
                for tag in ('new', 'copy', 'link'):
                    prefix = '%s: ' % tag
                    if message.startswith(prefix):
                        entries.append((tag, message[len(prefix):]))
                        break
            record = reader.nextline()
        return entries

    def get_date(self):
        """Return the backup timestamp parsed from the directory name."""
        return time.strptime(self.get_dirname(), date_format)

    def get_dirname(self):
        """Return the last path component of the backup directory."""
        return os.path.basename(self.path)

    def get_treepath(self):
        """Return the absolute path of the ``tree`` directory."""
        return self.treepath

    def get_files_since(self, backup_last, catalog):
        """Map relative path -> 'new'/'copy' for recent catalog additions.

        Only catalog logs dated at or after ``backup_last`` are read; with
        no previous backup every catalog log is considered.
        """
        if backup_last:
            threshold = time.mktime(backup_last.get_date())
        else:
            threshold = 0

        added = {}
        for stamp, logfile_abs in catalog.get_logfiles():
            if time.mktime(stamp) >= threshold:
                for entry in catalog.parse_log(logfile_abs):
                    added[entry[3]] = entry[0]
        return added

    def build_tree(self, backup_last, filelist, md5hashes, catalog):
        """Materialise this backup's tree from ``filelist``.

        Every file is expected to exist already, either in the previous
        backup (no hash recorded in ``md5hashes``) or in the catalog, so
        this only creates directories, hard-links the files and finally
        applies directory times, ownership and permissions.
        """
        if backup_last:
            treepath_last = backup_last.get_treepath()
        else:
            treepath_last = None
        filessince = self.get_files_since(backup_last, catalog)
        log = Log(self.logpath_abs, 'at')
        log.write('+++begin+++')
        verbose_log('dst: creating tree %s' % self.treepath)

        # Directories first; the list orders parents before their children.
        # Their permissions are applied at the very end.
        verbose_log('dst: making directories...')
        for relpath, info in filelist:
            if not stat.S_ISDIR(info[MODE]):
                continue
            verbose_log('dst: making dir %s' % relpath)
            os.mkdir(join(self.treepath, relpath))

        verbose_log('dst: linking files...')
        for index, (relpath, info) in enumerate(filelist):
            if stat.S_ISDIR(info[MODE]):
                continue
            verbose_log('dst: inspecting file %s' % relpath)

            # A recorded hash means the file lives in the catalog; without
            # one it was found unchanged in the previous backup.
            if md5hashes[index]:
                verbose_log('dst: found in catalog: %s' % relpath)
                linkpath_abs = catalog.file_from_hashstat(md5hashes[index], info)
            else:
                verbose_log('dst: found in last backup: %s' % relpath)
                linkpath_abs = join(treepath_last, relpath)

            # Only files that entered the catalog since the last tree are
            # logged as new/copy, so renames, dups and moves show as links.
            if relpath in filessince:
                log.write('%s: %s' % (filessince[relpath], relpath))
            else:
                log.write('link: %s' % relpath)

            target_abs = join(self.treepath, relpath)
            verbose_log('dst: hardlinking %s to %s' % (target_abs, linkpath_abs))
            os.link(linkpath_abs, target_abs)

        # Walk the list backwards so children are finished before parents.
        verbose_log('dst: setting permissions on directories...')
        for relpath, info in reversed(filelist):
            if not stat.S_ISDIR(info[MODE]):
                continue
            verbose_log('dst: setting permissions on: %s' % relpath)
            dirpath_abs = join(self.treepath, relpath)
            os.utime(dirpath_abs, (info[MTIME], info[MTIME]))
            os.chown(dirpath_abs, info[UID], info[GID])
            os.chmod(dirpath_abs, info[MODE] & CHMOD_BITS)

        verbose_log('dst: done creating tree %s' % self.treepath)
        log.write('+++end+++')
|
668
|
-
|
669
|
-
# Manager
|
670
|
-
|
671
|
-
class Manager:
    """Manages Backup instances stored under one destination directory.

    The destination holds one directory per backup, named with
    ``date_format``, plus the shared ``.catalog`` directory.
    """

    def __init__(self, path):
        """Create the destination directory if needed and open its catalog."""
        self.path = os.path.abspath(path)
        if not os.path.exists(self.path):
            os.mkdir(self.path)
        self.catalog = Catalog(join(self.path, '.catalog'))

    def get_path(self):
        """Return the absolute destination directory."""
        return self.path

    def new_backup(self):
        """Create and return a Backup named after the current local time."""
        dirpath = join(self.path, time.strftime(date_format, time.localtime()))
        os.mkdir(dirpath)
        return Backup(dirpath)

    def delete_backup(self, backup):
        """Remove the directory of ``backup`` and everything inside it."""
        dirpath_abs = join(self.path, backup.get_dirname())
        if not os.path.exists(dirpath_abs):
            return
        # Bottom-up walk so every directory is empty when it is removed.
        for root, dirs, files in os.walk(dirpath_abs, topdown=False):
            for name in files:
                os.remove(join(root, name))
            for name in dirs:
                os.rmdir(join(root, name))
        os.rmdir(dirpath_abs)

    def get_backup(self, backup):
        """Return the Backup for the directory name ``backup``."""
        return Backup(join(self.path, backup))

    def get_backups(self):
        """Return all backups, oldest first.

        Only sub-directories whose name parses with ``date_format`` count;
        anything else (such as ``.catalog``) is ignored.
        """
        names = []
        for item in os.listdir(self.path):
            s = os.stat(join(self.path, item))
            if not stat.S_ISDIR(s.st_mode):
                continue
            try:
                time.strptime(item, date_format)
            except ValueError:
                # Not a backup directory.
                continue
            names.append(item)
        names.sort()
        return [Backup(join(self.path, item)) for item in names]
|
714
|
-
|
715
|
-
# Helpers
|
716
|
-
|
717
|
-
def dump_arg(x):
    """Return ``x`` wrapped in double quotes for use in a shell command.

    A backslash is inserted before each backslash, dollar sign, double
    quote and backtick in ``x``.
    """
    pieces = ['"']
    for ch in x:
        if ch in '\\$"`':
            pieces.append('\\')
        pieces.append(ch)
    pieces.append('"')
    return ''.join(pieces)
|
725
|
-
|
726
|
-
def start_server(src, dst, is_source):
    """Launch the peer process and connect the pickle transport to it.

    The peer is a bare ``python -c`` bootstrap that unpickles and executes
    this script's own source, which is sent as the first object. It is
    started locally, or through ssh (or the ``-e`` command) when the chosen
    endpoint has a ``'remote'`` value.

    src, dst  -- endpoint dicts; the ``'string'`` and ``'remote'`` keys are
                 read here.
    is_source -- True when the peer acts as the source side; the peer then
                 runs at ``src``, otherwise at ``dst``.
    """
    # Command line for server
    cmd1 = "python -c 'import sys;import cPickle;exec(cPickle.Unpickler(sys.stdin).load())' --server"
    if is_source:
        cmd1 = "%s --source" % cmd1
    # Forward our own options. Each one is quoted so values that contain
    # spaces or shell metacharacters reach the peer as a single argument
    # instead of being re-split or interpreted by the shell.
    for arg in sys.argv[1:-2]:
        cmd1 = '%s %s' % (cmd1, dump_arg(arg))
    cmd1 = "%s %s %s" % (cmd1, dump_arg(src['string']), dump_arg(dst['string']))

    # Remote?
    if is_source:
        addr = src
    else:
        addr = dst

    # Add ssh and args if remote
    if addr['remote']:
        ssh_args = '%s %s' % (addr['remote'], dump_arg(cmd1))
        if have_option('-e'):
            cmd2 = '%s %s' % (get_option_value('-e'), ssh_args)
        else:
            cmd2 = 'ssh %s' % ssh_args
    else:
        cmd2 = cmd1

    # Start and pass this code
    verbose_log('command: %s' % cmd2)
    fdin, fdout = os.popen2(cmd2, mode='b')
    init_io(fdin, fdout)
    f = open(sys.argv[0])
    try:
        send_object(f.read())
    finally:
        # Close the script file even when sending fails.
        f.close()
|
759
|
-
|
760
|
-
def is_mode_ok(mode):
    """Return False for block/character devices, FIFOs and sockets.

    Every other ``st_mode`` value yields True.
    """
    rejected = (stat.S_ISBLK, stat.S_ISCHR, stat.S_ISFIFO, stat.S_ISSOCK)
    for is_special in rejected:
        if is_special(mode):
            return False
    return True
|
770
|
-
|
771
|
-
def build_filelist_from_tree(treepath):
    """Walk ``treepath`` and return ``[(relative_path, stat_list), ...]``.

    ``stat_list`` is ``[st_mode, st_size, st_mtime, st_uid, st_gid]``.
    Entries rejected by ``is_mode_ok`` and entries that cannot be stat'ed
    are left out.
    """
    treepath_abs = os.path.abspath(treepath)
    # Number of leading characters to drop to make a path relative.
    lenbase = len('%s%s' % (treepath_abs, os.sep))

    def collect(entries, dirpath, names):
        for name in names:
            filepath = join(dirpath, name)
            # Sometimes a stat may fail, like if there are broken
            # symlinks in the file system; such entries are skipped.
            try:
                s = os.stat(filepath)
            except OSError:
                continue
            if not is_mode_ok(s.st_mode):
                continue
            # Collect stat values instead of stat objects. It's 6
            # times smaller (measured) and mutable
            # (for uid/gid mapping at the dest)
            entries.append((filepath[lenbase:], [s.st_mode, s.st_size, s.st_mtime, s.st_uid, s.st_gid]))

    filelist = []
    os.path.walk(treepath_abs, collect, filelist)
    return filelist
|
796
|
-
|
797
|
-
def build_filelist_from_file(treepath, file):
    """Build a filelist from an open file naming one relative path per line.

    Every path is stat'ed relative to treepath. Entries rejected by
    is_mode_ok() are skipped. The result has the same shape as the one
    from build_filelist_from_tree():
    (relative_path, [st_mode, st_size, st_mtime, st_uid, st_gid]).
    """
    entries = []
    for raw_line in file.readlines():
        relpath = raw_line.rstrip('\n')
        info = os.stat(join(treepath, relpath))
        if is_mode_ok(info.st_mode):
            values = [info.st_mode, info.st_size, info.st_mtime,
                      info.st_uid, info.st_gid]
            entries.append((relpath, values))
    return entries
|
806
|
-
|
807
|
-
def build_filelist(treepath):
    """Build the filelist for treepath.

    If '--filelist NAME' is on the command line, the list of relative
    paths is read from NAME ('-' means standard input); otherwise the
    tree itself is walked.
    """
    verbose_log('building filelist...')
    if '--filelist' in sys.argv:
        source = sys.argv[sys.argv.index('--filelist') + 1]
        if source == '-':
            return build_filelist_from_file(treepath, sys.stdin)
        listing = open(source)
        # Close the list file even when building the list raises
        # (e.g. a listed path cannot be stat'ed).
        try:
            return build_filelist_from_file(treepath, listing)
        finally:
            listing.close()
    return build_filelist_from_tree(treepath)
|
819
|
-
|
820
|
-
def build_uidgidmap(filelist):
    """Build a map of uid's to names and gid's to names
    so mapping can occur at the destination.

    Returns the tuple (uidname_map, gidname_map). An id with no
    resolvable name is mapped to its decimal string.
    """
    import pwd
    import grp

    def name_for(ident, lookup):
        # Deliberate best-effort fallback to the numeric id. Exception
        # (rather than a bare except) lets KeyboardInterrupt and
        # SystemExit through.
        try:
            return lookup(ident)[0]
        except Exception:
            return str(ident)

    uidname_map = {}
    gidname_map = {}
    for filepath_rel, s in filelist:
        uid = s[UID]
        if uid not in uidname_map:
            uidname_map[uid] = name_for(uid, pwd.getpwuid)
        gid = s[GID]
        if gid not in gidname_map:
            gidname_map[gid] = name_for(gid, grp.getgrgid)
    return uidname_map, gidname_map
|
840
|
-
|
841
|
-
def map_uidgid(filelist, idname_map):
    """Fix up uid / gid of every filelist entry to destination values.

    filelist entries are (relative_path, stat_values); the stat_values
    lists are modified in place. idname_map is the pair
    (uidname_map, gidname_map) produced by build_uidgidmap() on the
    source side. Returns None.
    """
    # If root and --numeric-ids specified, keep the numeric ids
    if os.getuid() == 0 and have_option('--numeric-ids'):
        return

    import pwd
    import grp

    uid_user = os.getuid()
    gid_user = os.getgid()
    is_root = (uid_user == 0)

    # First build a uid->uid map. If not root, valid uid mapping is
    # only current user. If root, attempt to map uid by name; if that
    # fails keep the uid.
    uidname_map = idname_map[0]
    uiduid_map = {}
    for uid_source in uidname_map:
        if is_root:
            try:
                uiduid_map[uid_source] = pwd.getpwnam(uidname_map[uid_source])[2]
            except Exception:
                # Deliberate fallback; Exception (not a bare except)
                # lets KeyboardInterrupt / SystemExit through.
                uiduid_map[uid_source] = uid_source
        else:
            uiduid_map[uid_source] = uid_user

    # Build the name->gid map of groups valid for this user. If not
    # root, that is any group this user is a part of. The member list
    # of a group entry (index 3) holds *user* names, so membership is
    # tested with the user's name, not with the primary group's name.
    try:
        user_name = pwd.getpwuid(uid_user)[0]
    except Exception:
        user_name = None
    namegid_map = {}
    for group in grp.getgrall():
        if is_root or group[2] == gid_user or user_name in group[3]:
            namegid_map[group[0]] = group[2]

    # Now build a gid map to valid gids for this user; unknown group
    # names fall back to the user's primary gid.
    gidname_map = idname_map[1]
    gidgid_map = {}
    for gid_source in gidname_map:
        gid_sourcename = gidname_map[gid_source]
        if gid_sourcename in namegid_map:
            gidgid_map[gid_source] = namegid_map[gid_sourcename]
        else:
            gidgid_map[gid_source] = gid_user

    # Now map filelist entries (assigning an unchanged value is a no-op)
    for filepath_rel, s in filelist:
        s[UID] = uiduid_map[s[UID]]
        s[GID] = gidgid_map[s[GID]]
|
905
|
-
|
906
|
-
def serve_files(treepath, filelist):
    """Serve requested files.

    Loops on requests read with recv_object(): each request is an index
    into filelist, and -1 ends the loop. For every request the md5
    hex digest of the file is sent (None if it cannot be read). If the
    peer then answers with a true value, the file content is streamed
    on fd_send as a sequence of '!i' length-prefixed chunks, terminated
    by a zero length, or by -1 if reading failed.
    """
    chunk_size = 1024 * 1024

    while True:
        # Which file?
        n = recv_object()
        if n == -1:
            break

        filepath_rel, s = filelist[n]
        filepath_abs = join(treepath, filepath_rel)

        # Calc hash and return it. Exactly one object is sent, after
        # the file has been closed.
        verbose_log('src: calc hash for %s' % filepath_rel)
        digest = None
        try:
            f = open(filepath_abs, 'rb')
            try:
                m = md5.new()
                while True:
                    chunk = f.read(chunk_size)
                    if len(chunk) == 0:
                        break
                    m.update(chunk)
                digest = m.hexdigest()
            finally:
                f.close()
        except (IOError, OSError):
            verbose_log('src: error calcing hash for %s' % filepath_rel)
        send_object(digest)

        # False means don't need the file
        if not recv_object():
            verbose_log('src: skipping file %s' % filepath_rel)
            continue

        # Send size with data chunks in case the file is changing
        # while this occurs
        verbose_log('src: sending file %s' % filepath_rel)
        try:
            f = open(filepath_abs, 'rb')
            try:
                while True:
                    chunk = f.read(chunk_size)
                    fd_send.write(struct.pack('!i', len(chunk)))
                    if len(chunk) == 0:
                        break
                    fd_send.write(chunk)
                    fd_send.flush()
            finally:
                f.close()
        except (IOError, OSError):
            verbose_log('src: error sending file %s' % filepath_rel)
            fd_send.write(struct.pack('!i', -1))
        # Make sure the end / error marker is not left sitting in the
        # buffer while we wait for the next request.
        fd_send.flush()

        verbose_log('src: send complete %s' % filepath_rel)
|
962
|
-
|
963
|
-
def serve_hashes(treepath, filelist):
    """Serve requested hashes.

    Receives a list of indexes into filelist and sends back the list
    of md5 hex digests of those files, in the same order. If -1 is
    received instead (the abort marker the receiving side sends when it
    gives up, as serve_files also honours), nothing is sent.
    """
    hashrequests = recv_object()
    if hashrequests == -1:
        return

    hashlist = []
    for index in hashrequests:
        filepath_rel, s = filelist[index]
        filepath_abs = join(treepath, filepath_rel)
        f = open(filepath_abs, 'rb')
        # Close the file even if reading fails part way through.
        try:
            m = md5.new()
            while True:
                chunk = f.read(1024 * 1024)
                if len(chunk) == 0:
                    break
                m.update(chunk)
        finally:
            f.close()
        hashlist.append(m.hexdigest())
    send_object(hashlist)
|
981
|
-
|
982
|
-
def is_stat_equal(filepath_abs, s):
    """Return True if the file at filepath_abs matches the stat values s.

    Compared are the permission bits (mode masked with CHMOD_BITS),
    size, mtime, uid and gid. A file that cannot be stat'ed is
    reported as not equal.
    """
    try:
        s2 = os.stat(filepath_abs)
    except OSError:
        # Missing / unreadable file: not equal. Only the stat failure
        # is absorbed here, nothing else.
        return False
    return ((s[MODE] & CHMOD_BITS) == (s2.st_mode & CHMOD_BITS)
            and s[SIZE] == s2.st_size
            and s[MTIME] == s2.st_mtime
            and s[UID] == s2.st_uid
            and s[GID] == s2.st_gid)
|
990
|
-
|
991
|
-
def is_tree_equal(filelist, treepath_last):
    """Return True if a tree built from filelist would match treepath_last.

    The last tree is re-scanned and compared entry by entry. For
    directories the size is ignored (mode, mtime, uid and gid are
    compared); for everything else all stat values must be equal.
    """
    verbose_log('checking for need to build tree...')
    if not treepath_last:
        verbose_log('tree not equal: no last tree!')
        return False

    filelist_old = build_filelist_from_tree(treepath_last)
    if len(filelist_old) != len(filelist):
        verbose_log('tree not equal: filelists different sizes!')
        return False

    stats_new = dict(filelist)
    stats_old = dict(filelist_old)
    for path in stats_new:
        if path in stats_old:
            was = stats_old[path]
            now = stats_new[path]
            if stat.S_ISDIR(was[MODE]):
                changed = (was[MODE] != now[MODE] or was[MTIME] != now[MTIME]
                           or was[UID] != now[UID] or was[GID] != now[GID])
            else:
                changed = (was != now)
        else:
            changed = True

        if changed:
            verbose_log('tree not equal: stats different %s' % path)
            if path in stats_old:
                verbose_log('old %s' % str(stats_old[path]))
            verbose_log('new %s' % str(stats_new[path]))
            return False

    verbose_log('no need to build tree - it would be identical to the last tree')
    return True
|
1024
|
-
|
1025
|
-
def execute(src, dst, is_source):
    """Run one side of the backup conversation.

    src and dst are address dictionaries (their 'path' entry is used
    here). When is_source is true this process sends the filelist and
    serves files / hashes; otherwise it receives the filelist, updates
    the catalog and, when appropriate, builds a new tree.

    Returns the tuple (results, subdir): a human readable summary and
    the path of the tree that was created, or None if none was.
    """
    if is_source:
        # Sending side
        # Create filelist, calc name map, send both
        srcpath = os.path.abspath(os.path.expanduser(src['path']))
        filelist = build_filelist(srcpath)
        send_object(filelist)
        idname_map = build_uidgidmap(filelist)
        send_object(idname_map)

        # Which command
        if have_option('--showfiles'):
            serve_hashes(srcpath, filelist)
        else:
            serve_files(srcpath, filelist)

        results = recv_object()
        subdir = recv_object()
    else:
        # Receiving side
        # Recv filelist and name mapping, perform uid/gid mapping
        filelist = recv_object()
        idname_map = recv_object()
        map_uidgid(filelist, idname_map)
        manager = Manager(os.path.expanduser(dst['path']))
        catalog = manager.catalog
        backups = manager.get_backups()
        treepath_last = None
        backup_last = None
        if len(backups) != 0:
            backup_last = backups[-1]
            treepath_last = backup_last.get_treepath()

        # If --lock specified, only one receiver at a time.
        # This temp file will get deleted before the script ends,
        # unless the power cord is pulled. On Linux and Macs, /tmp
        # gets cleared at boot, so backup will be unlocked. On
        # Windows, there isn't an equivalent. Also note flock
        # doesn't work in some filesystems such as nfs.
        # For these reasons, locking is optional.
        # NOTE: lock_file must stay a local of this function; nothing
        # here unlocks it explicitly, LockFile.__del__ does.
        if have_option('--lock'):
            lock_file = LockFile('lockfile.lb')
            if not lock_file.lock():
                results = 'Attempt to lock failed.'
                send_object(-1)
                send_object(results)
                send_object(None)
                return results, None

        # Command?
        if have_option('--showfiles'):
            showfiles = catalog.get_showfiles(filelist, treepath_last)
            results = '\n'.join([filelist[n][0] for n in showfiles])
            subdir = None
        else:
            # Calc when the server should stop; used for --minutes control
            end_time = 0
            minutes = get_option_value('--minutes')
            if minutes is not None:
                end_time = int(time.time()) + int(minutes) * 60

            # Update catalog
            complete, transferred, md5hashes = catalog.update(filelist, treepath_last, end_time)
            if complete:
                results = 'catalog update complete, %d bytes transferred.' % transferred
            else:
                results = 'catalog update not complete. %d bytes transferred.' % transferred

            # Count stats
            verbose_log('catalog stats:')
            new = 0
            copy = 0
            for entry in catalog.parse_log(catalog.get_logfiles()[-1][1]):
                if entry[0] == 'copy':
                    copy += 1
                elif entry[0] == 'new':
                    new += 1
            results += '\ncatalog: %d new %d copied.' % (new, copy)

            # Create structure if complete
            # Don't create if --catalogonly specified
            # Don't create if new tree would be identical to old tree
            subdir = None
            if complete and not have_option('--catalogonly') and not is_tree_equal(filelist, treepath_last):
                backup_new = manager.new_backup()
                backup_new.build_tree(backup_last, filelist, md5hashes, catalog)
                subdir = backup_new.get_treepath()
                results += '\ntree created: %s' % subdir

                # 'latest' link. exists() follows the link and reports
                # a dangling one as absent, which would make symlink()
                # fail because the name is taken; islink() catches it.
                latest_link = join(manager.get_path(), 'latest')
                if os.path.islink(latest_link) or os.path.exists(latest_link):
                    os.remove(latest_link)
                os.symlink(backup_new.get_dirname(), latest_link)

                # tree stats
                new = 0
                copy = 0
                link = 0
                for entry in backup_new.parse_log():
                    if entry[0] == 'copy':
                        copy += 1
                    elif entry[0] == 'new':
                        new += 1
                    elif entry[0] == 'link':
                        link += 1
                results += '\ntree: %d new %d copied %d linked.' % (new, copy, link)
            else:
                results += '\ntree not created.'

        # Send results
        send_object(results)
        send_object(subdir)

    return results, subdir
|
1151
|
-
|
1152
|
-
def parse_address(string):
    """Parse these formats:
    dir
    user@host:dir

    Return dictionary:
    remote : user@host or empty
    path : path portion
    string : whole string

    Only the first ':' separates remote from path, so the path itself
    may contain colons. A path wrapped in shell quotes is unquoted; an
    empty path is returned as ''.
    """
    addr = {}
    addr['string'] = string
    if ':' in string:
        addr['remote'], addr['path'] = string.split(':', 1)
    else:
        addr['remote'] = ''
        addr['path'] = string

    # Check to see if we are in quotes
    # Unicode might be an issue here..
    tokens = shlex.split(addr['path'])
    if tokens:
        addr['path'] = tokens[0]
    else:
        addr['path'] = ''

    return addr
|
1176
|
-
|
1177
|
-
def have_option(option):
    """Return True if option appears verbatim among the command line arguments."""
    return option in sys.argv
|
1182
|
-
|
1183
|
-
def get_option_value(option):
    """Return the argument following the first occurrence of option.

    Returns None when option is not on the command line, or when it
    is the very last argument and therefore has no value.
    """
    args = sys.argv
    # The last argument can never be followed by a value, so it is not
    # considered as an option position.
    for position, arg in enumerate(args[:-1]):
        if arg == option:
            return args[position + 1]
    return None
|
1188
|
-
|
1189
|
-
def error(string):
    """Write string to stderr, prefixed with '*** ', and exit with status 1."""
    message = "".join(["*** ", string, "\n"])
    sys.stderr.write(message)
    sys.exit(1)
|
1192
|
-
|
1193
|
-
class LockFile:
    """Advisory, non-blocking lock backed by a file in the temp directory.

    lock() takes an exclusive flock on the file; unlock() removes the
    file and releases the lock. flock does not work on some file
    systems such as NFS.
    """

    def __init__(self, file_name):
        # /tmp gets cleared at system boot on *nix systems,
        # so the file will get cleared if the system reboots.
        # On Windows all bets are off.
        self.file_name = os.path.join(tempfile.gettempdir(), file_name)
        # Open file object while the lock is held, otherwise None.
        self.file = None

    def lock(self):
        """Try to take the lock; return True on success, False otherwise."""
        # Fail if locked twice. No need to reference count
        if self.file:
            return False

        # Attempt an exclusive, non-blocking lock
        # Doesn't work on NFS
        handle = open(self.file_name, 'w+')
        try:
            fcntl.flock(handle, fcntl.LOCK_EX | fcntl.LOCK_NB)
        except IOError:
            # Somebody else holds the lock; leave their file in place.
            handle.close()
            return False
        self.file = handle
        return True

    def unlock(self):
        """Release the lock if held and remove the lock file."""
        if self.file:
            handle = self.file
            self.file = None
            # Remove the name while the lock is still held, so the
            # file is not deleted after another process has locked it.
            try:
                os.unlink(self.file_name)
            except OSError:
                # Already gone (e.g. the temp directory was cleaned).
                pass
            handle.close()

    def __del__(self):
        # Gets called if script is control-c'd
        self.unlock()
|
1226
|
-
|
1227
|
-
# Main code
|
1228
|
-
|
1229
|
-
if __name__ == '__main__':
    # No arguments at all: show the module documentation.
    if len(sys.argv) == 1:
        print(__doc__)
        sys.exit(1)

    if len(sys.argv) < 3:
        error('Too few parameters.')

    # The last two arguments are the source and destination addresses.
    src = parse_address(sys.argv[-2])
    dst = parse_address(sys.argv[-1])

    if have_option('--ssh-i') or have_option('--ssh-C') or have_option('--ssh-p'):
        error("--ssh-x style options have been deprecated in favor of -e (rsync style). Please change your command.")

    # Server mode: talk over stdout/stdin and finish.
    if have_option('--server'):
        init_io(sys.stdout, sys.stdin)
        execute(src, dst, have_option('--source'))
        sys.exit(0)

    # Client starting. Only one remote allowed.
    if src['remote'] and dst['remote']:
        error('Source and Dest cannot both be remote.')

    # The source generates the file list, the dest asks for new files.
    # The server can talk through stderr to the console.
    # With a local source the client is the source and the server the
    # dest; with a remote source the roles are swapped.
    if src['remote']:
        start_server(src, dst, True)
        results, subdir = execute(src, dst, False)
    else:
        start_server(src, dst, False)
        results, subdir = execute(src, dst, True)

    # Print results
    print(results)

    # Verification: build (and with --verify run) an rsync dry-run
    # comparing the source with the tree that was just created.
    if subdir is not None:
        srcpath = os.path.normpath(src['path']) + '/'
        if src['remote']:
            srcpath = src['remote'] + ':' + repr(srcpath)
        dstpath = os.path.normpath(join(dst['path'], subdir))
        if dst['remote']:
            dstpath = dst['remote'] + ':' + repr(dstpath)

        if os.getuid() == 0 and have_option('--numeric-ids'):
            rsync_cmd = 'rsync -av --numeric-ids --dry-run %s %s' % (dump_arg(srcpath), dump_arg(dstpath))
        else:
            rsync_cmd = 'rsync -av --dry-run %s %s' % (dump_arg(srcpath), dump_arg(dstpath))

        if have_option('--verify'):
            print(rsync_cmd)
            sys.stdout.flush()
            os.system(rsync_cmd)
        else:
            print('to cross-verify:')
            print(rsync_cmd)

    # Close server
    fd_send.close()
    fd_recv.close()
    sys.exit(0)
|