dbox 0.4.4 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,7 +9,7 @@ module Dbox
9
9
  DB_FILE = ".dropbox.db"
10
10
  DB_TMPFILE = ".dropbox.db.tmp"
11
11
 
12
- attr_accessor :local_path
12
+ attr_accessor :local_path, :remote_path, :root
13
13
 
14
14
  def self.create(remote_path, local_path)
15
15
  api.create_dir(remote_path)
@@ -60,6 +60,10 @@ module Dbox
60
60
  end
61
61
  end
62
62
 
63
# Deletes the on-disk database file for +local_path+, if one exists.
# A no-op when no database is present.
def self.destroy!(local_path)
  return unless exists?(local_path)
  FileUtils.rm(db_file(local_path))
end
63
67
  # IMPORTANT: DropboxDb.new is private. Please use DropboxDb.create, DropboxDb.clone, or DropboxDb.load as the entry point.
64
68
  private_class_method :new
65
69
  def initialize(local_path, res)
@@ -233,11 +237,21 @@ module Dbox
233
237
  def update_remote; raise RuntimeError, "Not implemented"; end
234
238
 
235
239
# True when the server metadata differs from our recorded state —
# either the revision changed or the modification time changed
# (times are compared in Dropbox's string format via time_to_s).
def modified?(res)
  same_revision = (@revision == res["revision"])
  same_mtime = (time_to_s(@modified_at) == time_to_s(res["modified"]))
  out = !(same_revision && same_mtime)
  log.debug "#{path} modified? => #{out}"
  out
end
240
244
 
245
# Normalizes a timestamp into Dropbox's time-string format.
# Time values are converted to UTC and formatted; Strings are assumed
# to already be in Dropbox format and returned untouched. Any other
# value (including nil) yields nil.
def time_to_s(t)
  if t.is_a?(Time)
    # matches dropbox time format
    t.utc.strftime("%a, %d %b %Y %H:%M:%S +0000")
  elsif t.is_a?(String)
    t
  end
end
254
+
241
255
  def parse_time(t)
242
256
  case t
243
257
  when Time
@@ -0,0 +1,68 @@
1
require "thread"

#
# Executes queued units of work on a fixed-size pool of worker threads.
#
# Usage:
#
#   puts "Creating task queue with 5 concurrent workers"
#   tasks = ParallelTasks.new(5) { puts "Worker thread starting up" }
#
#   puts "Starting workers"
#   tasks.start
#
#   puts "Making some work"
#   20.times do
#     tasks.add do
#       x = rand(5)
#       puts "Sleeping for #{x}s"
#       sleep x
#     end
#   end
#
#   puts "Waiting for workers to finish"
#   tasks.finish
#
#   puts "Done"
#
class ParallelTasks
  # Sentinel pushed onto the queue once per worker to signal shutdown.
  STOP = Object.new

  # num_workers         - number of worker threads spawned by #start
  # initialization_proc - optional block run once inside each worker
  #                       thread before it starts consuming tasks
  #                       (e.g. to set up thread-local state)
  def initialize(num_workers, &initialization_proc)
    @num_workers = num_workers
    @initialization_proc = initialization_proc
    @workers = []
    # Queue is thread-safe and Queue#pop blocks until an item is
    # available, so no extra mutex or polling loop is needed.
    # (The previous implementation slept 0.1s while holding a mutex,
    # which blocked every other worker from checking the queue.)
    @work_queue = Queue.new
  end

  # Spins up the worker threads. Each worker blocks on the queue and
  # exits when it pops the STOP sentinel.
  def start
    @num_workers.times do
      @workers << Thread.new do
        @initialization_proc.call if @initialization_proc
        loop do
          task = @work_queue.pop
          break if task.equal?(STOP)
          task.call
        end
      end
    end
  end

  # Enqueues a unit of work to be run by some worker thread.
  def add(&proc)
    @work_queue << proc
  end

  # Signals that no more work will be added, then waits for the workers
  # to drain the queue and terminate.
  def finish
    @num_workers.times { @work_queue << STOP }
    @workers.each {|t| t.join }
  end
end
@@ -0,0 +1,595 @@
1
+ module Dbox
2
+ class Syncer
3
+ MAX_PARALLEL_DBOX_OPS = 5
4
+
5
+ include Loggable
6
+
7
+ def self.create(remote_path, local_path)
8
+ api.create_dir(remote_path)
9
+ clone(remote_path, local_path)
10
+ end
11
+
12
+ def self.clone(remote_path, local_path)
13
+ api.metadata(remote_path) # ensure remote exists
14
+ database = Database.create(remote_path, local_path)
15
+ Pull.new(database, api).execute
16
+ end
17
+
18
+ def self.pull(local_path)
19
+ database = Database.load(local_path)
20
+ Pull.new(database, api).execute
21
+ end
22
+
23
+ def self.push(local_path)
24
+ database = Database.load(local_path)
25
+ Push.new(database, api).execute
26
+ end
27
+
28
+ def self.move(new_remote_path, local_path)
29
+ database = Database.load(local_path)
30
+ api.move(database.metadata[:remote_path], new_remote_path)
31
+ database.update_metadata(:remote_path => new_remote_path)
32
+ end
33
+
34
+ def self.api
35
+ @@_api ||= API.connect
36
+ end
37
+
38
# Operation: shared plumbing for Pull and Push. Holds the database and
# API handles, translates between local/remote/relative paths, and
# provides timestamp and metadata helpers.
class Operation
  include Loggable

  attr_reader :database

  # database - the local sync database for this directory pair
  # api      - a connected Dropbox API client
  def initialize(database, api)
    @database = database
    @api = api
  end

  # Returns the thread-local API clone when one has been installed (see
  # clone_api_into_current_thread), falling back to the shared client.
  def api
    Thread.current[:api] || @api
  end

  # Installs a per-thread copy of the API client so worker threads do
  # not share one connection object.
  def clone_api_into_current_thread
    Thread.current[:api] = api.clone()
  end

  # Memoized database metadata (:local_path, :remote_path, ...).
  def metadata
    @_metadata ||= database.metadata
  end

  def local_path
    metadata[:local_path]
  end

  def remote_path
    metadata[:remote_path]
  end

  # Strips the local root prefix, returning a root-relative path.
  # Raises BadPath when the path is not under the local root.
  # NOTE(review): uses include?/sub rather than a prefix check, so a
  # path containing the root string mid-path would also match — assumed
  # not to occur in practice; confirm.
  def local_to_relative_path(path)
    if path.include?(local_path)
      path.sub(local_path, "").sub(/^\//, "")
    else
      raise BadPath, "Not a local path: #{path}"
    end
  end

  # Strips the remote root prefix, returning a root-relative path.
  # Raises BadPath when the path is not under the remote root.
  def remote_to_relative_path(path)
    if path.include?(remote_path)
      path.sub(remote_path, "").sub(/^\//, "")
    else
      raise BadPath, "Not a remote path: #{path}"
    end
  end

  # Joins a relative path onto the local root ("" maps to the root itself).
  def relative_to_local_path(path)
    if path && path.length > 0
      File.join(local_path, path)
    else
      local_path
    end
  end

  # Joins a relative path onto the remote root ("" maps to the root itself).
  def relative_to_remote_path(path)
    if path && path.length > 0
      File.join(remote_path, path)
    else
      remote_path
    end
  end

  # Filters out entries whose basename starts with "." (dotfiles).
  def remove_dotfiles(contents)
    contents.reject {|c| File.basename(c[:path]).start_with?(".") }
  end

  # Returns the database's children of dir keyed by path, or {} when the
  # dir has not been assigned an id yet.
  def current_dir_entries_as_hash(dir)
    if dir[:id]
      out = {}
      database.contents(dir[:id]).each {|e| out[e[:path]] = e }
      out
    else
      {}
    end
  end

  # Memoized path -> database id lookup.
  def lookup_id_by_path(path)
    @_ids ||= {}
    @_ids[path] ||= database.find_by_path(path)[:id]
  end

  # Normalizes a timestamp into Dropbox's time-string format; Strings
  # pass through, other values yield nil.
  def time_to_s(t)
    case t
    when Time
      # matches dropbox time format
      t.utc.strftime("%a, %d %b %Y %H:%M:%S +0000")
    when String
      t
    end
  end

  # Parses a Dropbox time string into a Time; Times pass through.
  def parse_time(t)
    case t
    when Time
      t
    when String
      Time.parse(t)
    end
  end

  # Runs the block, then restores the path's mtime to what it was
  # before, so our own writes don't look like user modifications.
  def saving_timestamp(path)
    mtime = File.mtime(path)
    yield
    File.utime(Time.now, mtime, path)
  end

  # Like saving_timestamp, but preserves the mtime of entry's parent dir.
  def saving_parent_timestamp(entry, &proc)
    local_path = relative_to_local_path(entry[:path])
    parent = File.dirname(local_path)
    saving_timestamp(parent, &proc)
  end

  # Stamps the local file with the entry's recorded modification time.
  def update_file_timestamp(entry)
    File.utime(Time.now, entry[:modified], relative_to_local_path(entry[:path]))
  end

  # Fetches server metadata for entry (passing the cached dir hash so
  # the server can answer :not_modified). Returns a normalized hash with
  # optional :contents (dotfiles removed), or :not_modified.
  # Raises RuntimeError on any other server response.
  def gather_remote_info(entry)
    res = api.metadata(relative_to_remote_path(entry[:path]), entry[:hash])
    case res
    when Hash
      out = process_basic_remote_props(res)
      out[:id] = entry[:id] if entry[:id]
      if res[:contents]
        out[:contents] = remove_dotfiles(res[:contents]).map do |c|
          o = process_basic_remote_props(c)
          o[:parent_id] = entry[:id] if entry[:id]
          o[:parent_path] = entry[:path]
          o
        end
      end
      out
    when :not_modified
      :not_modified
    else
      raise(RuntimeError, "Invalid result from server: #{res.inspect}")
    end
  end

  # Extracts the subset of server metadata we track, with the path made
  # root-relative and the mtime parsed.
  def process_basic_remote_props(res)
    out = {}
    out[:path] = remote_to_relative_path(res[:path])
    out[:modified] = parse_time(res[:modified])
    out[:is_dir] = res[:is_dir]
    out[:hash] = res[:hash] if res[:hash]
    out[:revision] = res[:revision] if res[:revision]
    out
  end

  # Builds a hidden ".<mangled>.part" temp filename under the local
  # root, recursing with a random suffix on collision.
  # NOTE(review): File.exists? is deprecated in modern Ruby (use
  # File.exist?) — kept for compatibility with the Ruby of this era.
  def generate_tmpfilename(path)
    out = File.join(local_path, ".#{path.gsub(/\W/, '-')}.part")
    if File.exists?(out)
      generate_tmpfilename("path#{rand(1000)}")
    else
      out
    end
  end

  # Removes leftover partial-download files from interrupted runs.
  def remove_tmpfiles
    Dir["#{local_path}/.*.part"].each {|f| FileUtils.rm(f) }
  end
end
199
+
200
# Pull: mirrors remote (Dropbox) changes down into the local directory.
class Pull < Operation
  def initialize(database, api)
    super(database, api)
  end

  # Dry run: logs the changes a pull would make without executing them.
  def practice
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Changes that would be executed:\n" + changes.map {|c| c.inspect }.join("\n")
  end

  # Computes and applies remote-to-local changes. File downloads run on
  # a parallel task queue; directory operations run inline. Returns a
  # changelist hash of sorted paths:
  #   { :created => [...], :deleted => [...], :updated => [...] }
  def execute
    remove_tmpfiles
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Executing changes:\n" + changes.map {|c| c.inspect }.join("\n")
    changelist = { :created => [], :deleted => [], :updated => [] }

    # spin up a parallel task queue
    ptasks = ParallelTasks.new(MAX_PARALLEL_DBOX_OPS - 1) { clone_api_into_current_thread() }
    ptasks.start

    # NOTE(review): worker threads append to changelist and write to the
    # database without a lock; this appears to rely on MRI's GVL making
    # those operations effectively atomic — confirm before running on
    # other Ruby implementations.
    changes.each do |op, c|
      case op
      when :create
        c[:parent_id] ||= lookup_id_by_path(c[:parent_path])
        if c[:is_dir]
          # directory creation cannot go in a thread, since later
          # operations might depend on the directory being there
          create_dir(c)
          database.add_entry(c[:path], true, c[:parent_id], c[:modified], c[:revision], c[:hash])
          changelist[:created] << c[:path]
        else
          ptasks.add do
            begin
              create_file(c)
              database.add_entry(c[:path], false, c[:parent_id], c[:modified], c[:revision], c[:hash])
              changelist[:created] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while downloading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :update
        if c[:is_dir]
          update_dir(c)
          database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :hash => c[:hash])
          changelist[:updated] << c[:path]
        else
          ptasks.add do
            begin
              update_file(c)
              database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :hash => c[:hash])
              changelist[:updated] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while downloading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :delete
        c[:is_dir] ? delete_dir(c) : delete_file(c)
        database.delete_entry_by_path(c[:path])
        changelist[:deleted] << c[:path]
      else
        raise(RuntimeError, "Unknown operation type: #{op}")
      end
    end

    # wait for operations to finish
    ptasks.finish

    # sort & return output
    changelist.keys.each {|k| changelist[k].sort! }
    changelist
  end

  # Recursively diffs server state against the database, returning a
  # list of [operation, change-hash] pairs (:create/:update/:delete).
  # Uses the server's :not_modified answer (via the cached dir hash) to
  # skip unchanged directories.
  def calculate_changes(dir, operation = :update)
    raise(ArgumentError, "Not a directory: #{dir.inspect}") unless dir[:is_dir]

    out = []
    recur_dirs = []

    # grab the metadata for the current dir (either off the filesystem or from Dropbox)
    res = gather_remote_info(dir)
    if res == :not_modified
      # directory itself was not modified, but we still need to
      # recur on subdirectories
      recur_dirs += database.subdirs(dir[:id]).map {|d| [:update, d] }
    else
      raise(ArgumentError, "Not a directory: #{res.inspect}") unless res[:is_dir]

      # dir may have changed -- calculate changes on contents
      contents = res.delete(:contents)
      if operation == :create || modified?(dir, res)
        res[:parent_id] = dir[:parent_id] if dir[:parent_id]
        res[:parent_path] = dir[:parent_path] if dir[:parent_path]
        out << [operation, res]
      end
      found_paths = []
      existing_entries = current_dir_entries_as_hash(dir)

      # process each entry that came back from dropbox/filesystem
      contents.each do |c|
        found_paths << c[:path]
        if entry = existing_entries[c[:path]]
          c[:id] = entry[:id]
          c[:modified] = parse_time(c[:modified])
          if c[:is_dir]
            # queue dir for later
            c[:hash] = entry[:hash]
            recur_dirs << [:update, c]
          else
            # update iff modified
            out << [:update, c] if modified?(entry, c)
          end
        else
          # create
          c[:modified] = parse_time(c[:modified])
          if c[:is_dir]
            # queue dir for later
            recur_dirs << [:create, c]
          else
            out << [:create, c]
          end
        end
      end

      # add any deletions
      out += (existing_entries.keys.sort - found_paths.sort).map do |p|
        [:delete, existing_entries[p]]
      end
    end

    # recursively process new & existing subdirectories in parallel
    # NOTE(review): spawns one thread per subdirectory with no upper
    # bound (unlike the MAX_PARALLEL_DBOX_OPS-limited download queue);
    # also the block params shadow this method's own operation/dir.
    threads = recur_dirs.map do |operation, dir|
      Thread.new do
        clone_api_into_current_thread()
        Thread.current[:out] = calculate_changes(dir, operation)
      end
    end
    threads.each {|t| t.join; out += t[:out] }

    out
  end

  # True when revision, mtime, or (for dirs) content hash differs
  # between the database entry and the server response.
  def modified?(entry, res)
    out = (entry[:revision] != res[:revision]) ||
          (time_to_s(entry[:modified]) != time_to_s(res[:modified]))
    out ||= (entry[:hash] != res[:hash]) if res.has_key?(:hash)
    log.debug "#{entry[:path]} modified? r#{entry[:revision]} vs. r#{res[:revision]}, h#{entry[:hash]} vs. h#{res[:hash]}, t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
    out
  end

  # Creates the local directory, preserving the parent's mtime.
  def create_dir(dir)
    local_path = relative_to_local_path(dir[:path])
    log.info "Creating #{local_path}"
    saving_parent_timestamp(dir) do
      FileUtils.mkdir_p(local_path)
      update_file_timestamp(dir)
    end
  end

  # An updated dir only needs its timestamp refreshed locally.
  def update_dir(dir)
    update_file_timestamp(dir)
  end

  # Removes the local directory tree, preserving the parent's mtime.
  def delete_dir(dir)
    local_path = relative_to_local_path(dir[:path])
    log.info "Deleting #{local_path}"
    saving_parent_timestamp(dir) do
      FileUtils.rm_r(local_path)
    end
  end

  # Downloads a new file, preserving the parent's mtime.
  def create_file(file)
    saving_parent_timestamp(file) do
      download_file(file)
    end
  end

  def update_file(file)
    download_file(file)
  end

  # Removes the local file, preserving the parent's mtime.
  def delete_file(file)
    local_path = relative_to_local_path(file[:path])
    log.info "Deleting file: #{local_path}"
    saving_parent_timestamp(file) do
      FileUtils.rm_rf(local_path)
    end
  end

  # Streams the download into a hidden .part temp file, then atomically
  # moves it into place and stamps the recorded mtime.
  def download_file(file)
    local_path = relative_to_local_path(file[:path])
    remote_path = relative_to_remote_path(file[:path])

    # stream download to temp file, then atomic move to real path
    tmp = generate_tmpfilename(file[:path])
    File.open(tmp, "w") do |f|
      api.get_file(remote_path, f)
    end
    FileUtils.mv(tmp, local_path)

    update_file_timestamp(file)
  end
end
406
+
407
# Push: mirrors local filesystem changes up to the remote (Dropbox) side.
class Push < Operation
  def initialize(database, api)
    super(database, api)
  end

  # Dry run: logs the changes a push would make without executing them.
  def practice
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Changes that would be executed:\n" + changes.map {|c| c.inspect }.join("\n")
  end

  # Computes and applies local-to-remote changes. Uploads and deletes
  # run on a parallel task queue; directory creation runs inline.
  # Returns a changelist hash of sorted paths:
  #   { :created => [...], :deleted => [...], :updated => [...] }
  def execute
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Executing changes:\n" + changes.map {|c| c.inspect }.join("\n")
    changelist = { :created => [], :deleted => [], :updated => [] }

    # spin up a parallel task queue
    ptasks = ParallelTasks.new(MAX_PARALLEL_DBOX_OPS - 1) { clone_api_into_current_thread() }
    ptasks.start

    changes.each do |op, c|
      case op
      when :create
        c[:parent_id] ||= lookup_id_by_path(c[:parent_path])

        if c[:is_dir]
          database.add_entry(c[:path], true, c[:parent_id], nil, nil, nil)

          # directory creation cannot go in a thread, since later
          # operations might depend on the directory being there
          create_dir(c)
          force_metadata_update_from_server(c)
          changelist[:created] << c[:path]
        else
          database.add_entry(c[:path], false, c[:parent_id], nil, nil, nil)

          # spin up a thread to upload the file
          ptasks.add do
            begin
              upload_file(c)
              force_metadata_update_from_server(c)
              changelist[:created] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while uploading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :update
        existing = database.find_by_path(c[:path])
        unless existing[:is_dir] == c[:is_dir]
          raise(RuntimeError, "Mode on #{c[:path]} changed between file and dir -- not supported yet")
        end

        # only update files -- nothing to do to update a dir
        if !c[:is_dir]

          # spin up a thread to upload the file
          ptasks.add do
            begin
              upload_file(c)
              force_metadata_update_from_server(c)
              changelist[:updated] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while uploading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :delete
        # spin up a thread to delete the file/dir
        ptasks.add do
          begin
            begin
              if c[:is_dir]
                delete_dir(c)
              else
                delete_file(c)
              end
            rescue Dbox::RemoteMissing
              # safe to delete even if remote is already gone
            end
            database.delete_entry_by_path(c[:path])
            changelist[:deleted] << c[:path]
          rescue Dbox::ServerError => e
            # BUGFIX: bind the exception (=> e); previously `e` was
            # undefined here, raising NameError whenever a delete failed.
            log.error "Error while deleting #{c[:path]}: #{e.inspect}"
          end
        end
      else
        raise(RuntimeError, "Unknown operation type: #{op}")
      end
    end

    # wait for operations to finish
    ptasks.finish

    # sort & return output
    changelist.keys.each {|k| changelist[k].sort! }
    changelist
  end

  # Recursively diffs the local filesystem against the database,
  # returning [operation, change-hash] pairs (:create/:update/:delete).
  def calculate_changes(dir)
    raise(ArgumentError, "Not a directory: #{dir.inspect}") unless dir[:is_dir]

    out = []
    recur_dirs = []

    existing_entries = current_dir_entries_as_hash(dir)
    child_paths = list_contents(dir).sort

    child_paths.each do |p|
      c = { :path => p, :modified => mtime(p), :is_dir => is_dir(p), :parent_path => dir[:path] }
      if entry = existing_entries[p]
        c[:id] = entry[:id]
        recur_dirs << c if c[:is_dir] # queue dir for later
        out << [:update, c] if modified?(entry, c) # update iff modified
      else
        # create
        out << [:create, c]
        recur_dirs << c if c[:is_dir]
      end
    end

    # add any deletions
    out += (existing_entries.keys.sort - child_paths).map do |p|
      [:delete, existing_entries[p]]
    end

    # recursively process new & existing subdirectories
    # (block param renamed so it no longer shadows the dir argument)
    recur_dirs.each do |subdir|
      out += calculate_changes(subdir)
    end

    out
  end

  # Local mtime for a root-relative path.
  def mtime(path)
    File.mtime(relative_to_local_path(path))
  end

  # True when the root-relative path is a local directory.
  def is_dir(path)
    File.directory?(relative_to_local_path(path))
  end

  # True when the local mtime differs from the recorded one (compared
  # in Dropbox's time-string format).
  def modified?(entry, res)
    out = time_to_s(entry[:modified]) != time_to_s(res[:modified])
    log.debug "#{entry[:path]} modified? t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
    out
  end

  # Non-hidden entries of the given local directory, as relative paths.
  def list_contents(dir)
    local_path = relative_to_local_path(dir[:path])
    paths = Dir.entries(local_path).reject {|s| s == "." || s == ".." || s.start_with?(".") }
    paths.map {|p| local_to_relative_path(File.join(local_path, p)) }
  end

  # Creates the directory on the remote side.
  def create_dir(dir)
    remote_path = relative_to_remote_path(dir[:path])
    log.info "Creating #{remote_path}"
    api.create_dir(remote_path)
  end

  # Deletes the directory on the remote side.
  def delete_dir(dir)
    remote_path = relative_to_remote_path(dir[:path])
    api.delete_dir(remote_path)
  end

  # Deletes the file on the remote side.
  def delete_file(file)
    remote_path = relative_to_remote_path(file[:path])
    api.delete_file(remote_path)
  end

  # Streams the local file up to Dropbox.
  def upload_file(file)
    local_path = relative_to_local_path(file[:path])
    remote_path = relative_to_remote_path(file[:path])
    File.open(local_path) do |f|
      api.put_file(remote_path, f)
    end
  end

  # Re-fetches server metadata after an upload so the database reflects
  # the authoritative revision/mtime, then syncs the local file timestamp.
  def force_metadata_update_from_server(entry)
    res = gather_remote_info(entry)
    unless res == :not_modified
      database.update_entry_by_path(entry[:path], :modified => res[:modified], :revision => res[:revision], :hash => res[:hash])
    end
    update_file_timestamp(database.find_by_path(entry[:path]))
  end
end
594
+ end
595
+ end