dbox 0.4.4 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ module Dbox
9
9
  DB_FILE = ".dropbox.db"
10
10
  DB_TMPFILE = ".dropbox.db.tmp"
11
11
 
12
- attr_accessor :local_path
12
+ attr_accessor :local_path, :remote_path, :root
13
13
 
14
14
  def self.create(remote_path, local_path)
15
15
  api.create_dir(remote_path)
@@ -60,6 +60,10 @@ module Dbox
60
60
  end
61
61
  end
62
62
 
63
def self.destroy!(local_path)
  # Remove the on-disk database file for this dropbox, if one exists.
  return unless exists?(local_path)
  FileUtils.rm(db_file(local_path))
end
66
+
63
67
  # IMPORTANT: DropboxDb.new is private. Please use DropboxDb.create, DropboxDb.clone, or DropboxDb.load as the entry point.
64
68
  private_class_method :new
65
69
  def initialize(local_path, res)
@@ -233,11 +237,21 @@ module Dbox
233
237
  def update_remote; raise RuntimeError, "Not implemented"; end
234
238
 
235
239
def modified?(res)
  # An entry is unchanged only when both the revision and the
  # string-normalized modification time still match the server's copy.
  unchanged = @revision == res["revision"] &&
              time_to_s(@modified_at) == time_to_s(res["modified"])
  out = !unchanged
  log.debug "#{path} modified? => #{out}"
  out
end
240
244
 
245
def time_to_s(t)
  # Normalize a timestamp for comparison: Time values are rendered in
  # Dropbox's RFC-822-style UTC format; strings (assumed to already be
  # in that format) pass through. Other values (e.g. nil) yield nil.
  return t if t.is_a?(String)
  t.utc.strftime("%a, %d %b %Y %H:%M:%S +0000") if t.is_a?(Time)
end
254
+
241
255
  def parse_time(t)
242
256
  case t
243
257
  when Time
@@ -0,0 +1,68 @@
1
+ require "thread"
2
+
3
+ #
4
+ # Usage:
5
+ #
6
+ # puts "Creating task queue with 5 concurrent workers"
7
+ # tasks = ParallelTasks.new(5) { puts "Worker thread starting up" }
8
+ #
9
+ # puts "Starting workers"
10
+ # tasks.start
11
+ #
12
+ # puts "Making some work"
13
+ # 20.times do
14
+ # tasks.add do
15
+ # x = rand(5)
16
+ # puts "Sleeping for #{x}s"
17
+ # sleep x
18
+ # end
19
+ # end
20
+ #
21
+ # puts "Waiting for workers to finish"
22
+ # tasks.finish
23
+ #
24
+ # puts "Done"
25
+ #
26
+ class ParallelTasks
27
+ def initialize(num_workers, &initialization_proc)
28
+ @num_workers = num_workers
29
+ @initialization_proc = initialization_proc
30
+ @workers = []
31
+ @work_queue = Queue.new
32
+ @semaphore = Mutex.new
33
+ @done_making_tasks = false
34
+ end
35
+
36
+ def start
37
+ @num_workers.times do
38
+ @workers << Thread.new do
39
+ @initialization_proc.call if @initialization_proc
40
+ done = false
41
+ while !done
42
+ task = nil
43
+ @semaphore.synchronize do
44
+ unless @work_queue.empty?
45
+ task = @work_queue.pop()
46
+ else
47
+ if @done_making_tasks
48
+ done = true
49
+ else
50
+ sleep 0.1
51
+ end
52
+ end
53
+ end
54
+ task.call if task
55
+ end
56
+ end
57
+ end
58
+ end
59
+
60
+ def add(&proc)
61
+ @work_queue << proc
62
+ end
63
+
64
+ def finish
65
+ @done_making_tasks = true
66
+ @workers.each {|t| t.join }
67
+ end
68
+ end
@@ -0,0 +1,595 @@
1
+ module Dbox
2
+ class Syncer
3
+ MAX_PARALLEL_DBOX_OPS = 5
4
+
5
+ include Loggable
6
+
7
+ def self.create(remote_path, local_path)
8
+ api.create_dir(remote_path)
9
+ clone(remote_path, local_path)
10
+ end
11
+
12
+ def self.clone(remote_path, local_path)
13
+ api.metadata(remote_path) # ensure remote exists
14
+ database = Database.create(remote_path, local_path)
15
+ Pull.new(database, api).execute
16
+ end
17
+
18
+ def self.pull(local_path)
19
+ database = Database.load(local_path)
20
+ Pull.new(database, api).execute
21
+ end
22
+
23
+ def self.push(local_path)
24
+ database = Database.load(local_path)
25
+ Push.new(database, api).execute
26
+ end
27
+
28
+ def self.move(new_remote_path, local_path)
29
+ database = Database.load(local_path)
30
+ api.move(database.metadata[:remote_path], new_remote_path)
31
+ database.update_metadata(:remote_path => new_remote_path)
32
+ end
33
+
34
+ def self.api
35
+ @@_api ||= API.connect
36
+ end
37
+
38
# Base class for Pull/Push. Holds the database + API handles and the
# path-translation / metadata helpers shared by both directions.
class Operation
  include Loggable

  attr_reader :database

  # @param database [Database] local metadata store for this dropbox
  # @param api [API] connected Dropbox API client
  def initialize(database, api)
    @database = database
    @api = api
  end

  # Worker threads use their own API clone (see
  # clone_api_into_current_thread); everyone else shares @api.
  def api
    Thread.current[:api] || @api
  end

  # Give the current thread a private API client so parallel requests
  # don't share connection state.
  def clone_api_into_current_thread
    Thread.current[:api] = api.clone
  end

  def metadata
    @_metadata ||= database.metadata
  end

  def local_path
    metadata[:local_path]
  end

  def remote_path
    metadata[:remote_path]
  end

  # Convert an absolute local path into a path relative to the dropbox root.
  # @raise [BadPath] if path is not inside the local dropbox
  def local_to_relative_path(path)
    raise(BadPath, "Not a local path: #{path}") unless path.include?(local_path)
    path.sub(local_path, "").sub(/^\//, "")
  end

  # Convert an absolute remote path into a path relative to the remote root.
  # @raise [BadPath] if path is not inside the remote dropbox
  def remote_to_relative_path(path)
    raise(BadPath, "Not a remote path: #{path}") unless path.include?(remote_path)
    path.sub(remote_path, "").sub(/^\//, "")
  end

  # Relative path -> absolute local path ("" or nil maps to the root).
  def relative_to_local_path(path)
    path && path.length > 0 ? File.join(local_path, path) : local_path
  end

  # Relative path -> absolute remote path ("" or nil maps to the root).
  def relative_to_remote_path(path)
    path && path.length > 0 ? File.join(remote_path, path) : remote_path
  end

  # Drop hidden files (dotfiles) from a remote contents listing.
  def remove_dotfiles(contents)
    contents.reject { |c| File.basename(c[:path]).start_with?(".") }
  end

  # Map of relative path => database entry for the children of dir.
  def current_dir_entries_as_hash(dir)
    return {} unless dir[:id]
    out = {}
    database.contents(dir[:id]).each { |e| out[e[:path]] = e }
    out
  end

  # Memoized lookup of a database id by relative path.
  def lookup_id_by_path(path)
    @_ids ||= {}
    @_ids[path] ||= database.find_by_path(path)[:id]
  end

  # Normalize a timestamp for comparison: Time values are rendered in
  # Dropbox's RFC-822-style UTC format; strings pass through; other
  # values (e.g. nil) yield nil.
  def time_to_s(t)
    case t
    when Time
      t.utc.strftime("%a, %d %b %Y %H:%M:%S +0000")
    when String
      t
    end
  end

  # Coerce a timestamp into a Time (nil stays nil).
  def parse_time(t)
    case t
    when Time
      t
    when String
      Time.parse(t)
    end
  end

  # Run a block that may touch path's contents, then restore its mtime
  # so the change doesn't look like a local modification.
  def saving_timestamp(path)
    mtime = File.mtime(path)
    yield
    File.utime(Time.now, mtime, path)
  end

  # Like saving_timestamp, but for the parent directory of entry.
  def saving_parent_timestamp(entry, &proc)
    local_path = relative_to_local_path(entry[:path])
    parent = File.dirname(local_path)
    saving_timestamp(parent, &proc)
  end

  # Stamp the local file with the remote modification time.
  def update_file_timestamp(entry)
    File.utime(Time.now, entry[:modified], relative_to_local_path(entry[:path]))
  end

  # Fetch remote metadata for entry. Returns :not_modified when the
  # server says the (hash-guarded) listing is unchanged, otherwise a
  # normalized hash, with child entries under :contents when present.
  # @raise [RuntimeError] on an unrecognized server response
  def gather_remote_info(entry)
    res = api.metadata(relative_to_remote_path(entry[:path]), entry[:hash])
    case res
    when Hash
      out = process_basic_remote_props(res)
      out[:id] = entry[:id] if entry[:id]
      if res[:contents]
        out[:contents] = remove_dotfiles(res[:contents]).map do |c|
          o = process_basic_remote_props(c)
          o[:parent_id] = entry[:id] if entry[:id]
          o[:parent_path] = entry[:path]
          o
        end
      end
      out
    when :not_modified
      :not_modified
    else
      raise(RuntimeError, "Invalid result from server: #{res.inspect}")
    end
  end

  # Extract the fields we track from a raw remote metadata hash.
  def process_basic_remote_props(res)
    out = {}
    out[:path] = remote_to_relative_path(res[:path])
    out[:modified] = parse_time(res[:modified])
    out[:is_dir] = res[:is_dir]
    out[:hash] = res[:hash] if res[:hash]
    out[:revision] = res[:revision] if res[:revision]
    out
  end

  # Build a hidden ".<mangled-path>.part" temp filename for an in-progress
  # download, retrying with a random name on collision.
  def generate_tmpfilename(path)
    out = File.join(local_path, ".#{path.gsub(/\W/, '-')}.part")
    # FIX: File.exists? is deprecated and removed in Ruby 3.2; use File.exist?.
    if File.exist?(out)
      generate_tmpfilename("path#{rand(1000)}")
    else
      out
    end
  end

  # Clean up any leftover partial-download files.
  def remove_tmpfiles
    Dir["#{local_path}/.*.part"].each { |f| FileUtils.rm(f) }
  end
end
199
+
200
# Downloads remote changes into the local tree.
class Pull < Operation
  def initialize(database, api)
    super(database, api)
  end

  # Log the changes a pull would make, without applying them.
  def practice
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Changes that would be executed:\n" + changes.map {|c| c.inspect }.join("\n")
  end

  # Apply remote changes locally. Returns a changelist hash with
  # :created, :deleted and :updated arrays of relative paths.
  def execute
    remove_tmpfiles
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Executing changes:\n" + changes.map {|c| c.inspect }.join("\n")
    changelist = { :created => [], :deleted => [], :updated => [] }

    # spin up a parallel task queue
    ptasks = ParallelTasks.new(MAX_PARALLEL_DBOX_OPS - 1) { clone_api_into_current_thread() }
    ptasks.start

    # NOTE(review): changelist arrays are appended to from worker threads;
    # safe under MRI's GVL, but not guaranteed on other Ruby VMs.
    changes.each do |op, c|
      case op
      when :create
        c[:parent_id] ||= lookup_id_by_path(c[:parent_path])
        if c[:is_dir]
          # directory creation cannot go in a thread, since later
          # operations might depend on the directory being there
          create_dir(c)
          database.add_entry(c[:path], true, c[:parent_id], c[:modified], c[:revision], c[:hash])
          changelist[:created] << c[:path]
        else
          ptasks.add do
            begin
              create_file(c)
              database.add_entry(c[:path], false, c[:parent_id], c[:modified], c[:revision], c[:hash])
              changelist[:created] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while downloading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :update
        if c[:is_dir]
          update_dir(c)
          database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :hash => c[:hash])
          changelist[:updated] << c[:path]
        else
          ptasks.add do
            begin
              update_file(c)
              database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :hash => c[:hash])
              changelist[:updated] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while downloading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :delete
        c[:is_dir] ? delete_dir(c) : delete_file(c)
        database.delete_entry_by_path(c[:path])
        changelist[:deleted] << c[:path]
      else
        raise(RuntimeError, "Unknown operation type: #{op}")
      end
    end

    # wait for operations to finish
    ptasks.finish

    # sort & return output
    changelist.keys.each {|k| changelist[k].sort! }
    changelist
  end

  # Diff the remote state of dir against the database, returning a list
  # of [:create|:update|:delete, entry] pairs. Recurs into subdirectories
  # in parallel threads.
  def calculate_changes(dir, operation = :update)
    raise(ArgumentError, "Not a directory: #{dir.inspect}") unless dir[:is_dir]

    out = []
    recur_dirs = []

    # grab the metadata for the current dir (either off the filesystem or from Dropbox)
    res = gather_remote_info(dir)
    if res == :not_modified
      # directory itself was not modified, but we still need to
      # recur on subdirectories
      recur_dirs += database.subdirs(dir[:id]).map {|d| [:update, d] }
    else
      raise(ArgumentError, "Not a directory: #{res.inspect}") unless res[:is_dir]

      # dir may have changed -- calculate changes on contents
      contents = res.delete(:contents)
      if operation == :create || modified?(dir, res)
        res[:parent_id] = dir[:parent_id] if dir[:parent_id]
        res[:parent_path] = dir[:parent_path] if dir[:parent_path]
        out << [operation, res]
      end
      found_paths = []
      existing_entries = current_dir_entries_as_hash(dir)

      # process each entry that came back from dropbox/filesystem
      contents.each do |c|
        found_paths << c[:path]
        if entry = existing_entries[c[:path]]
          c[:id] = entry[:id]
          c[:modified] = parse_time(c[:modified])
          if c[:is_dir]
            # queue dir for later
            c[:hash] = entry[:hash]
            recur_dirs << [:update, c]
          else
            # update iff modified
            out << [:update, c] if modified?(entry, c)
          end
        else
          # create
          c[:modified] = parse_time(c[:modified])
          if c[:is_dir]
            # queue dir for later
            recur_dirs << [:create, c]
          else
            out << [:create, c]
          end
        end
      end

      # add any deletions
      out += (existing_entries.keys.sort - found_paths.sort).map do |p|
        [:delete, existing_entries[p]]
      end
    end

    # recursively process new & existing subdirectories in parallel
    # (block params renamed so they no longer shadow this method's
    # `operation`/`dir` arguments)
    threads = recur_dirs.map do |subdir_op, subdir|
      Thread.new do
        clone_api_into_current_thread()
        Thread.current[:out] = calculate_changes(subdir, subdir_op)
      end
    end
    threads.each {|t| t.join; out += t[:out] }

    out
  end

  # A remote entry counts as modified when its revision, mtime, or
  # (for dirs) content hash differs from what the database recorded.
  def modified?(entry, res)
    out = (entry[:revision] != res[:revision]) ||
          (time_to_s(entry[:modified]) != time_to_s(res[:modified]))
    out ||= (entry[:hash] != res[:hash]) if res.has_key?(:hash)
    log.debug "#{entry[:path]} modified? r#{entry[:revision]} vs. r#{res[:revision]}, h#{entry[:hash]} vs. h#{res[:hash]}, t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
    out
  end

  def create_dir(dir)
    local_path = relative_to_local_path(dir[:path])
    log.info "Creating #{local_path}"
    saving_parent_timestamp(dir) do
      FileUtils.mkdir_p(local_path)
      update_file_timestamp(dir)
    end
  end

  def update_dir(dir)
    update_file_timestamp(dir)
  end

  def delete_dir(dir)
    local_path = relative_to_local_path(dir[:path])
    log.info "Deleting #{local_path}"
    saving_parent_timestamp(dir) do
      FileUtils.rm_r(local_path)
    end
  end

  def create_file(file)
    saving_parent_timestamp(file) do
      download_file(file)
    end
  end

  def update_file(file)
    download_file(file)
  end

  def delete_file(file)
    local_path = relative_to_local_path(file[:path])
    log.info "Deleting file: #{local_path}"
    saving_parent_timestamp(file) do
      FileUtils.rm_rf(local_path)
    end
  end

  def download_file(file)
    local_path = relative_to_local_path(file[:path])
    remote_path = relative_to_remote_path(file[:path])

    # stream download to temp file, then atomic move to real path
    tmp = generate_tmpfilename(file[:path])
    # FIX: open in binary mode -- text mode ("w") corrupts binary files
    # on platforms that translate newlines and applies text encodings
    File.open(tmp, "wb") do |f|
      api.get_file(remote_path, f)
    end
    FileUtils.mv(tmp, local_path)

    update_file_timestamp(file)
  end
end
406
+
407
# Uploads local changes to the remote.
class Push < Operation
  def initialize(database, api)
    super(database, api)
  end

  # Log the changes a push would make, without applying them.
  def practice
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Changes that would be executed:\n" + changes.map {|c| c.inspect }.join("\n")
  end

  # Apply local changes to the remote. Returns a changelist hash with
  # :created, :deleted and :updated arrays of relative paths.
  def execute
    dir = database.root_dir
    changes = calculate_changes(dir)
    log.debug "Executing changes:\n" + changes.map {|c| c.inspect }.join("\n")
    changelist = { :created => [], :deleted => [], :updated => [] }

    # spin up a parallel task queue
    ptasks = ParallelTasks.new(MAX_PARALLEL_DBOX_OPS - 1) { clone_api_into_current_thread() }
    ptasks.start

    changes.each do |op, c|
      case op
      when :create
        c[:parent_id] ||= lookup_id_by_path(c[:parent_path])

        if c[:is_dir]
          database.add_entry(c[:path], true, c[:parent_id], nil, nil, nil)

          # directory creation cannot go in a thread, since later
          # operations might depend on the directory being there
          create_dir(c)
          force_metadata_update_from_server(c)
          changelist[:created] << c[:path]
        else
          database.add_entry(c[:path], false, c[:parent_id], nil, nil, nil)

          # spin up a thread to upload the file
          ptasks.add do
            begin
              upload_file(c)
              force_metadata_update_from_server(c)
              changelist[:created] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while uploading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :update
        existing = database.find_by_path(c[:path])
        unless existing[:is_dir] == c[:is_dir]
          raise(RuntimeError, "Mode on #{c[:path]} changed between file and dir -- not supported yet")
        end

        # only update files -- nothing to do to update a dir
        if !c[:is_dir]

          # spin up a thread to upload the file
          ptasks.add do
            begin
              upload_file(c)
              force_metadata_update_from_server(c)
              changelist[:updated] << c[:path]
            rescue Dbox::ServerError => e
              log.error "Error while uploading #{c[:path]}: #{e.inspect}"
            end
          end
        end
      when :delete
        # spin up a thread to delete the file/dir
        ptasks.add do
          begin
            begin
              if c[:is_dir]
                delete_dir(c)
              else
                delete_file(c)
              end
            rescue Dbox::RemoteMissing
              # safe to delete even if remote is already gone
            end
            database.delete_entry_by_path(c[:path])
            changelist[:deleted] << c[:path]
          # FIX: the exception was never bound (`rescue Dbox::ServerError`),
          # so interpolating `e` below raised NameError whenever this fired
          rescue Dbox::ServerError => e
            log.error "Error while deleting #{c[:path]}: #{e.inspect}"
          end
        end
      else
        raise(RuntimeError, "Unknown operation type: #{op}")
      end
    end

    # wait for operations to finish
    ptasks.finish

    # sort & return output
    changelist.keys.each {|k| changelist[k].sort! }
    changelist
  end

  # Diff the local filesystem under dir against the database, returning
  # a list of [:create|:update|:delete, entry] pairs (recursive).
  def calculate_changes(dir)
    raise(ArgumentError, "Not a directory: #{dir.inspect}") unless dir[:is_dir]

    out = []
    recur_dirs = []

    existing_entries = current_dir_entries_as_hash(dir)
    child_paths = list_contents(dir).sort

    child_paths.each do |p|
      c = { :path => p, :modified => mtime(p), :is_dir => is_dir(p), :parent_path => dir[:path] }
      if entry = existing_entries[p]
        c[:id] = entry[:id]
        recur_dirs << c if c[:is_dir] # queue dir for later
        out << [:update, c] if modified?(entry, c) # update iff modified
      else
        # create
        out << [:create, c]
        recur_dirs << c if c[:is_dir]
      end
    end

    # add any deletions
    out += (existing_entries.keys.sort - child_paths).map do |p|
      [:delete, existing_entries[p]]
    end

    # recursively process new & existing subdirectories
    # (block param renamed so it no longer shadows this method's `dir`)
    recur_dirs.each do |subdir|
      out += calculate_changes(subdir)
    end

    out
  end

  def mtime(path)
    File.mtime(relative_to_local_path(path))
  end

  def is_dir(path)
    File.directory?(relative_to_local_path(path))
  end

  # A local entry counts as modified when its mtime differs from the
  # database's record (compared in normalized string form).
  def modified?(entry, res)
    out = time_to_s(entry[:modified]) != time_to_s(res[:modified])
    log.debug "#{entry[:path]} modified? t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
    out
  end

  # Relative paths of dir's visible children (dotfiles excluded).
  def list_contents(dir)
    local_path = relative_to_local_path(dir[:path])
    paths = Dir.entries(local_path).reject {|s| s == "." || s == ".." || s.start_with?(".") }
    paths.map {|p| local_to_relative_path(File.join(local_path, p)) }
  end

  def create_dir(dir)
    remote_path = relative_to_remote_path(dir[:path])
    log.info "Creating #{remote_path}"
    api.create_dir(remote_path)
  end

  def delete_dir(dir)
    remote_path = relative_to_remote_path(dir[:path])
    api.delete_dir(remote_path)
  end

  def delete_file(file)
    remote_path = relative_to_remote_path(file[:path])
    api.delete_file(remote_path)
  end

  def upload_file(file)
    local_path = relative_to_local_path(file[:path])
    remote_path = relative_to_remote_path(file[:path])
    # FIX: read in binary mode so binary files are uploaded unmangled
    File.open(local_path, "rb") do |f|
      api.put_file(remote_path, f)
    end
  end

  # Re-fetch the entry's metadata from the server and sync the database
  # and local file timestamp to it.
  def force_metadata_update_from_server(entry)
    res = gather_remote_info(entry)
    unless res == :not_modified
      database.update_entry_by_path(entry[:path], :modified => res[:modified], :revision => res[:revision], :hash => res[:hash])
    end
    update_file_timestamp(database.find_by_path(entry[:path]))
  end
end
594
+ end
595
+ end