dbox 0.5.3 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,6 +3,7 @@ module Dbox
3
3
 
4
4
  class Database
5
5
  include Loggable
6
+ include Utils
6
7
 
7
8
  DB_FILENAME = ".dbox.sqlite3"
8
9
 
@@ -61,16 +62,20 @@ module Dbox
61
62
  path varchar(255) UNIQUE NOT NULL,
62
63
  is_dir boolean NOT NULL,
63
64
  parent_id integer REFERENCES entries(id) ON DELETE CASCADE,
64
- hash varchar(255),
65
+ local_hash varchar(255),
66
+ remote_hash varchar(255),
65
67
  modified datetime,
66
- revision integer
68
+ revision varchar(255)
67
69
  );
68
70
  CREATE INDEX IF NOT EXISTS entry_parent_ids ON entries(parent_id);
69
71
  })
70
72
  end
71
73
 
72
74
  def migrate
75
+ # removing local_path from metadata
73
76
  if metadata[:version] < 2
77
+ log.info "Migrating to database schema v2"
78
+
74
79
  @db.execute_batch(%{
75
80
  BEGIN TRANSACTION;
76
81
  ALTER TABLE metadata RENAME TO metadata_old;
@@ -85,15 +90,105 @@ module Dbox
85
90
  COMMIT;
86
91
  })
87
92
  end
93
+
94
+ # migrating to new Dropbox API 1.0 (from integer revisions to
95
+ # string revisions)
96
+ if metadata[:version] < 3
97
+ log.info "Migrating to database schema v3"
98
+
99
+ api = API.connect
100
+ new_revisions = {}
101
+
102
+ # fetch the new revision IDs from dropbox
103
+ find_entries().each do |entry|
104
+ path = relative_to_remote_path(entry[:path])
105
+ begin
106
+ data = api.metadata(path, nil, false)
107
+ # record new revision ("rev") iff old revisions ("revision") match
108
+ if entry[:revision] == data["revision"]
109
+ new_revisions[entry[:id]] = data["rev"]
110
+ end
111
+ rescue Dbox::ServerError => e
112
+ log.error e
113
+ end
114
+ end
115
+
116
+ # modify the table to have a string for revision (blanked out
117
+ # for each entry)
118
+ @db.execute_batch(%{
119
+ BEGIN TRANSACTION;
120
+ ALTER TABLE entries RENAME TO entries_old;
121
+ CREATE TABLE entries (
122
+ id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
123
+ path varchar(255) UNIQUE NOT NULL,
124
+ is_dir boolean NOT NULL,
125
+ parent_id integer REFERENCES entries(id) ON DELETE CASCADE,
126
+ hash varchar(255),
127
+ modified datetime,
128
+ revision varchar(255)
129
+ );
130
+ INSERT INTO entries SELECT id, path, is_dir, parent_id, hash, modified, null FROM entries_old;
131
+ })
132
+
133
+ # copy in the new revision IDs
134
+ new_revisions.each do |id, revision|
135
+ update_entry_by_id(id, :revision => revision)
136
+ end
137
+
138
+ # drop old table and commit
139
+ @db.execute_batch(%{
140
+ DROP TABLE entries_old;
141
+ UPDATE metadata SET version = 3;
142
+ COMMIT;
143
+ })
144
+ end
145
+
146
+ if metadata[:version] < 4
147
+ log.info "Migrating to database schema v4"
148
+
149
+ # add local_hash column, rename hash to remote_hash
150
+ @db.execute_batch(%{
151
+ BEGIN TRANSACTION;
152
+ ALTER TABLE entries RENAME TO entries_old;
153
+ CREATE TABLE entries (
154
+ id integer PRIMARY KEY AUTOINCREMENT NOT NULL,
155
+ path varchar(255) UNIQUE NOT NULL,
156
+ is_dir boolean NOT NULL,
157
+ parent_id integer REFERENCES entries(id) ON DELETE CASCADE,
158
+ local_hash varchar(255),
159
+ remote_hash varchar(255),
160
+ modified datetime,
161
+ revision varchar(255)
162
+ );
163
+ INSERT INTO entries SELECT id, path, is_dir, parent_id, null, hash, modified, revision FROM entries_old;
164
+ })
165
+
166
+ # calculate hashes on files with same timestamp as we have (as that was the previous mechanism used to check freshness)
167
+ find_entries().each do |entry|
168
+ unless entry[:is_dir]
169
+ path = relative_to_local_path(entry[:path])
170
+ if times_equal?(File.mtime(path), entry[:modified])
171
+ update_entry_by_id(entry[:id], :local_hash => calculate_hash(path))
172
+ end
173
+ end
174
+ end
175
+
176
+ # drop old table and commit
177
+ @db.execute_batch(%{
178
+ DROP TABLE entries_old;
179
+ UPDATE metadata SET version = 4;
180
+ COMMIT;
181
+ })
182
+ end
88
183
  end
89
184
 
90
185
  METADATA_COLS = [ :remote_path, :version ] # don't need to return id
91
- ENTRY_COLS = [ :id, :path, :is_dir, :parent_id, :hash, :modified, :revision ]
186
+ ENTRY_COLS = [ :id, :path, :is_dir, :parent_id, :local_hash, :remote_hash, :modified, :revision ]
92
187
 
93
188
  def bootstrap(remote_path)
94
189
  @db.execute(%{
95
190
  INSERT INTO metadata (remote_path, version) VALUES (?, ?);
96
- }, remote_path, 2)
191
+ }, remote_path, 4)
97
192
  @db.execute(%{
98
193
  INSERT INTO entries (path, is_dir) VALUES (?, ?)
99
194
  }, "", 1)
@@ -116,6 +211,10 @@ module Dbox
116
211
  out
117
212
  end
118
213
 
214
+ def remote_path
215
+ metadata()[:remote_path]
216
+ end
217
+
119
218
  def update_metadata(fields)
120
219
  set_str = fields.keys.map {|k| "#{k}=?" }.join(",")
121
220
  @db.execute(%{
@@ -142,8 +241,8 @@ module Dbox
142
241
  find_entries("WHERE parent_id=? AND is_dir=1", dir_id)
143
242
  end
144
243
 
145
- def add_entry(path, is_dir, parent_id, modified, revision, hash)
146
- insert_entry(:path => path, :is_dir => is_dir, :parent_id => parent_id, :modified => modified, :revision => revision, :hash => hash)
244
+ def add_entry(path, is_dir, parent_id, modified, revision, remote_hash, local_hash)
245
+ insert_entry(:path => path, :is_dir => is_dir, :parent_id => parent_id, :modified => modified, :revision => revision, :remote_hash => remote_hash, :local_hash => local_hash)
147
246
  end
148
247
 
149
248
  def update_entry_by_id(id, fields)
@@ -172,7 +271,7 @@ module Dbox
172
271
 
173
272
  def migrate_entry_from_old_db_format(entry, parent = nil)
174
273
  # insert entry into sqlite db
175
- add_entry(entry.path, entry.dir?, (parent ? parent[:id] : nil), entry.modified_at, entry.revision, nil)
274
+ add_entry(entry.path, entry.dir?, (parent ? parent[:id] : nil), entry.modified_at, entry.revision, nil, nil)
176
275
 
177
276
  # recur on children
178
277
  if entry.dir?
@@ -233,7 +332,6 @@ module Dbox
233
332
  h = make_fields(ENTRY_COLS, res)
234
333
  h[:is_dir] = (h[:is_dir] == 1)
235
334
  h[:modified] = Time.at(h[:modified]) if h[:modified]
236
- h.delete(:hash) unless h[:is_dir]
237
335
  h
238
336
  else
239
337
  nil
@@ -1,6 +1,7 @@
1
1
  module Dbox
2
2
  class Syncer
3
3
  MAX_PARALLEL_DBOX_OPS = 5
4
+ MIN_BYTES_TO_STREAM_DOWNLOAD = 1024 * 100 # 100kB
4
5
 
5
6
  include Loggable
6
7
 
@@ -37,6 +38,7 @@ module Dbox
37
38
 
38
39
  class Operation
39
40
  include Loggable
41
+ include Utils
40
42
 
41
43
  attr_reader :database
42
44
 
@@ -65,38 +67,6 @@ module Dbox
65
67
  metadata[:remote_path]
66
68
  end
67
69
 
68
- def local_to_relative_path(path)
69
- if path.include?(local_path)
70
- path.sub(local_path, "").sub(/^\//, "")
71
- else
72
- raise BadPath, "Not a local path: #{path}"
73
- end
74
- end
75
-
76
- def remote_to_relative_path(path)
77
- if path.include?(remote_path)
78
- path.sub(remote_path, "").sub(/^\//, "")
79
- else
80
- raise BadPath, "Not a remote path: #{path}"
81
- end
82
- end
83
-
84
- def relative_to_local_path(path)
85
- if path && path.length > 0
86
- File.join(local_path, path)
87
- else
88
- local_path
89
- end
90
- end
91
-
92
- def relative_to_remote_path(path)
93
- if path && path.length > 0
94
- File.join(remote_path, path)
95
- else
96
- remote_path
97
- end
98
- end
99
-
100
70
  def remove_dotfiles(contents)
101
71
  contents.reject {|c| File.basename(c[:path]).start_with?(".") }
102
72
  end
@@ -116,29 +86,11 @@ module Dbox
116
86
  @_ids[path] ||= database.find_by_path(path)[:id]
117
87
  end
118
88
 
119
- def time_to_s(t)
120
- case t
121
- when Time
122
- # matches dropbox time format
123
- t.utc.strftime("%a, %d %b %Y %H:%M:%S +0000")
124
- when String
125
- t
126
- end
127
- end
128
-
129
- def parse_time(t)
130
- case t
131
- when Time
132
- t
133
- when String
134
- Time.parse(t)
135
- end
136
- end
137
-
138
89
  def saving_timestamp(path)
139
90
  mtime = File.mtime(path)
140
- yield
91
+ res = yield
141
92
  File.utime(Time.now, mtime, path)
93
+ res
142
94
  end
143
95
 
144
96
  def saving_parent_timestamp(entry, &proc)
@@ -152,7 +104,7 @@ module Dbox
152
104
  end
153
105
 
154
106
  def gather_remote_info(entry)
155
- res = api.metadata(relative_to_remote_path(entry[:path]), entry[:hash])
107
+ res = api.metadata(relative_to_remote_path(entry[:path]), entry[:remote_hash])
156
108
  case res
157
109
  when Hash
158
110
  out = process_basic_remote_props(res)
@@ -175,11 +127,12 @@ module Dbox
175
127
 
176
128
  def process_basic_remote_props(res)
177
129
  out = {}
178
- out[:path] = remote_to_relative_path(res[:path])
179
- out[:modified] = parse_time(res[:modified])
180
- out[:is_dir] = res[:is_dir]
181
- out[:hash] = res[:hash] if res[:hash]
182
- out[:revision] = res[:revision] if res[:revision]
130
+ out[:path] = remote_to_relative_path(res[:path])
131
+ out[:modified] = parse_time(res[:modified])
132
+ out[:is_dir] = res[:is_dir]
133
+ out[:remote_hash] = res[:hash] if res[:hash]
134
+ out[:revision] = res[:rev] if res[:rev]
135
+ out[:size] = res[:bytes] if res[:bytes]
183
136
  out
184
137
  end
185
138
 
@@ -228,14 +181,19 @@ module Dbox
228
181
  # directory creation cannot go in a thread, since later
229
182
  # operations might depend on the directory being there
230
183
  create_dir(c)
231
- database.add_entry(c[:path], true, c[:parent_id], c[:modified], c[:revision], c[:hash])
184
+ database.add_entry(c[:path], true, c[:parent_id], c[:modified], c[:revision], c[:remote_hash], nil)
232
185
  changelist[:created] << c[:path]
233
186
  else
234
187
  ptasks.add do
235
188
  begin
236
- create_file(c)
237
- database.add_entry(c[:path], false, c[:parent_id], c[:modified], c[:revision], c[:hash])
189
+ res = create_file(c)
190
+ local_hash = calculate_hash(relative_to_local_path(c[:path]))
191
+ database.add_entry(c[:path], false, c[:parent_id], c[:modified], c[:revision], c[:remote_hash], local_hash)
238
192
  changelist[:created] << c[:path]
193
+ if res.kind_of?(Array) && res[0] == :conflict
194
+ changelist[:conflicts] ||= []
195
+ changelist[:conflicts] << res[1]
196
+ end
239
197
  rescue Dbox::ServerError => e
240
198
  log.error "Error while downloading #{c[:path]}: #{e.inspect}"
241
199
  parent_ids_of_failed_entries << c[:parent_id]
@@ -246,14 +204,19 @@ module Dbox
246
204
  when :update
247
205
  if c[:is_dir]
248
206
  update_dir(c)
249
- database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :hash => c[:hash])
207
+ database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :remote_hash => c[:remote_hash])
250
208
  changelist[:updated] << c[:path]
251
209
  else
252
210
  ptasks.add do
253
211
  begin
254
- update_file(c)
255
- database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :hash => c[:hash])
212
+ res = update_file(c)
213
+ local_hash = calculate_hash(relative_to_local_path(c[:path]))
214
+ database.update_entry_by_path(c[:path], :modified => c[:modified], :revision => c[:revision], :remote_hash => c[:remote_hash], :local_hash => local_hash)
256
215
  changelist[:updated] << c[:path]
216
+ if res.kind_of?(Array) && res[0] == :conflict
217
+ changelist[:conflicts] ||= []
218
+ changelist[:conflicts] << res[1]
219
+ end
257
220
  rescue Dbox::ServerError => e
258
221
  log.error "Error while downloading #{c[:path]}: #{e.inspect}"
259
222
  parent_ids_of_failed_entries << c[:parent_id]
@@ -276,11 +239,11 @@ module Dbox
276
239
  # clear hashes on any dirs with children that failed so that
277
240
  # they are processed again on next pull
278
241
  parent_ids_of_failed_entries.uniq.each do |id|
279
- database.update_entry_by_id(id, :hash => nil)
242
+ database.update_entry_by_id(id, :remote_hash => nil)
280
243
  end
281
244
 
282
245
  # sort & return output
283
- changelist.keys.each {|k| changelist[k].sort! }
246
+ changelist.keys.each {|k| k == :conflicts ? changelist[k].sort! {|c1, c2| c1[:original] <=> c2[:original] } : changelist[k].sort! }
284
247
  changelist
285
248
  end
286
249
 
@@ -317,7 +280,7 @@ module Dbox
317
280
  c[:modified] = parse_time(c[:modified])
318
281
  if c[:is_dir]
319
282
  # queue dir for later
320
- c[:hash] = entry[:hash]
283
+ c[:remote_hash] = entry[:remote_hash]
321
284
  recur_dirs << [:update, c]
322
285
  else
323
286
  # update iff modified
@@ -355,9 +318,9 @@ module Dbox
355
318
 
356
319
  def modified?(entry, res)
357
320
  out = (entry[:revision] != res[:revision]) ||
358
- (time_to_s(entry[:modified]) != time_to_s(res[:modified]))
359
- out ||= (entry[:hash] != res[:hash]) if res.has_key?(:hash)
360
- log.debug "#{entry[:path]} modified? r#{entry[:revision]} vs. r#{res[:revision]}, h#{entry[:hash]} vs. h#{res[:hash]}, t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
321
+ !times_equal?(entry[:modified], res[:modified])
322
+ out ||= (entry[:remote_hash] != res[:remote_hash]) if res.has_key?(:remote_hash)
323
+ log.debug "#{entry[:path]} modified? r#{entry[:revision]} vs. r#{res[:revision]}, h#{entry[:remote_hash]} vs. h#{res[:remote_hash]}, t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
361
324
  out
362
325
  end
363
326
 
@@ -404,15 +367,43 @@ module Dbox
404
367
  local_path = relative_to_local_path(file[:path])
405
368
  remote_path = relative_to_remote_path(file[:path])
406
369
 
407
- # stream download to temp file, then atomic move to real path
370
+ # check to ensure we aren't overwriting an untracked file or a
371
+ # file with local modifications
372
+ clobbering = false
373
+ if entry = database.find_by_path(file[:path])
374
+ clobbering = calculate_hash(local_path) != entry[:local_hash]
375
+ else
376
+ clobbering = File.exists?(local_path)
377
+ end
378
+
379
+ # stream files larger than the minimum
380
+ stream = file[:size] && file[:size] > MIN_BYTES_TO_STREAM_DOWNLOAD
381
+
382
+ # download to temp file
408
383
  tmp = generate_tmpfilename(file[:path])
409
384
  File.open(tmp, "w") do |f|
410
- api.get_file(remote_path, f)
385
+ api.get_file(remote_path, f, stream)
386
+ end
387
+
388
+ # rename old file if clobbering
389
+ if clobbering && File.exists?(local_path)
390
+ backup_path = find_nonconflicting_path(local_path)
391
+ FileUtils.mv(local_path, backup_path)
392
+ backup_relpath = local_to_relative_path(backup_path)
393
+ log.warn "#{file[:path]} had a conflict and the existing copy was renamed to #{backup_relpath} locally"
411
394
  end
412
- FileUtils.mv(tmp, local_path)
413
395
 
396
+ # atomic move over to the real file, and update the timestamp
397
+ FileUtils.mv(tmp, local_path)
414
398
  update_file_timestamp(file)
399
+
400
+ if backup_relpath
401
+ [:conflict, { :original => file[:path], :renamed => backup_relpath }]
402
+ else
403
+ true
404
+ end
415
405
  end
406
+
416
407
  end
417
408
 
418
409
  class Push < Operation
@@ -445,17 +436,24 @@ module Dbox
445
436
  # directory creation cannot go in a thread, since later
446
437
  # operations might depend on the directory being there
447
438
  create_dir(c)
448
- database.add_entry(c[:path], true, c[:parent_id], nil, nil, nil)
439
+ database.add_entry(c[:path], true, c[:parent_id], nil, nil, nil, nil)
449
440
  force_metadata_update_from_server(c)
450
441
  changelist[:created] << c[:path]
451
442
  else
452
443
  # spin up a thread to upload the file
453
444
  ptasks.add do
454
445
  begin
455
- upload_file(c)
456
- database.add_entry(c[:path], false, c[:parent_id], nil, nil, nil)
457
- force_metadata_update_from_server(c)
458
- changelist[:created] << c[:path]
446
+ local_hash = calculate_hash(relative_to_local_path(c[:path]))
447
+ res = upload_file(c)
448
+ database.add_entry(c[:path], false, c[:parent_id], nil, nil, nil, local_hash)
449
+ if c[:path] == res[:path]
450
+ force_metadata_update_from_server(c)
451
+ changelist[:created] << c[:path]
452
+ else
453
+ log.warn "#{c[:path]} had a conflict and was renamed to #{res[:path]} on the server"
454
+ changelist[:conflicts] ||= []
455
+ changelist[:conflicts] << { :original => c[:path], :renamed => res[:path] }
456
+ end
459
457
  rescue Dbox::ServerError => e
460
458
  log.error "Error while uploading #{c[:path]}: #{e.inspect}"
461
459
  changelist[:failed] << { :operation => :create, :path => c[:path], :error => e }
@@ -474,9 +472,17 @@ module Dbox
474
472
  # spin up a thread to upload the file
475
473
  ptasks.add do
476
474
  begin
477
- upload_file(c)
478
- force_metadata_update_from_server(c)
479
- changelist[:updated] << c[:path]
475
+ local_hash = calculate_hash(relative_to_local_path(c[:path]))
476
+ res = upload_file(c)
477
+ database.update_entry_by_path(c[:path], :local_hash => local_hash)
478
+ if c[:path] == res[:path]
479
+ force_metadata_update_from_server(c)
480
+ changelist[:updated] << c[:path]
481
+ else
482
+ log.warn "#{c[:path]} had a conflict and was renamed to #{res[:path]} on the server"
483
+ changelist[:conflicts] ||= []
484
+ changelist[:conflicts] << { :original => c[:path], :renamed => res[:path] }
485
+ end
480
486
  rescue Dbox::ServerError => e
481
487
  log.error "Error while uploading #{c[:path]}: #{e.inspect}"
482
488
  changelist[:failed] << { :operation => :update, :path => c[:path], :error => e }
@@ -526,7 +532,7 @@ module Dbox
526
532
  child_paths = list_contents(dir).sort
527
533
 
528
534
  child_paths.each do |p|
529
- c = { :path => p, :modified => mtime(p), :is_dir => is_dir(p), :parent_path => dir[:path] }
535
+ c = { :path => p, :modified => mtime(p), :is_dir => is_dir(p), :parent_path => dir[:path], :local_hash => calculate_hash(relative_to_local_path(p)) }
530
536
  if entry = existing_entries[p]
531
537
  c[:id] = entry[:id]
532
538
  recur_dirs << c if c[:is_dir] # queue dir for later
@@ -560,8 +566,16 @@ module Dbox
560
566
  end
561
567
 
562
568
  def modified?(entry, res)
563
- out = time_to_s(entry[:modified]) != time_to_s(res[:modified])
564
- log.debug "#{entry[:path]} modified? t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
569
+ out = true
570
+ if entry[:is_dir]
571
+ out = !times_equal?(entry[:modified], res[:modified])
572
+ log.debug "#{entry[:path]} modified? t#{time_to_s(entry[:modified])} vs. t#{time_to_s(res[:modified])} => #{out}"
573
+ else
574
+ eh = entry[:local_hash]
575
+ rh = res[:local_hash]
576
+ out = !(eh && rh && eh == rh)
577
+ log.debug "#{entry[:path]} modified? #{eh} vs. #{rh} => #{out}"
578
+ end
565
579
  out
566
580
  end
567
581
 
@@ -591,14 +605,17 @@ module Dbox
591
605
  local_path = relative_to_local_path(file[:path])
592
606
  remote_path = relative_to_remote_path(file[:path])
593
607
  File.open(local_path) do |f|
594
- api.put_file(remote_path, f)
608
+ db_entry = database.find_by_path(file[:path])
609
+ last_revision = db_entry ? db_entry[:revision] : nil
610
+ res = api.put_file(remote_path, f, last_revision)
611
+ process_basic_remote_props(res)
595
612
  end
596
613
  end
597
614
 
598
615
  def force_metadata_update_from_server(entry)
599
616
  res = gather_remote_info(entry)
600
617
  unless res == :not_modified
601
- database.update_entry_by_path(entry[:path], :modified => res[:modified], :revision => res[:revision], :hash => res[:hash])
618
+ database.update_entry_by_path(entry[:path], :modified => res[:modified], :revision => res[:revision], :remote_hash => res[:remote_hash])
602
619
  end
603
620
  update_file_timestamp(database.find_by_path(entry[:path]))
604
621
  end