store-digest 0.3.0 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,8 @@ module Store::Digest::Meta::LMDB
9
9
  include Store::Digest::Meta
10
10
  include Store::Digest::Trait::RootDir
11
11
 
12
+ autoload :V0, 'store/digest/meta/lmdb/v0'
13
+ autoload :V1, 'store/digest/meta/lmdb/v1'
12
14
 
13
15
  private
14
16
 
@@ -21,51 +23,32 @@ module Store::Digest::Meta::LMDB
21
23
  "sha-512": 64,
22
24
  }.freeze
23
25
 
24
- FORMAT = 'Q>NNNNCZ*Z*Z*Z*'.freeze
25
- RECORD = %i[
26
- size ctime mtime ptime dtime flags type language charset encoding].freeze
27
- INTS = %i[
28
- size ctime mtime ptime dtime flags].map { |k| [k, :to_i] }.to_h.freeze
29
- PACK = {
30
- # control records
31
- objects: 'Q>',
32
- deleted: 'Q>',
33
- bytes: 'Q>',
34
- # object records
35
- size: 'Q>',
36
- ctime: ?N, # - also used in control
37
- mtime: ?N, # - ditto
38
- ptime: ?N,
39
- dtime: ?N,
40
- flags: ?C,
41
- type: 'Z*',
42
- language: 'Z*',
43
- charset: 'Z*',
44
- encoding: 'Z*',
45
- }.transform_values(&:freeze).freeze
46
-
47
- # NOTE these are all internal methods meant to be used inside other
48
- # transactions so they do not run in transactions themselves
26
+ LMDB_OPTS = %i[mode maxreaders maxdbs mapsize]
49
27
 
28
+ LMDB_FLAGS =
29
+ %i[fixedmap nosubdir nosync rdonly nometasync writemap mapasync notls]
50
30
 
51
31
  def meta_get_stats
52
- @lmdb.transaction do
32
+ # XXX this should be a read transaction
33
+ lmdb.transaction? true do |txn|
34
+ control = lmdb[:control]
53
35
  h = %i[ctime mtime objects deleted bytes].map do |k|
54
- [k, @dbs[:control][k.to_s].unpack1(PACK[k])]
36
+ [k, db_decode(control[k.to_s], k)]
55
37
  end.to_h
56
38
 
57
39
  # fix the times
58
40
  %i[ctime mtime].each { |t| h[t] = Time.at h[t] }
59
41
 
60
42
  # get counts on all the countables
61
- h.merge!(%i[type language charset encoding].map do |d|
62
- ["#{d}s".to_sym,
63
- @dbs[d].keys.map { |k| [k, @dbs[d].cardinality(k)] }.to_h]
64
- end.to_h)
43
+ h.merge!(
44
+ %i[type language charset encoding].map do |d|
45
+ db = lmdb[d]
46
+ ["#{d}s".to_sym, db.keys.map { |k| [k, db.cardinality(k)] }.to_h]
47
+ end.to_h)
65
48
 
66
49
  # would love to do min/max size/dates/etc but that is going to
67
50
  # take some lower-level cursor finessing
68
-
51
+ # txn.commit
69
52
  h
70
53
  end
71
54
  end
@@ -81,8 +64,10 @@ module Store::Digest::Meta::LMDB
81
64
  raise ArgumentError, 'Mapsize must be a positive integer' unless
82
65
  mapsize.is_a? Integer and mapsize > 0
83
66
 
84
- lmdbopts = { mode: 0666 & ~umask, mapsize: mapsize }
85
- @lmdb = ::LMDB.new dir, lmdbopts
67
+ @lmdb_opts = {
68
+ mode: 0666 & ~umask,
69
+ mapsize: mapsize,
70
+ }.merge(options.slice(*(LMDB_OPTS + LMDB_FLAGS)))
86
71
 
87
72
  algos = options[:algorithms] || DIGESTS.keys
88
73
  raise ArgumentError, "Invalid algorithm specification #{algos}" unless
@@ -92,53 +77,60 @@ module Store::Digest::Meta::LMDB
92
77
  raise ArgumentError, "Invalid primary algorithm #{popt}" unless
93
78
  popt.is_a? Symbol and DIGESTS[popt]
94
79
 
95
- @lmdb.transaction do
80
+ lmdb.transaction? do
96
81
  # load up the control database
97
- @dbs = { control: @lmdb.database('control', create: true) }
82
+ control = lmdb.database('control', create: true)
98
83
 
99
84
  # if control is empty or version is 1, extend V1
100
- if @dbs[:control].empty?
85
+ if control.empty?
101
86
  # set to v1 for next time
102
- @dbs[:control]['version'] = ?1
87
+ control['version'] = ?1
103
88
  extend V1
104
- elsif @dbs[:control]['version'] == ?1
89
+ elsif control['version'] == ?1
105
90
  extend V1
106
- elsif @dbs[:control]['version'].nil?
91
+ elsif control['version'].nil?
107
92
  # if version is empty, extend v0
93
+ @dbs = { control: control }
108
94
  extend V0
109
95
  else
110
96
  # otherwise error
111
- v = @dbs[:control]['version']
112
- raise CorruptStateError,
113
- "Control database has unrecognized version #{v}"
97
+ v = control['version']
98
+ raise CorruptStateError, "Control database has unrecognized version #{v}"
114
99
  end
115
100
 
116
101
  if a = algorithms
117
102
  raise ArgumentError,
118
- "Supplied algorithms #{algos.sort} do not match instantiated #{a}" if
103
+ "Supplied algorithms #{algos.sort} do not match instantiated #{a}" if
119
104
  algos.sort != a
120
105
  else
121
106
  a = algos.sort
122
- @dbs[:control]['algorithms'] = a.join ?,
107
+ control['algorithms'] = a.join ?,
123
108
  end
124
109
 
125
110
  if pri = primary
126
111
  raise ArgumentError,
127
- "Supplied algorithm #{popt} does not match instantiated #{pri}" if
112
+ "Supplied algorithm #{popt} does not match instantiated #{pri}" if
128
113
  popt != pri
129
114
  else
130
115
  pri = popt
131
- @dbs[:control]['primary'] = popt.to_s
116
+ control['primary'] = popt.to_s
132
117
  end
133
-
134
118
  setup_dbs
135
119
  end
136
120
 
137
- @lmdb.sync
121
+ lmdb.sync
138
122
  end
139
123
 
140
124
  public
141
125
 
126
+ # Return the LMDB handle for the given process.
127
+ #
128
+ # @return [LMDB::Environment]
129
+ #
130
+ def lmdb
131
+ (@lmdb ||= {})[Process.pid] ||= ::LMDB.new dir, @lmdb_opts
132
+ end
133
+
142
134
  # Wrap the block in a transaction. Trying to start a read-write
143
135
  # transaction (or do a write operation, as they are wrapped by
144
136
  # transactions internally) within a read-only transaction will
@@ -148,8 +140,8 @@ module Store::Digest::Meta::LMDB
148
140
  # @param block [Proc] the code to run.
149
141
  #
150
142
  def transaction readonly: false, &block
151
- @lmdb.transaction(readonly) do
152
- # we do not want to transmit
143
+ lmdb.transaction?(readonly) do
144
+ # we do not want to transmit the transaction
153
145
  block.call
154
146
  end
155
147
  end
@@ -157,8 +149,8 @@ module Store::Digest::Meta::LMDB
157
149
  # Return the set of algorithms initialized in the database.
158
150
  # @return [Array] the algorithms
159
151
  def algorithms
160
- @algorithms ||= @lmdb.transaction do
161
- if ret = @dbs[:control]['algorithms']
152
+ @algorithms ||= lmdb.transaction? true do
153
+ if ret = lmdb[:control]['algorithms']
162
154
  ret.strip.downcase.split(/\s*,+\s*/).map(&:to_sym)
163
155
  end
164
156
  end
@@ -167,8 +159,8 @@ module Store::Digest::Meta::LMDB
167
159
  # Return the primary digest algorithm.
168
160
  # @return [Symbol] the primary algorithm
169
161
  def primary
170
- @primary ||= @lmdb.transaction do
171
- if ret = @dbs[:control]['primary']
162
+ @primary ||= lmdb.transaction? true do
163
+ if ret = lmdb[:control]['primary']
172
164
  ret.strip.downcase.to_sym
173
165
  end
174
166
  end
@@ -177,9 +169,9 @@ module Store::Digest::Meta::LMDB
177
169
  # Return the number of objects in the database.
178
170
  # @return [Integer]
179
171
  def objects
180
- @lmdb.transaction do
181
- if ret = @dbs[:control]['objects']
182
- ret.unpack1 'Q>' # 64-bit unsigned network-endian integer
172
+ lmdb.transaction? true do
173
+ if ret = lmdb[:control]['objects']
174
+ db_decode ret, :objects
183
175
  end
184
176
  end
185
177
  end
@@ -188,9 +180,9 @@ module Store::Digest::Meta::LMDB
188
180
  # still on record.
189
181
  # @return [Integer]
190
182
  def deleted
191
- @lmdb.transaction do
192
- if ret = @dbs[:control]['deleted']
193
- ret.unpack1 'Q>'
183
+ lmdb.transaction? true do
184
+ if ret = lmdb[:control]['deleted']
185
+ db_decode ret, :deleted
194
186
  end
195
187
  end
196
188
  end
@@ -199,9 +191,9 @@ module Store::Digest::Meta::LMDB
199
191
  # the database itself).
200
192
  # @return [Integer]
201
193
  def bytes
202
- @lmdb.transaction do
203
- if ret = @dbs[:control]['bytes']
204
- ret.unpack1 'Q>'
194
+ lmdb.transaction? true do
195
+ if ret = lmdb[:control]['bytes']
196
+ db_decode ret, :bytes
205
197
  end
206
198
  end
207
199
  end
@@ -252,10 +244,10 @@ module Store::Digest::Meta::LMDB
252
244
  end
253
245
  # find the smallest denominator
254
246
  index = params.keys.map do |k|
255
- [k, @dbs[k].size]
247
+ [k, lmdb[k].size]
256
248
  end.sort { |a, b| a[1] <=> b[1] }.map(&:first).first
257
249
  out = {}
258
- @lmdb.transaction do
250
+ lmdb.transaction? true do
259
251
  if index
260
252
  # warn params.inspect
261
253
  if INTS[index]
@@ -299,7 +291,7 @@ module Store::Digest::Meta::LMDB
299
291
  end
300
292
  else
301
293
  # if we aren't filtering at all we can just obtain everything
302
- @dbs[primary].cursor do |c|
294
+ lmdb[primary].cursor do |c|
303
295
  while rec = c.next
304
296
  u = URI("ni:///#{primary};")
305
297
  u.digest = rec.first
@@ -311,979 +303,5 @@ module Store::Digest::Meta::LMDB
311
303
 
312
304
  # now we sort
313
305
  out.values
314
- end
315
-
316
- # This is the version zero (original) database layout.
317
- module V0
318
-
319
- private
320
-
321
- def setup_dbs
322
-
323
- now = Time.now in: ?Z
324
- %w[ctime mtime].each do |t|
325
- unless @dbs[:control].has? t
326
- @dbs[:control][t] = [now.to_i].pack ?N
327
- end
328
- end
329
-
330
- # clever if i do say so myself
331
- %w[objects deleted bytes].each do |x|
332
- @dbs[:control][x] = [0].pack 'Q>' unless send(x.to_sym)
333
- end
334
-
335
- # XXX we might actually wanna dupsort the non-primary digests too
336
- dbs = RECORD.map do |k|
337
- [k, [:dupsort]]
338
- end.to_h.merge(algorithms.map { |k| [k, []] }.to_h)
339
-
340
- @dbs.merge!(dbs.map do |name, flags|
341
- [name, @lmdb.database(
342
- name.to_s, (flags + [:create]).map { |f| [f, true] }.to_h
343
- )]
344
- end.to_h).freeze
345
- end
346
-
347
- def control_add key, val
348
- if ov = @dbs[:control][key.to_s]
349
- fmt = case ov.length
350
- when 4 then ?N
351
- when 8 then 'Q>'
352
- else
353
- raise RuntimeError, "#{key} must be 4 or 8 bytes long"
354
- end
355
- ov = ov.unpack1 fmt
356
- else
357
- ov = 0
358
- end
359
-
360
- nv = ov + val
361
-
362
- @dbs[:control][key.to_s] = [nv].pack 'Q>'
363
-
364
- nv
365
- end
366
-
367
- def control_get key
368
- key = key.to_sym
369
- raise ArgumentError, "Invalid control key #{key}" unless
370
- %[ctime mtime objects deleted bytes].include? key
371
- if val = @dbs[:control][key.to_s]
372
- val.unpack1 PACK[key]
373
- end
374
- end
375
-
376
- def index_pack key
377
- case key
378
- when nil then return
379
- when Time then [key.to_i].pack ?N
380
- when Integer then [key].pack 'Q>'
381
- when String then key.b # no \0: key length is stored in the record
382
- else raise ArgumentError, "Invalid type #{key.class}"
383
- end
384
- end
385
-
386
- def index_add index, key, bin
387
- key = index_pack(key) or return
388
- # check first or it will just stupidly keep adding duplicate records
389
- @dbs[index].put key, bin unless @dbs[index].has? key, bin
390
- end
391
-
392
- def index_rm index, key, bin
393
- key = index_pack(key) or return
394
- # soft delete baleets only when there is something to baleet
395
- @dbs[index.to_sym].delete? key, bin
396
- end
397
-
398
- # return an enumerator
399
- def index_get index, min, max = nil, range: false, &block
400
- # min and max will be binary values and the cursor will return a range
401
- min = index_pack(min)
402
- max = index_pack(max)
403
- return unless min || max
404
-
405
- return enum_for :index_get, index, min, max unless block_given?
406
-
407
- body = -> c do
408
- # lmdb cursors are a pain in the ass because 'set' advances the
409
- # cursor so you can't just run the whole thing in a loop, you
410
- # have to do this instead:
411
- if rec = (min ? c.set_range(min) : c.first)
412
- return unless range or max or min == rec.first
413
- block.call(*rec)
414
- block.call(*rec) while rec = c.next_range(max || min)
415
- end
416
- end
417
-
418
- @dbs[index.to_sym].cursor(&body)
419
- nil
420
- end
421
-
422
- def inflate bin, rec
423
- rec = rec.dup
424
- digests = algorithms.map do |a|
425
- uri = URI::NI.build(scheme: 'ni', path: "/#{a}")
426
- uri.digest = a == primary ? bin : rec.slice!(0, DIGESTS[a])
427
- [a, uri]
428
- end.to_h
429
-
430
- # size ctime mtime ptime dtime flags type language charset encoding
431
- hash = RECORD.zip(rec.unpack(FORMAT)).to_h
432
- hash[:digests] = digests
433
-
434
- %i[ctime ptime mtime dtime].each do |k|
435
- hash[k] = (hash[k] == 0) ? nil : Time.at(hash[k])
436
- end
437
-
438
- %i[type language charset encoding].each do |k|
439
- hash[k] = nil if hash[k].empty?
440
- end
441
- hash
442
- end
443
-
444
- def deflate obj
445
- obj = obj.to_h unless obj.is_a? Hash
446
- algos = (algorithms - [primary]).map { |a| obj[:digests][a].digest }.join
447
- rec = RECORD.map { |k| v = obj[k]; v.send INTS.fetch(k, :to_s) }
448
- algos + rec.pack(FORMAT)
449
- end
450
-
451
- protected
452
-
453
- # Returns a metadata hash or `nil` if no changes have been made. A
454
- # common scenario is that the caller will attempt to store an object
455
- # that is already present, with the only distinction being `:ctime`
456
- # (which is always ignored) and/or `:mtime`. Setting the `:preserve`
457
- # keyword parameter to a true value will cause any new value for
458
- # `:mtime` to be ignored as well. In that case, an attempt to store
459
- # an otherwise identical record overtop of an existing one will
460
- # return `nil`.
461
- #
462
- # @param obj [Store::Digest::Object] the object to store
463
- # @param preserve [false, true] whether to preserve the mtime
464
- # @return [nil, Hash] maybe the metadata content of the object
465
- #
466
- def set_meta obj, preserve: false
467
- raise ArgumentError,
468
- 'Object does not have a complete set of digests' unless
469
- (algorithms - obj.algorithms).empty?
470
-
471
- body = -> do
472
- # noop if object is present and not deleted and no details have changed
473
- bin = obj[primary].digest
474
- newh = obj.to_h
475
- now = Time.now in: ?Z
476
-
477
- change = newh[:dtime] ? -1 : 1 # net change in records
478
- oldrec = @dbs[primary][bin]
479
- oldh = nil
480
- newh = if oldrec
481
- oldh = inflate bin, oldrec
482
- oldh.merge(newh) do |k, ov, nv|
483
- case k
484
- when :ctime then ov # never overwrite ctime
485
- when :mtime # only overwrite the mtime if specified
486
- preserve ? (ov || nv || now) : (nv || ov || now)
487
- when :ptime then nv || ov || now # XXX derive ptime?
488
- when :dtime
489
- # net change is zero if both or neither are set
490
- change = 0 if (nv && ov) || (!nv && !ov)
491
- nv
492
- else nv
493
- end
494
- end
495
- else
496
- %i[ctime mtime ptime].each { |k| newh[k] ||= now }
497
- newh
498
- end
499
- newrec = deflate newh
500
-
501
- # we have to *break* out of blocks, not return!
502
- # (ah but we can return from a lambda)
503
- return if newrec == oldrec
504
- # anyway a common scenario is a write where nothing is different
505
- # but the mtime, so thepurpose
506
-
507
- # these only need to be done if they haven't been done before
508
- (algorithms - [primary]).each do |algo|
509
- @dbs[algo][obj[algo].digest] = bin
510
- end unless oldrec
511
-
512
- # this only needs to be done if there are changes
513
- @dbs[primary][bin] = newrec
514
-
515
- # if old dtime is nil and new dtime is non-nil then we are deleting
516
- # if old dtime is non-nil and new dtime is nil then we are restoring
517
-
518
- if !oldrec
519
- # new record: increment object count (by 1), increment byte
520
- # count (by size)
521
- control_add :objects, 1
522
- if change > 0
523
- control_add :bytes, newh[:size]
524
- elsif change < 0
525
- # note objects *and* deleted counts get incremented;
526
- # allowing for the possibility that a fresh object can be
527
- # added to the store "deleted".
528
- control_add :deleted, 1
529
- end
530
- elsif change > 0
531
- # restored record: decrement deleted count (by 1), increment
532
- # byte count (by size)
533
- control_add :deleted, -1
534
- control_add :bytes, newh[:size]
535
- elsif change < 0
536
- # "deleted" record: increment deleted count (by 1), decrement
537
- # byte count (by size)
538
- control_add :deleted, 1
539
- control_add :bytes, -newh[:size]
540
- end
541
- # otherwise do nothing
542
-
543
- # note that actually *removing* a record is separate process.
544
-
545
- # okay now we update the indexes
546
- RECORD.each do |k|
547
- index_rm k, oldh[k], bin if oldh and oldh[k] and oldh[k] != newh[k]
548
- index_add k, newh[k], bin # will noop on nil
549
- end
550
-
551
- # and finally update the mtime
552
- @dbs[:control]['mtime'] = [now.to_i].pack ?N
553
-
554
- newh
555
- end
556
-
557
- @lmdb.transaction do
558
- body.call
559
- end
560
- end
561
-
562
- def get_meta obj
563
- body = -> do
564
- # find/inflate master record
565
- algo = if obj[primary]
566
- primary
567
- else
568
- raise ArgumentError, 'Object must have digests' unless
569
- obj.scanned?
570
- obj.algorithms.sort do |a, b|
571
- cmp = DIGESTS[b] <=> DIGESTS[a]
572
- cmp == 0 ? a <=> b : cmp
573
- end.first
574
- end
575
- bin = obj[algo].digest
576
-
577
- # look up the primary digest based on a secondary
578
- unless algo == primary
579
- bin = @dbs[algo][bin] or return
580
- end
581
-
582
- # actually raise maybe? because this should never happen
583
- rec = @dbs[primary][bin] or return
584
-
585
- # return just a hash of all the elements
586
- inflate bin, rec
587
- end
588
-
589
- @lmdb.transaction do
590
- body.call
591
- end
592
- end
593
-
594
- def remove_meta obj
595
- body = -> do
596
- hash = get_meta(obj) or return
597
- bin = hash[:digests][primary].digest
598
- now = Time.now in: ?Z
599
-
600
- RECORD.each { |k| index_rm k, hash[k], bin }
601
- hash[:digests].each { |algo, uri| @dbs[algo].delete uri.digest }
602
-
603
- # remove counts
604
- control_add :objects, -1
605
- if hash[:dtime]
606
- control_add :deleted, -1
607
- else
608
- control_add :bytes, -hash[:size]
609
- hash[:dtime] = now
610
- end
611
-
612
- # and finally update the mtime
613
- @dbs[:control]['mtime'] = [now.to_i].pack ?N
614
-
615
- hash
616
- end
617
-
618
- @lmdb.transaction do
619
- body.call
620
- end
621
306
  end
622
-
623
- def mark_meta_deleted obj
624
- body = -> do
625
- # the object has to be in here to delete it
626
- oldh = get_meta(obj) or return
627
- # if the object is already "deleted" we do nothing
628
- return if oldh[:dtime]
629
-
630
- bin = oldh[:digests][primary].digest
631
- now = Time.now in: ?Z
632
-
633
- newh = oldh.merge(obj.to_h) do |k, ov, nv|
634
- case k
635
- when :digests then ov # - old values are guaranteed complete
636
- when :size then ov # - we don't trust the new value
637
- when :type then ov # - this gets set by default
638
- when :dtime then now # - what we came here to do
639
- else nv || ov
640
- end
641
- end
642
-
643
- @dbs[primary][bin] = deflate newh
644
- control_add :deleted, 1
645
- control_add :bytes, -newh[:size]
646
-
647
- # okay now we update the indexes
648
- RECORD.each do |k|
649
- index_rm k, oldh[k], bin if oldh and oldh[k] and oldh[k] != newh[k]
650
- index_add k, newh[k], bin # will noop on nil
651
- end
652
-
653
- # and finally update the mtime
654
- @dbs[:control]['mtime'] = [now.to_i].pack ?N
655
-
656
- newh
657
- end
658
-
659
- @lmdb.transaction do
660
- body.call
661
- end
662
- end
663
-
664
- end
665
-
666
- # This is the version 1 database layout.
667
- module V1
668
-
669
- private
670
-
671
- # import the flags
672
- Flags = Store::Digest::Object::Flags
673
-
674
- # XXX do we want to introduce dry-types? didn't i try before and
675
- # it was a huge clusterfuck?
676
-
677
- # i think?? are there others?? lol
678
- ARCH = [''].pack(?p).size == 8 ? 64 : 32
679
- LONG = ARCH == 64 ? ?Q : ?L
680
-
681
- ENCODE_NOOP = -> x { x }
682
- DECODE_NOOP = ENCODE_NOOP
683
- ENCODE_TOKEN = -> x { x.to_s }
684
- DECODE_TOKEN = -> x { x.empty? ? nil : x }
685
- ENCODE_FLAGS = -> x { Flags.to_i x }
686
- DECODE_FLAGS = -> x { Flags.from x }
687
- if ARCH == 64
688
- # you get microsecond resolution
689
- ENCODE_TIME = -> x { x ? x.to_i * 1_000_000 + x.usec : 0 }
690
- DECODE_TIME = -> x {
691
- x == 0 ? nil : Time.at(x / 1_000_000, x % 1_000_000, :usec, in: ?Z)
692
- }
693
- else
694
- # and you do not
695
- ENCODE_TIME = -> x { x ? x.to_i : 0 }
696
- DECODE_TIME = -> x { x == 0 ? nil : Time.at(x, in: ?Z) }
697
- end
698
-
699
- # { Class => [pack, encode, decode] }
700
- COERCE = {
701
- Integer => [LONG, ENCODE_NOOP, DECODE_NOOP ],
702
- String => ['Z*', ENCODE_TOKEN, DECODE_TOKEN],
703
- Time => [LONG, ENCODE_TIME, DECODE_TIME ],
704
- Flags => [?S, ENCODE_FLAGS, DECODE_FLAGS],
705
- }
706
-
707
- # one difference between V0 records and V1 records is we don't
708
- # force network-endianness, since we can't force it for the
709
- # integer keys. the other difference is that the flags are now an
710
- # unsigned short.
711
-
712
- # control records
713
- CONTROL = {
714
- version: String,
715
- ctime: Time,
716
- mtime: Time,
717
- expiry: Integer,
718
- objects: Integer,
719
- deleted: Integer,
720
- bytes: Integer,
721
- }
722
-
723
- # object records
724
- RECORD = {
725
- size: Integer,
726
- ctime: Time,
727
- mtime: Time,
728
- ptime: Time,
729
- dtime: Time,
730
- flags: Flags,
731
- type: String,
732
- language: String,
733
- charset: String,
734
- encoding: String,
735
- }
736
-
737
- # the record string (after the hashes are removed)
738
- PACKED = RECORD.values.map { |v| COERCE[v].first }.join
739
-
740
- # Set up the V1 database layout.
741
- #
742
- # @return [void]
743
- #
744
- def setup_dbs
745
- # in the v1 layout, `primary` is only cosmetic and we have an
746
- # `entry` database keyed by (native-endian) integer
747
-
748
- now = Time.now in: ?Z
749
-
750
- %i[ctime mtime].each { |k| control_set k, now, maybe: true }
751
-
752
- # clever if i do say so myself
753
- %i[objects deleted bytes].each { |k| control_set k, 0, maybe: true }
754
-
755
- # default cache expiration
756
- control_set :expiry, 86400, maybe: true
757
-
758
- # this snarl takes the record layout (popping in a cheeky
759
- # "etime" index for cache entry expirations) and pairs it with
760
- # hash algorithm indices to attach them to database flags, which
761
- # are then shoveled en masse into the LMDB factory method.
762
- dbs = RECORD.except(:flags).merge({ etime: Time }).transform_values do |type|
763
- flags = %i[dupsort]
764
- flags += [Integer, Time].include?(type) ? %i[integerkey integerdup] : []
765
- end.merge(
766
- # these are always going to be a fixed length (hash -> size_t)
767
- algorithms.map { |k| [k, %i[dupsort]] }.to_h, # dupfixed bad?
768
- { entry: [:integerkey] }
769
- ).transform_values do |flags|
770
- (flags + [:create]).map { |flag| [flag, true] }.to_h
771
- end
772
-
773
- @dbs.merge!(dbs.map { |n, f| [n, @lmdb.database(n.to_s, f)] }.to_h)
774
- end
775
-
776
- # Encode an individual value.
777
- #
778
- # @param value [Object] the value to be encoded
779
- # @param type [Class] the value's type if not specified
780
- #
781
- # @return [String] the raw value for the database
782
- #
783
- def db_encode value, type = value.class
784
- pack, encode, _ = COERCE[type]
785
- raise ArgumentError, "Unsupported type #{type}" unless pack
786
-
787
- [encode.call(value)].pack pack
788
- end
789
-
790
- # Decode an individual value.
791
- #
792
- # @param raw [String] a raw value from the database
793
- # @param type [Class] the type to decode it into
794
- #
795
- # @return [Object] whatever `type` object was intended
796
- #
797
- def db_decode raw, type
798
- pack, _, decode = COERCE[type]
799
- raise ArgumentError, "Unsupported type #{type}" unless pack
800
-
801
- decode.call raw.unpack1(pack)
802
- end
803
-
804
- # Get the "last" (highest-ordinal) key of an integer-keyed database.
805
- #
806
- # @param db [LMDB::Database,Symbol]
807
- # @param raw [false, true] whether to decode the pointer
808
- #
809
- # @return [Integer]
810
- #
811
- def last_key db, raw: false
812
- db = @dbs[db] if db.is_a? Symbol
813
- raise ArgumentError, 'Wrong/malformed database' unless
814
- db.is_a? ::LMDB::Database and db.flags[:integerkey]
815
-
816
- # the last entry in the database should be the highest number,
817
- # but also not sure if we want to reserve zero
818
- out = db.empty? ? 0 : (db.cursor { |c| c.last }.first.unpack1(?J) + 1)
819
-
820
- # return raw pointer
821
- raw ? [out].pack(?J) : out
822
- end
823
-
824
- # Retrieve the value of a control field.
825
- #
826
- # @param key [Symbol]
827
- #
828
- # @return [Object, nil] the value of the key
829
- #
830
- def control_get key
831
- type = CONTROL[key.to_sym] or raise ArgumentError,
832
- "invalid control key #{key}"
833
-
834
- raw = @dbs[:control][key.to_s]
835
- db_decode raw, type if raw
836
- end
837
-
838
- # Set a control field with an explicit value.
839
- #
840
- # @param key [Symbol]
841
- # @param value [Object]
842
- # @param maybe [false, true] only set if uninitialized
843
- #
844
- # @return [Object] the original value passed through
845
- #
846
- def control_set key, value, maybe: false
847
- type = CONTROL[key] or raise ArgumentError, "invalid control key #{key}"
848
- raise ArgumentError,
849
- "value should be instance of #{type}" unless value.is_a? type
850
-
851
- @dbs[:control][key.to_s] = db_encode value, type unless
852
- maybe && @dbs[:control].has?(key.to_s)
853
- end
854
-
855
- # Increment an existing ({Integer}) control field by a value.
856
- #
857
- # @param key [Symbol]
858
- # @param value [Numeric]
859
- #
860
- # @raise [RuntimeError] if the field is uninitialized
861
- #
862
- # @return [Integer, Time] the new value
863
- #
864
- def control_add key, value
865
- raise "value must be numeric" unless value.is_a? Numeric
866
- type = CONTROL[key] or raise ArgumentError, "invalid control key #{key}"
867
-
868
- # value may be uninitialized
869
- raise "Attempted to change an uninitialized value" unless
870
- old = control_get(key)
871
-
872
- # early bailout
873
- return value if value == 0
874
-
875
- # overwrite the value
876
- control_set key, old + value
877
- end
878
-
879
- # Add an entry to an index.
880
- #
881
- # @note The indexes point to the integer keys in v1 rather than hashes in v0
882
- #
883
- # @param index [Symbol] the index table name
884
- # @param key [Object] the datum to become the index key
885
- # @param ptr [Integer] the key for the entry
886
- #
887
- # @return [void]
888
- #
889
- def index_add index, key, ptr
890
- # XXX just add etime here for now
891
- cls = RECORD.merge({etime: Time})[index] or raise ArgumentError,
892
- "No record for #{index}"
893
-
894
- # warn "#{index}, #{key.inspect}"
895
-
896
- key = db_encode key, cls
897
- ptr = ptr.is_a?(String) ? ptr : [ptr].pack(?J)
898
-
899
-
900
- @dbs[index.to_sym].put? key, ptr
901
- end
902
-
903
- # Remove an entry from an index.
904
- #
905
- # @param index [Symbol] the index table name
906
- # @param key [Object] the datum to become the index key
907
- # @param ptr [Integer] the key for the entry
908
- #
909
- # @return [void]
910
- #
911
- def index_rm index, key, ptr
912
- # XXX etime lol
913
- cls = RECORD.merge({etime: Time})[index] or raise ArgumentError,
914
- "No record for #{index}"
915
- key = db_encode key, cls
916
- ptr = ptr.is_a?(String) ? ptr : [ptr].pack(?J)
917
-
918
- @dbs[index.to_sym].delete? key, ptr
919
- end
920
-
921
- # the v1 record is substantively different from v0; also all the
922
- # hashes are in the v1 record whereas the primary hash is used as
923
- # the key in v0 and so is not duplicated. this also means we only
924
- # need the one argument because we don't need the information from
925
- # the key.
926
-
927
- # Return a hash of a record.
928
- #
929
- # @param raw [String] the raw record from the database
930
- #
931
- # @return [Hash]
932
- #
933
- def inflate raw
934
- # we're about to chomp through this
935
- raw = raw.dup
936
-
937
- # get the digest algos
938
- ds = algorithms.map do |a|
939
- uri = URI::NI.build(scheme: 'ni', path: "/#{a}")
940
- uri.digest = raw.slice!(0, DIGESTS[a])
941
- [a, uri]
942
- end.to_h
943
-
944
- # love this for me
945
- { digests: ds }.merge(RECORD.keys.zip(raw.unpack(PACKED)).map do |k, v|
946
- [k, COERCE[RECORD[k]].last.call(v)]
947
- end.to_h)
948
- end
949
-
950
- # Return a packed string suitable to store as a record.
951
- #
952
- # @param obj [Store::Digest::Object, Hash]
953
- #
954
- # @return [String]
955
- #
956
- def deflate obj
957
- obj = obj.to_h
958
- algos = algorithms.map { |a| obj[:digests][a].digest }.join
959
- rec = RECORD.map { |k, cls| COERCE[cls][1].call obj[k] }
960
-
961
- algos + rec.pack(PACKED)
962
- end
963
-
964
- # Get an integer entry key from a {Store::Digest::Object} or
965
- # {Hash} representation thereof, or hash of digests to {URI::NI}
966
- # objects.
967
- #
968
- # @param obj [Store::Digest::Object, Hash]
969
- # @param raw [false, true] whether to return the raw bytes
970
- #
971
- # @return [Integer, nil]
972
- #
973
- def get_ptr obj, raw: false
974
- # normalize the object and obtain a workable hash algorithm
975
- obj = obj.to_h
976
- obj = obj[:digests] if obj.key? :digests
977
-
978
- algo = if obj.key? primary
979
- primary
980
- else
981
- DIGESTS.sort do |b, a|
982
- cmp = b.last <=> a.last
983
- cmp == 0 ? a.first <=> b.first : cmp
984
- end.detect { |x| obj.key? x.first }.first
985
- end or return
986
-
987
- # warn "algo: #{algo} #{obj[algo.to_sym]} -> #{obj[algo.to_sym].hexdigest}"
988
-
989
- # wat = {}
990
- # @dbs[algo.to_sym].each { |k, v| wat[k.unpack1 'H*'] = v.unpack1 ?J }
991
-
992
- # warn wat.inspect
993
-
994
- # this is a private method so we can control what its inputs are
995
- # but it *should* map to a URI::NI; string hashes are too ambiguous
996
- uri = obj[algo.to_sym]
997
- raise ArgumentError, "Unexpected #{uri.class}" unless uri.is_a? URI::NI
998
-
999
- # now return the pointer (or nil)
1000
- out = @dbs[algo.to_sym][uri.digest] or return
1001
- raw ? out : out.unpack1(?J)
1002
- end
1003
-
1004
- # Retrieve a record from the database.
1005
- #
1006
- # @param obj [Store::Digest::Object, Hash, URI::NI, Integer] the
1007
- # entry's key, or an object from which it can be resolved
1008
- # @param raw [false, true] whether to leave the result as raw bytes
1009
- #
1010
- # @return [Hash, String, nil] inflated or raw record, if present
1011
- #
1012
- def get_meta obj, raw: false
1013
- @lmdb.transaction(true) do
1014
- # get the pointer
1015
- ptr = case obj
1016
- when String then obj
1017
- when Hash, Store::Digest::Object then get_ptr obj, raw: true
1018
- when Integer then [obj].pack ?J
1019
- when URI::NI then @dbs[obj.algorithm.to_sym][obj.digest]
1020
- else
1021
- raise ArgumentError, "Cannot process an #{obj.class}"
1022
- end
1023
-
1024
- # get the entry (or not)
1025
- break unless ptr && out = @dbs[:entry][ptr]
1026
-
1027
- # conditionally inflate the result
1028
- raw ? out : inflate(out)
1029
- end
1030
- end
1031
-
1032
- # Persist the metadata for a {Store::Digest::Object}.
1033
- #
1034
- # @param obj [Store::Digest::Object]
1035
- #
1036
- # @return [void]
1037
- #
1038
- def set_meta obj, preserve: false
1039
- # check if the object has all the hashes
1040
- raise ArgumentError,
1041
- 'Object does not have a complete set of digests' unless
1042
- (algorithms - obj.algorithms).empty?
1043
-
1044
- # since nothing changes in a content-addressable store by
1045
- # definition, the only meaningful changes involve adding
1046
- # information like `type`, `language`, `charset`, `encoding`,
1047
- # and their concomitant checked/valid flags. `size` and `ctime`
1048
- # should never change. `ptime` should be set automatically to
1049
- # `now`, and only if anything else changes. `mtime` should only
1050
- # be changed if `preserve` is false. `dtime`, if present, should
1051
- # be no greater than `now` unless the object is cache. an object
1052
- # with a `dtime` in the past is assumed to be deleted.
1053
-
1054
- @lmdb.transaction do |txn|
1055
- # initial information
1056
- now = Time.now in: ?Z
1057
- ptr = get_ptr(obj, raw: true) || last_key(:entry, raw: true)
1058
- newh = obj.to_h
1059
- oldh = nil
1060
-
1061
- # warn ptr.inspect
1062
-
1063
- # other things we reuse
1064
- delta = 0 # whether we are adding or removing a record
1065
- deleted = newh[:dtime] && newh[:dtime] <= now
1066
- is_cache = !!(newh[:flags] || [])[8] # may not be present
1067
-
1068
- # check if the entry already exists
1069
- if oldrec = @dbs[:entry][ptr]
1070
- oldh = inflate oldrec
1071
-
1072
- # the size and ctime should not change
1073
- newh[:size] = oldh[:size]
1074
- newh[:ctime] = oldh[:ctime]
1075
- newh[:ptime] ||= oldh[:ptime]
1076
- newh[:mtime] = (preserve ? (oldh[:mtime] || newh[:mtime]) :
1077
- (newh[:mtime] || oldh[:mtime])) || now
1078
-
1079
- # only the old value if the new one isn't specified
1080
- %i[type language charset encoding].each do |key|
1081
- newh[key] ||= oldh[key]
1082
- end
1083
-
1084
- # determine if the old record is a tombstone
1085
- tombstone = oldh[:dtime] && oldh[:dtime] <= now
1086
-
1087
- # OKAY HERE IS THE ALL-IMPORTANT CACHE LOGIC:
1088
- #
1089
- # we want it so that a cache object can be "solidified"
1090
- # (turned into a non-cache object), but a non-cache object
1091
- # can't be turned into a cache object. `dtime` is punned for
1092
- # cache objects as an expiration time and is likely (but not
1093
- # guaranteed) to be in the future.
1094
- #
1095
- if was_cache = oldh[:flags][8]
1096
- # we get here if there is no change in the state of the
1097
- # cache, but we could be overwriting a tombstone, so we
1098
- # want to make sure there is an expiration time.
1099
- if is_cache && !newh[:dtime]
1100
- oexp = oldh[:dtime] && oldh[:dtime] > now
1101
- newh[:dtime] = oexp || now + control_get(:expiry)
1102
- end
1103
- elsif is_cache
1104
- # the record is not cache but it could be a tombstone. we
1105
- # can overwrite it with cache if it is, but not if it
1106
- # isn't, because the implication is something is using it.
1107
- if tombstone
1108
- newh[:dtime] ||= now + control_get(:expiry)
1109
- delta = 1
1110
- else
1111
- newh[:dtime] = nil
1112
- is_cache = newh[:flags][8] = false
1113
- delta = 0
1114
- end
1115
- else
1116
- # neither is cache; we are updating something else.
1117
- # this is whatever the old one was
1118
- newh[:dtime] ||= oldh[:dtime] if deleted
1119
- end
1120
-
1121
- # accumulate which parts of the record got changed
1122
- changed = RECORD.keys.select { |k| newh[k] != oldh[k] }
1123
-
1124
- # changed.each do |change|
1125
- # warn "#{change}: #{oldh[change]} -> #{newh[change]}"
1126
- # end
1127
-
1128
- # if this is empty there is nothing to do
1129
- break if changed.empty?
1130
-
1131
- # *now* we can set the ptime
1132
- newh[:ptime] = now if newh[:ptime] == oldh[:ptime]
1133
- changed << :ptime unless changed.include? :ptime
1134
-
1135
- # we don't index the flags
1136
- (changed - [:flags]).each do |k|
1137
- index_rm k, oldh[k], ptr if oldh[k]
1138
- if k == :dtime
1139
- index_rm :etime, oldh[:dtime], ptr if was_cache
1140
- index_add :etime, newh[:dtime], ptr if is_cache
1141
- else
1142
- index_add k, newh[k], ptr if newh[k]
1143
- end
1144
- end
1145
- else
1146
-
1147
- # we are unambiguously adding a thing
1148
- delta = deleted ? 0 : 1
1149
-
1150
- newh[:ctime] ||= now
1151
- newh[:mtime] ||= now
1152
- newh[:ptime] ||= now
1153
- newh[:type] ||= 'application/octet-stream'
1154
-
1155
- # set the algo mappings
1156
- algorithms.each do |algo|
1157
- # warn "setting #{algo} -> #{obj[algo].hexdigest}"
1158
- @dbs[algo].put? obj[algo].digest, ptr
1159
- end
1160
-
1161
- # set the indices
1162
- RECORD.except(:flags).keys.each do |k|
1163
- if newh[k]
1164
- # special case for non-deleted cache
1165
- kk = k == :dtime ? (is_cache && !deleted) ? :etime : :dtime : k
1166
- index_add kk, newh[k], ptr
1167
- end
1168
- end
1169
- end
1170
-
1171
- # okay now we actually set the entry
1172
- @dbs[:entry][ptr] = deflate newh
1173
-
1174
- # now we handle the counts
1175
- if oldrec
1176
- # here we are replacing a record that could be a tombstone,
1177
- # potentially with another tombstone, so we could be adding,
1178
- # removing, or neither.
1179
- control_add :objects, delta
1180
- control_add :deleted, -delta
1181
- control_add :bytes, newh[:size] * delta
1182
- else
1183
- # here we are unconditionally adding a new record, but the
1184
- # record we could be adding could itself be a tombstone.
1185
- control_add :objects, 1
1186
-
1187
- if delta > 0
1188
- # it's an ordinary entry
1189
- control_add :bytes, newh[:size]
1190
- else
1191
- # it's a tombstone
1192
- control_add :deleted, 1
1193
- end
1194
- end
1195
-
1196
- # and finally update the mtime
1197
- control_set :mtime, now
1198
-
1199
- txn.commit
1200
-
1201
- newh
1202
- end
1203
- end
1204
-
1205
- # Set `dtime` to the current timestamp and update the indices and stats.
1206
- #
1207
- # @param obj [Store::Digest::Object, Hash, URI::NI, Integer] the
1208
- # entry's key, or an object from which it can be resolved
1209
- #
1210
- # @return [Hash, nil] the record, if it exists
1211
- #
1212
- def mark_meta_deleted obj
1213
- @lmdb.transaction do
1214
- # nothing to do if there's no entry
1215
- ptr = get_ptr(obj, raw: true) or break
1216
- rec = get_meta ptr
1217
- now = Time.now in: ?Z
1218
-
1219
- # it's already deleted and we don't need to do anything
1220
- break if rec[:dtime] and rec[:dtime] < now
1221
-
1222
- # grab this to get the index
1223
- old = rec[:dtime]
1224
-
1225
- # set the new dtime
1226
- rec[:dtime] = now
1227
-
1228
- # update the entry
1229
- @dbs[:entry][ptr] = deflate rec
1230
-
1231
- # deal with the indices
1232
- %i[dtime etime].each { |k| index_rm k, old, ptr } if old
1233
- index_add :dtime, now, ptr
1234
-
1235
- # deal with the stats/mtime
1236
- control_add :deleted, 1
1237
- control_add :bytes, -rec[:size]
1238
- control_set :mtime, now
1239
-
1240
- rec
1241
- end
1242
- end
1243
-
1244
- # Purge the metadata entry from the database and remove it from
1245
- # the indices.
1246
- #
1247
- # @param obj [Store::Digest::Object, Hash, URI::NI, Integer] the
1248
- # entry's key, or an object from which it can be resolved
1249
- #
1250
- # @return [Hash, nil] the record, if it exists
1251
- #
1252
- def remove_meta obj
1253
- @lmdb.transaction do
1254
- # nothing to do if there's no entry
1255
- ptr = get_ptr(obj) or break
1256
- rec = get_meta ptr
1257
- now = Time.now in: ?Z
1258
-
1259
- # overwrite the dtime
1260
- deleted = rec[:dtime] and rec[:dtime] < now
1261
- rec[:dtime] = now
1262
-
1263
- # deal with indices
1264
- RECORD.merge({etime: nil}).except(:flags).keys.each do |key|
1265
- index_rm key, rec[key], ptr
1266
- end
1267
-
1268
- # deal with the hashes
1269
- algorithms.each do |algo|
1270
- # XXX this *should* match?
1271
- uri = rec[:digests][algo]
1272
- @dbs[algo].delete? uri.digest, ptr
1273
- end
1274
-
1275
- # deal with stats
1276
- control_add :objects, -1
1277
- if deleted
1278
- control_add :deleted, -1
1279
- else
1280
- control_add :bytes, -rec[:size]
1281
- end
1282
-
1283
- # the deleted record
1284
- rec
1285
- end
1286
- end
1287
- end
1288
-
1289
307
  end