cc12703-diskcache 5.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
diskcache/fanout.py ADDED
@@ -0,0 +1,687 @@
1
+ """Fanout cache automatically shards keys and values."""
2
+
3
+ import contextlib as cl
4
+ import functools
5
+ import itertools as it
6
+ import operator
7
+ import os.path as op
8
+ import sqlite3
9
+ import tempfile
10
+ import time
11
+
12
+ from .core import DEFAULT_SETTINGS, ENOVAL, Cache, Disk, Timeout
13
+ from .persistent import Deque, Index
14
+
15
+
16
+ class FanoutCache:
17
+ """Cache that shards keys and values."""
18
+
19
def __init__(
    self, directory=None, shards=8, timeout=0.010, disk=Disk, **settings
):
    """Initialize cache instance.

    A temporary directory (prefix ``diskcache-``) is created when
    `directory` is None. ``~`` and environment variables in `directory`
    are expanded. One `Cache` is opened per shard in a zero-padded
    subdirectory (``000``, ``001``, ...) and the ``size_limit`` setting is
    divided evenly between the shards.

    :param str directory: cache directory (default None, use a temporary
        directory)
    :param int shards: number of shards to distribute writes
    :param float timeout: SQLite connection timeout
    :param disk: `Disk` type or subclass for serialization
    :param settings: any of `DEFAULT_SETTINGS`

    """
    if directory is None:
        directory = tempfile.mkdtemp(prefix='diskcache-')
    directory = op.expandvars(op.expanduser(str(directory)))

    # The overall size limit is shared, so each shard gets an equal slice.
    fallback_limit = DEFAULT_SETTINGS['size_limit']
    shard_limit = settings.pop('size_limit', fallback_limit) / shards

    self._count = shards
    self._directory = directory
    self._disk = disk

    shard_caches = []
    for number in range(shards):
        shard_caches.append(
            Cache(
                directory=op.join(directory, '%03d' % number),
                timeout=timeout,
                disk=disk,
                size_limit=shard_limit,
                **settings,
            )
        )
    self._shards = tuple(shard_caches)

    # Keys are routed to shards with the first shard's disk hash function.
    self._hash = self._shards[0].disk.hash

    # Registries of named sub-caches handed out by cache/deque/index.
    self._caches = {}
    self._deques = {}
    self._indexes = {}
57
+
58
@property
def directory(self):
    """Root cache directory (read-only)."""
    root = self._directory
    return root
62
+
63
def __getattr__(self, name):
    """Forward settings attribute lookups to the first shard.

    Called only when normal attribute lookup fails. Names found in
    `DEFAULT_SETTINGS`, plus ``timeout`` and ``disk``, are read from the
    first shard; every other name is rejected.

    :param str name: attribute name
    :return: value of attribute `name` on the first shard
    :raises AttributeError: if `name` is not a forwarded attribute

    """
    forwarded = name in DEFAULT_SETTINGS or name in ('timeout', 'disk')
    if not forwarded:
        # Raise AttributeError (not AssertionError) so that hasattr() and
        # getattr() with a default work, and so that the check is not
        # stripped when Python runs with -O.
        raise AttributeError(
            'cannot access {} in cache shard'.format(name)
        )
    return getattr(self._shards[0], name)
68
+
69
@cl.contextmanager
def transact(self, retry=True):
    """Context manager that holds a transaction on every cache shard.

    While the cache is locked, no other write operation is permitted.
    Transactions should therefore be as short as possible. Read and write
    operations performed in a transaction are atomic. Read operations may
    occur concurrent to a transaction.

    Transactions may be nested and may not be shared between threads.

    A transaction is opened on each shard in turn, always with
    ``retry=True``, so entering blocks until all shards are held.

    >>> cache = FanoutCache()
    >>> with cache.transact():  # Atomically increment two keys.
    ...     _ = cache.incr('total', 123.4)
    ...     _ = cache.incr('count', 1)
    >>> with cache.transact():  # Atomically calculate average.
    ...     average = cache['total'] / cache['count']
    >>> average
    123.4

    :param bool retry: must be True (default True)
    :return: context manager for use in `with` statement

    """
    assert retry, 'retry must be True in FanoutCache'
    with cl.ExitStack() as held:
        # The stack releases the shard transactions in reverse order.
        for shard in self._shards:
            held.enter_context(shard.transact(retry=True))
        yield
101
+
102
def set(self, key, value, expire=None, read=False, tag=None, retry=False):
    """Store `value` under `key` in the shard that owns `key`.

    When `read` is `True`, `value` should be a file-like object opened
    for reading in binary mode.

    A `Timeout` raised by the shard is swallowed and reported as `False`.

    :param key: key for item
    :param value: value for item
    :param float expire: seconds until the key expires
        (default None, no expiry)
    :param bool read: read value as raw bytes from file (default False)
    :param str tag: text to associate with key (default None)
    :param bool retry: retry if database timeout occurs (default False)
    :return: True if item was set

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        stored = owner.set(key, value, expire, read, tag, retry)
    except Timeout:
        return False
    return stored
127
+
128
def __setitem__(self, key, value):
    """Store `value` under `key` via item assignment.

    Delegates to item assignment on the shard that owns `key`.

    :param key: key for item
    :param value: value for item

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    owner[key] = value
140
+
141
def touch(self, key, expire=None, retry=False):
    """Update the `expire` time of `key` in the shard that owns it.

    A `Timeout` raised by the shard is swallowed and reported as `False`.

    :param key: key for item
    :param float expire: seconds until the key expires
        (default None, no expiry)
    :param bool retry: retry if database timeout occurs (default False)
    :return: True if key was touched

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        touched = owner.touch(key, expire, retry)
    except Timeout:
        return False
    return touched
160
+
161
def add(self, key, value, expire=None, read=False, tag=None, retry=False):
    """Add `key` and `value` item to cache only if `key` is not present.

    Similar to `set`, but existing keys are left untouched.

    This operation is atomic. Only one concurrent add operation for given
    key from separate threads or processes will succeed.

    When `read` is `True`, `value` should be a file-like object opened
    for reading in binary mode.

    A `Timeout` raised by the shard is swallowed and reported as `False`.

    :param key: key for item
    :param value: value for item
    :param float expire: seconds until the key expires
        (default None, no expiry)
    :param bool read: read value as bytes from file (default False)
    :param str tag: text to associate with key (default None)
    :param bool retry: retry if database timeout occurs (default False)
    :return: True if item was added

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        added = owner.add(key, value, expire, read, tag, retry)
    except Timeout:
        return False
    return added
191
+
192
def incr(self, key, delta=1, default=0, retry=False):
    """Increment the value stored under `key` by `delta`.

    If key is missing and default is None then raise KeyError. Else if key
    is missing and default is not None then use default for value.

    Operation is atomic. All concurrent increment operations will be
    counted individually.

    Assumes value may be stored in a SQLite column. Most builds that target
    machines with 64-bit pointer widths will support 64-bit signed
    integers.

    A `Timeout` raised by the shard is swallowed and reported as `None`.

    :param key: key for item
    :param int delta: amount to increment (default 1)
    :param int default: value if key is missing (default 0)
    :param bool retry: retry if database timeout occurs (default False)
    :return: new value for item on success else None
    :raises KeyError: if key is not found and default is None

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        updated = owner.incr(key, delta, default, retry)
    except Timeout:
        return None
    return updated
222
+
223
def decr(self, key, delta=1, default=0, retry=False):
    """Decrement the value stored under `key` by `delta`.

    If key is missing and default is None then raise KeyError. Else if key
    is missing and default is not None then use default for value.

    Operation is atomic. All concurrent decrement operations will be
    counted individually.

    Unlike Memcached, negative values are supported. Value may be
    decremented below zero.

    Assumes value may be stored in a SQLite column. Most builds that target
    machines with 64-bit pointer widths will support 64-bit signed
    integers.

    A `Timeout` raised by the shard is swallowed and reported as `None`.

    :param key: key for item
    :param int delta: amount to decrement (default 1)
    :param int default: value if key is missing (default 0)
    :param bool retry: retry if database timeout occurs (default False)
    :return: new value for item on success else None
    :raises KeyError: if key is not found and default is None

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        updated = owner.decr(key, delta, default, retry)
    except Timeout:
        return None
    return updated
256
+
257
def get(
    self,
    key,
    default=None,
    read=False,
    expire_time=False,
    tag=False,
    retry=False,
):
    """Retrieve the value for `key`, or `default` if it is missing.

    `default` is also returned when the shard raises `Timeout` or
    `sqlite3.OperationalError`.

    :param key: key for item
    :param default: return value if key is missing (default None)
    :param bool read: if True, return file handle to value
        (default False)
    :param float expire_time: if True, return expire_time in tuple
        (default False)
    :param tag: if True, return tag in tuple (default False)
    :param bool retry: retry if database timeout occurs (default False)
    :return: value for item if key is found else default

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        found = owner.get(key, default, read, expire_time, tag, retry)
    except (Timeout, sqlite3.OperationalError):
        return default
    return found
288
+
289
def __getitem__(self, key):
    """Return the value stored under `key` via item lookup.

    Delegates to item lookup on the shard that owns `key`.

    :param key: key for item
    :return: value for item
    :raises KeyError: if key is not found

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    return owner[key]
302
+
303
def read(self, key):
    """Return file handle corresponding to `key` from cache.

    Calls `get` with ``read=True`` and ``retry=True``, using the `ENOVAL`
    sentinel as the default to detect a missing key.

    :param key: key for item
    :return: file open for reading in binary mode
    :raises KeyError: if key is not found

    """
    found = self.get(key, default=ENOVAL, read=True, retry=True)
    if found is not ENOVAL:
        return found
    raise KeyError(key)
315
+
316
def __contains__(self, key):
    """Return `True` if the shard that owns `key` contains it.

    :param key: key for item
    :return: True if key is found

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    return key in owner
326
+
327
def pop(
    self, key, default=None, expire_time=False, tag=False, retry=False
):
    """Remove the item for `key` from cache and return its value.

    If `key` is missing, return `default`.

    Operation is atomic. Concurrent operations will be serialized.

    A `Timeout` raised by the shard is swallowed and `default` is
    returned.

    :param key: key for item
    :param default: return value if key is missing (default None)
    :param float expire_time: if True, return expire_time in tuple
        (default False)
    :param tag: if True, return tag in tuple (default False)
    :param bool retry: retry if database timeout occurs (default False)
    :return: value for item if key is found else default

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        popped = owner.pop(key, default, expire_time, tag, retry)
    except Timeout:
        return default
    return popped
354
+
355
def delete(self, key, retry=False):
    """Delete the item for `key` from the shard that owns it.

    Missing keys are ignored.

    A `Timeout` raised by the shard is swallowed and reported as `False`.

    :param key: key for item
    :param bool retry: retry if database timeout occurs (default False)
    :return: True if item was deleted

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    try:
        deleted = owner.delete(key, retry)
    except Timeout:
        return False
    return deleted
374
+
375
def __delitem__(self, key):
    """Delete the item for `key` via the ``del`` statement.

    Delegates to item deletion on the shard that owns `key`.

    :param key: key for item
    :raises KeyError: if key is not found

    """
    slot = self._hash(key) % self._count
    owner = self._shards[slot]
    del owner[key]
387
+
388
def check(self, fix=False, retry=False):
    """Check database and file system consistency of every shard.

    Intended for use in testing and post-mortem error analysis.

    While checking the cache table for consistency, a writer lock is held
    on the database. The lock blocks other cache clients from writing to
    the database. For caches with many file references, the lock may be
    held for a long time. For example, local benchmarking shows that a
    cache with 1,000 file references takes ~60ms to check.

    Shards are checked in order and their warnings are concatenated into
    one list. `Timeout` is not caught here.

    :param bool fix: correct inconsistencies
    :param bool retry: retry if database timeout occurs (default False)
    :return: list of warnings
    :raises Timeout: if database timeout occurs

    """
    collected = []
    for shard in self._shards:
        collected += shard.check(fix, retry)
    return collected
410
+
411
def expire(self, retry=False):
    """Remove expired items from every shard.

    The current time is sampled once and handed to all shards.

    :param bool retry: retry if database timeout occurs (default False)
    :return: count of items removed

    """
    now = time.time()
    return self._remove('expire', args=(now,), retry=retry)
422
+
423
def create_tag_index(self):
    """Create tag index on the database of every shard.

    Better to initialize cache with `tag_index=True` than use this.

    :raises Timeout: if database timeout occurs

    """
    for member in self._shards:
        member.create_tag_index()
433
+
434
def drop_tag_index(self):
    """Drop tag index on the database of every shard.

    :raises Timeout: if database timeout occurs

    """
    for member in self._shards:
        member.drop_tag_index()
442
+
443
def evict(self, tag, retry=False):
    """Remove items with matching `tag` from every shard.

    :param str tag: tag identifying items
    :param bool retry: retry if database timeout occurs (default False)
    :return: count of items removed

    """
    tag_args = (tag,)
    return self._remove('evict', args=tag_args, retry=retry)
455
+
456
def cull(self, retry=False):
    """Cull items from every shard until volume is less than size limit.

    :param bool retry: retry if database timeout occurs (default False)
    :return: count of items removed

    """
    culled = self._remove('cull', retry=retry)
    return culled
467
+
468
def clear(self, retry=False):
    """Remove all items from every shard.

    :param bool retry: retry if database timeout occurs (default False)
    :return: count of items removed

    """
    cleared = self._remove('clear', retry=retry)
    return cleared
479
+
480
def _remove(self, name, args=(), retry=False):
    """Call removal method `name` on every shard and total the counts.

    When a shard raises `Timeout`, the first argument of the exception is
    added to the total and the same shard is called again until it
    completes without timing out.

    :param str name: name of the shard method to call
    :param tuple args: positional arguments for the method (default ())
    :param bool retry: passed to the method as keyword `retry`
    :return: total count accumulated over all shards

    """
    removed = 0
    for shard in self._shards:
        remover = getattr(shard, name)
        finished = False
        while not finished:
            try:
                removed += remover(*args, retry=retry)
            except Timeout as partial:
                # Count the partial progress, then go around again.
                removed += partial.args[0]
            else:
                finished = True
    return removed
493
+
494
def stats(self, enable=True, reset=False):
    """Return cache hits and misses summed over all shards.

    :param bool enable: enable collecting statistics (default True)
    :param bool reset: reset hits and misses to 0 (default False)
    :return: (hits, misses)

    """
    hit_total = 0
    miss_total = 0
    for shard in self._shards:
        shard_hits, shard_misses = shard.stats(enable, reset)
        hit_total = hit_total + shard_hits
        miss_total = miss_total + shard_misses
    return hit_total, miss_total
506
+
507
def volume(self):
    """Return estimated total size of cache on disk, summed over shards.

    :return: size in bytes

    """
    size = 0
    for shard in self._shards:
        size = size + shard.volume()
    return size
514
+
515
def close(self):
    """Close every shard and forget the named sub-cache registries.

    The registries of objects handed out by `cache`, `deque` and `index`
    are emptied; this method does not call `close` on those objects.

    """
    for member in self._shards:
        member.close()
    for registry in (self._caches, self._deques, self._indexes):
        registry.clear()
522
+
523
def __enter__(self):
    """Enter a `with` block; the cache itself is the context value."""
    return self
525
+
526
def __exit__(self, *exception):
    """Leave a `with` block by closing the cache.

    Exception details are ignored and nothing is returned, so an exception
    raised inside the block is not suppressed.

    """
    self.close()
528
+
529
def __getstate__(self):
    """Return the pickle state.

    :return: tuple of (directory, shard count, timeout, disk type), in the
        positional order of the `__init__` parameters

    """
    disk_type = type(self.disk)
    return (self._directory, self._count, self.timeout, disk_type)
531
+
532
def __setstate__(self, state):
    """Restore from pickle state by re-running `__init__`.

    :param tuple state: positional arguments for `__init__`, as produced
        by `__getstate__`

    """
    self.__init__(*state)
534
+
535
def __iter__(self):
    """Iterate keys in cache including expired items.

    Keys are produced shard by shard, in shard order.

    """
    per_shard = map(iter, self._shards)
    return it.chain.from_iterable(per_shard)
539
+
540
def __reversed__(self):
    """Reverse iterate keys in cache including expired items.

    Shards are visited last to first and each shard is itself iterated in
    reverse.

    """
    per_shard = map(reversed, reversed(self._shards))
    return it.chain.from_iterable(per_shard)
544
+
545
def __len__(self):
    """Count of items in cache including expired items."""
    count = 0
    for shard in self._shards:
        count = count + len(shard)
    return count
548
+
549
def reset(self, key, value=ENOVAL):
    """Reset `key` and `value` item from Settings table on every shard.

    If `value` is not given, it is reloaded from the Settings
    table. Otherwise, the Settings table is updated.

    Settings attributes on cache objects are lazy-loaded and
    read-only. Use `reset` to update the value.

    Settings with the ``sqlite_`` prefix correspond to SQLite
    pragmas. Updating the value will execute the corresponding PRAGMA
    statement.

    A shard that raises `Timeout` is tried again until it succeeds. The
    value returned is the one reported by the last shard.

    :param str key: Settings key for item
    :param value: value for item (optional)
    :return: updated value for item

    """
    for shard in self._shards:
        while True:
            try:
                result = shard.reset(key, value)
            except Timeout:
                continue
            break
    return result
576
+
577
def cache(self, name, timeout=60, disk=None, **settings):
    """Return Cache with given `name` in subdirectory.

    If disk is none (default), uses the fanout cache disk.

    The Cache is created on first use under ``<directory>/cache/<name>``
    (``/`` in `name` separates nested subdirectories) and remembered by
    `name`. Later calls with the same `name` return the remembered Cache
    and ignore `timeout`, `disk` and `settings`.

    >>> fanout_cache = FanoutCache()
    >>> cache = fanout_cache.cache('test')
    >>> cache.set('abc', 123)
    True
    >>> cache.get('abc')
    123
    >>> len(cache)
    1
    >>> cache.delete('abc')
    True

    :param str name: subdirectory name for Cache
    :param float timeout: SQLite connection timeout
    :param disk: Disk type or subclass for serialization
    :param settings: any of DEFAULT_SETTINGS
    :return: Cache with given name

    """
    registry = self._caches

    try:
        return registry[name]
    except KeyError:
        pass

    directory = op.join(self._directory, 'cache', *name.split('/'))
    named_cache = Cache(
        directory=directory,
        timeout=timeout,
        # Honor the caller's disk type; only fall back to the fanout
        # cache's own disk type when none was given.
        disk=self._disk if disk is None else disk,
        **settings,
    )
    registry[name] = named_cache
    return named_cache
615
+
616
def deque(self, name, maxlen=None):
    """Return Deque with given `name` in subdirectory.

    The Deque is created on first use under ``<directory>/deque/<name>``
    (``/`` in `name` separates nested subdirectories), backed by a Cache
    with eviction policy ``'none'``, and remembered by `name`. Later
    calls with the same `name` return the remembered Deque and ignore
    `maxlen`.

    >>> cache = FanoutCache()
    >>> deque = cache.deque('test')
    >>> deque.extend('abc')
    >>> deque.popleft()
    'a'
    >>> deque.pop()
    'c'
    >>> len(deque)
    1

    :param str name: subdirectory name for Deque
    :param maxlen: max length (default None, no max)
    :return: Deque with given name

    """
    registry = self._deques

    try:
        return registry[name]
    except KeyError:
        pass

    directory = op.join(self._directory, 'deque', *name.split('/'))
    backing = Cache(
        directory=directory,
        disk=self._disk,
        eviction_policy='none',
    )
    named_deque = Deque.fromcache(backing, maxlen=maxlen)
    registry[name] = named_deque
    return named_deque
649
+
650
def index(self, name):
    """Return Index with given `name` in subdirectory.

    The Index is created on first use under ``<directory>/index/<name>``
    (``/`` in `name` separates nested subdirectories), backed by a Cache
    with eviction policy ``'none'``, and remembered by `name`.

    >>> cache = FanoutCache()
    >>> index = cache.index('test')
    >>> index['abc'] = 123
    >>> index['def'] = 456
    >>> index['ghi'] = 789
    >>> index.popitem()
    ('ghi', 789)
    >>> del index['abc']
    >>> len(index)
    1
    >>> index['def']
    456

    :param str name: subdirectory name for Index
    :return: Index with given name

    """
    registry = self._indexes

    try:
        return registry[name]
    except KeyError:
        pass

    directory = op.join(self._directory, 'index', *name.split('/'))
    backing = Cache(
        directory=directory,
        disk=self._disk,
        eviction_policy='none',
    )
    named_index = Index.fromcache(backing)
    registry[name] = named_index
    return named_index
685
+
686
+
687
+ FanoutCache.memoize = Cache.memoize # type: ignore # reuse Cache's memoize function as a FanoutCache method