cs-fileutils 20240630__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cs/fileutils.py ADDED
@@ -0,0 +1,1871 @@
1
+ #!/usr/bin/python
2
+ #
3
+ # Assorted convenience functions for files and filenames/pathnames.
4
+ # - Cameron Simpson <cs@cskk.id.au>
5
+
6
+ ''' My grab bag of convenience functions for files and filenames/pathnames.
7
+ '''
8
+
9
+ # pylint: disable=too-many-lines
10
+
11
+ from __future__ import with_statement, print_function, absolute_import
12
+ from contextlib import contextmanager
13
+ import errno
14
+ from functools import partial
15
+ import gzip
16
+ import os
17
+ from os import SEEK_CUR, SEEK_END, SEEK_SET, O_RDONLY, read, rename
18
+ try:
19
+ from os import pread
20
+ except ImportError:
21
+ pread = None
22
+ from os.path import (
23
+ abspath,
24
+ basename,
25
+ dirname,
26
+ exists as existspath,
27
+ isabs as isabspath,
28
+ isdir,
29
+ join as joinpath,
30
+ splitext,
31
+ )
32
+ import shutil
33
+ import stat
34
+ import sys
35
+ from tempfile import TemporaryFile, NamedTemporaryFile, mkstemp
36
+ from threading import Lock, RLock
37
+ import time
38
+
39
+ from cs.buffer import CornuCopyBuffer
40
+ from cs.context import stackattrs
41
+ from cs.deco import cachedmethod, decorator, fmtdoc, strable
42
+ from cs.filestate import FileState
43
+ from cs.fs import shortpath
44
+ from cs.gimmicks import TimeoutError # pylint: disable=redefined-builtin
45
+ from cs.lex import as_lines, cutsuffix, common_prefix
46
+ from cs.logutils import error, warning, debug
47
+ from cs.pfx import Pfx, pfx, pfx_call
48
+ from cs.progress import Progress, progressbar
49
+ from cs.py3 import ustr, bytes, pread # pylint: disable=redefined-builtin
50
+ from cs.range import Range
51
+ from cs.resources import RunState, uses_runstate
52
+ from cs.result import CancellationError
53
+ from cs.threads import locked
54
+ from cs.units import BINARY_BYTES_SCALE
55
+
56
+ __version__ = '20240630'
57
+
58
+ DISTINFO = {
59
+ 'keywords': ["python2", "python3"],
60
+ 'classifiers': [
61
+ "Programming Language :: Python",
62
+ "Programming Language :: Python :: 2",
63
+ "Programming Language :: Python :: 3",
64
+ ],
65
+ 'install_requires': [
66
+ 'cs.buffer',
67
+ 'cs.context',
68
+ 'cs.deco',
69
+ 'cs.filestate',
70
+ 'cs.fs>=shortpath',
71
+ 'cs.gimmicks>=TimeoutError',
72
+ 'cs.lex>=20200914',
73
+ 'cs.logutils',
74
+ 'cs.pfx>=pfx_call',
75
+ 'cs.progress',
76
+ 'cs.py3',
77
+ 'cs.range',
78
+ 'cs.resources',
79
+ 'cs.result',
80
+ 'cs.threads',
81
+ 'cs.units',
82
+ ],
83
+ }
84
+
85
+ DEFAULT_POLL_INTERVAL = 1.0
86
+ DEFAULT_READSIZE = 131072
87
+ DEFAULT_TAIL_PAUSE = 0.25
88
+
89
def seekable(fp):
    ''' Report whether the file-like object `fp` appears to be seekable.

        The answer comes from the first of these which applies:
        * the result of `fp.seekable()`, if `fp` has such a method
        * whether `os.fstat(fp.fileno())` describes a regular file,
          if `fp` has a `fileno` method
        * otherwise `False`
    '''
    try:
        probe = fp.seekable
    except AttributeError:
        pass
    else:
        return probe()
    try:
        fileno = fp.fileno
    except AttributeError:
        # no way to find out: presume not seekable
        return False
    return stat.S_ISREG(os.fstat(fileno()).st_mode)
105
+
106
def saferename(oldpath, newpath):
    ''' Rename `oldpath` to `newpath` using `os.rename()`,
        but raise an exception if the target path already exists.

        Raises `OSError` with `errno.EEXIST` (a `FileExistsError`)
        naming `newpath` if `newpath` already exists,
        including as a dangling symlink since the check uses `os.lstat()`.
        Any `os.lstat()` failure other than `ENOENT` is reraised,
        as is any failure from `os.rename()`.

        Note: slightly racy: the existence check and the rename
        are separate system calls.
    '''
    try:
        os.lstat(newpath)
    except OSError as e:
        # only "does not exist" lets the rename go ahead
        if e.errno != errno.ENOENT:
            raise
    else:
        # Supply errno, message and filename so that the exception's
        # .errno attribute is actually set; OSError(errno) alone leaves it None.
        raise OSError(errno.EEXIST, os.strerror(errno.EEXIST), newpath)
    os.rename(oldpath, newpath)
118
+
119
def trysaferename(oldpath, newpath):
    ''' Attempt `saferename(oldpath, newpath)`,
        reporting the outcome as a Boolean instead of an exception.

        Return `True` if the rename happened,
        `False` if `saferename()` raised `OSError`.
        Other exceptions propagate.
    '''
    try:
        saferename(oldpath, newpath)
    except OSError:
        renamed = False
    else:
        renamed = True
    return renamed
130
+
131
def compare(f1, f2, mode="rb"):
    ''' Test whether two files have the same content.

        `f1` and `f2` may each be an open file-like object
        or a `str` naming a file;
        named files are opened with `mode` (default `"rb"`)
        and closed again before return.

        Both files are read completely, `f1` first,
        and the two results compared for equality.
    '''
    if isinstance(f1, str):
        # open the first name, then start over with the open file
        with open(f1, mode) as opened1:
            return compare(opened1, f2, mode)
    if isinstance(f2, str):
        with open(f2, mode) as opened2:
            return f1.read() == opened2.read()
    return f1.read() == f2.read()
144
+
145
+ # pylint: disable=too-many-locals,too-many-branches,too-many-statements
146
@contextmanager
def NamedTemporaryCopy(f, progress=False, progress_label=None, **kw):
    ''' A context manager yielding a temporary copy of `f`
        as returned by `NamedTemporaryFile(**kw)`.

        Parameters:
        * `f`: the name of the file to copy, or an open binary file,
          or an `int` file descriptor, or a `CornuCopyBuffer`
        * `progress`: an optional progress indicator, default `False`;
          if a `bool`, show a progress bar for the copy phase if true;
          if an `int`, show a progress bar for the copy phase
          if the file size equals or exceeds the value
          or if the size cannot be determined;
          otherwise it should be a `cs.progress.Progress` instance
          which is advanced by the number of bytes written
        * `progress_label`: optional progress bar label,
          only used if a progress bar is made
        Other keyword parameters are passed to `tempfile.NamedTemporaryFile`.

        When copying from something other than a filename
        the temporary file's `prefix` defaults to `'NamedTemporaryCopy'`.
        A warning is logged if the number of bytes written
        differs from the size inferred from a regular file.
    '''
    if isinstance(f, str):
        # copy named file
        filename = f
        if progress_label is None:
            progress_label = "copy " + repr(filename)
        # should we use shutil.copy() and display no progress?
        if progress is False:
            fast_mode = True
        else:
            with Pfx("stat(%r)", filename):
                S = os.stat(filename)
            # NOTE(review): every regular file takes the fast path,
            # even when progress reporting was requested - confirm intended
            fast_mode = stat.S_ISREG(S.st_mode)
        if fast_mode:
            with NamedTemporaryFile(**kw) as T:
                with Pfx("shutil.copy(%r,%r)", filename, T.name):
                    shutil.copy(filename, T.name)
                yield T
        else:
            # open the file and recurse to copy from the open file
            with Pfx("open(%r)", filename):
                with open(filename, 'rb') as f2:
                    with NamedTemporaryCopy(f2, progress=progress,
                                            progress_label=progress_label,
                                            **kw) as T:
                        yield T
        return
    prefix = kw.pop('prefix', None)
    if prefix is None:
        prefix = 'NamedTemporaryCopy'
    # prepare the buffer and try to infer the length
    if isinstance(f, CornuCopyBuffer):
        length = None
        bfr = f
    else:
        if isinstance(f, int):
            fd = f
            bfr = CornuCopyBuffer.from_fd(fd)
        else:
            bfr = CornuCopyBuffer.from_file(f)
            try:
                fd = f.fileno()
            except AttributeError:
                fd = None
        if fd is None:
            length = None
        else:
            S = os.fstat(fd)
            # only a regular file has a meaningful size
            length = S.st_size if stat.S_ISREG(S.st_mode) else None
    # determine whether we need a progress bar
    if isinstance(progress, bool):
        need_bar = progress
        progress = None
    elif isinstance(progress, int):
        need_bar = length is None or length >= progress
        progress = None
    else:
        need_bar = False
        assert isinstance(progress, Progress)
    with NamedTemporaryFile(prefix=prefix, **kw) as T:
        # wrap the buffer in a progress bar only when one was asked for
        # (the previous code had this condition inverted)
        if need_bar:
            it = progressbar(
                bfr,
                label=progress_label,
                total=length,
                itemlenfunc=len,
                units_scale=BINARY_BYTES_SCALE,
            )
        else:
            it = bfr
        nbs = 0
        for bs in it:
            # T.write may write less than requested: loop until all is written
            while bs:
                nwritten = T.write(bs)
                if progress is not None:
                    progress += nwritten
                if nwritten != len(bs):
                    warning(
                        "NamedTemporaryCopy: %r.write(%d bytes) => %d",
                        T.name,
                        len(bs),
                        nwritten,
                    )
                    bs = bs[nwritten:]
                else:
                    bs = b''
                nbs += nwritten
        bfr.close()
        T.flush()
        if length is not None and nbs != length:
            warning(
                "NamedTemporaryCopy: given length=%s, wrote %d bytes to %r",
                length,
                nbs,
                T.name,
            )
        yield T
257
+
258
+ # pylint: disable=too-many-arguments
259
def rewrite(
    filepath,
    srcf,
    mode='w',
    backup_ext=None,
    do_rename=False,
    do_diff=None,
    empty_ok=False,
    overwrite_anyway=False
):
    ''' Rewrite the file `filepath` with data from the file object `srcf`.

        Parameters:
        * `filepath`: the name of the file to rewrite.
        * `srcf`: the source file containing the new content.
        * `mode`: the write-mode for the file, default `'w'` (for text);
          use `'wb'` for binary data.
        * `empty_ok`: if true (default `False`),
          do not raise `ValueError` if the new data are empty.
        * `overwrite_anyway`: if true (default `False`),
          skip the content check and overwrite unconditionally.
        * `backup_ext`: if a nonempty string,
          take a backup of the original at `filepath + backup_ext`.
        * `do_diff`: if not `None`, call `do_diff(filepath,tempfile)`.
          Supplying `do_diff` forces the content check
          even if `overwrite_anyway` is true.
        * `do_rename`: if true (default `False`),
          rename the temp file to `filepath`
          after copying the permission bits.
          Otherwise (default), copy the tempfile to `filepath`;
          this preserves the file's inode and permissions etc.

        The new content is first written to a temporary file
        in the same directory as `filepath`.
        If the content check is performed and the content is unchanged
        nothing is modified.
        Raises `ValueError` if the new data are empty and `empty_ok` is false.
    '''
    with Pfx("rewrite(%r)", filepath):
        # delete=False: we remove the temp file ourselves below because the
        # do_rename path moves it away, and the automatic cleanup of
        # NamedTemporaryFile raises FileNotFoundError for a missing file
        # on Python before 3.12
        with NamedTemporaryFile(dir=dirname(filepath), mode=mode,
                                delete=False) as T:
            tmppath = T.name
            try:
                T.write(srcf.read())
                T.flush()
                if not empty_ok:
                    st = os.stat(tmppath)
                    if st.st_size == 0:
                        raise ValueError("no data in temp file")
                if do_diff or not overwrite_anyway:
                    # need to compare data
                    if compare(tmppath, filepath):
                        # data the same, do nothing
                        return
                if do_diff:
                    # call the supplied differ
                    do_diff(filepath, tmppath)
                if do_rename:
                    # rename new file into old path
                    # tries to preserve perms, but does nothing for other metadata
                    shutil.copymode(filepath, tmppath)
                    if backup_ext:
                        os.link(filepath, filepath + backup_ext)
                    os.rename(tmppath, filepath)
                else:
                    # overwrite old file - preserves perms, ownership, hard links
                    if backup_ext:
                        shutil.copy2(filepath, filepath + backup_ext)
                    shutil.copyfile(tmppath, filepath)
            finally:
                # discard the temp file unless it was renamed into place
                try:
                    os.remove(tmppath)
                except FileNotFoundError:
                    pass
317
+
318
@contextmanager
def rewrite_cmgr(filepath, mode='w', **kw):
    ''' A context manager for rewriting the file `filepath`:
        it yields a writable temporary file,
        and when the managed suite completes normally
        hands what was written to `rewrite()`.

        Parameters:
        * `filepath`: the name of the file to rewrite.
        * `mode`: the mode used to open the temporary file,
          default `'w'` for text.

        Other keyword parameters are passed to `rewrite()`.
        If the managed suite raises an exception `rewrite()` is not called.

        Example:

            with rewrite_cmgr(pathname, do_rename=True) as f:
                ... write new content to f ...
    '''
    with NamedTemporaryFile(mode=mode) as scratch:
        yield scratch
        # push buffered data to the file before rereading it by name
        scratch.flush()
        # reread as bytes so that rewrite() copies the content verbatim
        with open(scratch.name, 'rb') as new_content:
            rewrite(filepath, srcf=new_content, mode='wb', **kw)
337
+
338
def abspath_from_file(path, from_file):
    ''' Resolve `path` relative to the directory containing `from_file`,
        as one might do for an include file named inside `from_file`.

        An already absolute `path` is returned unchanged.
        Otherwise `from_file` is made absolute if necessary
        and `path` is joined to its directory name.
    '''
    if isabspath(path):
        return path
    anchor = from_file if isabspath(from_file) else abspath(from_file)
    return joinpath(dirname(anchor), path)
347
+
348
def poll_file(path, old_state, reload_file, missing_ok=False):
    ''' Poll the file `path` for modification
        by comparing its `FileState()` with `old_state`.

        If `old_state` is `None` or differs from the current state,
        call `reload_file(path)` and then take the state again.
        Return `(new_state,reload_result)` if the state was the same
        immediately before and after the reload (a stable file).
        Otherwise return `(None,None)`,
        which is also the result when the file was not modified.

        This may raise an `OSError` if `FileState(path)` does,
        and of course any exception raised by `reload_file`.

        If `missing_ok` is true then an `OSError` with `ENOENT`
        from `FileState(path)` just returns `(None,None)`.
    '''
    try:
        before = FileState(path)
    except OSError as e:
        if missing_ok and e.errno == errno.ENOENT:
            return None, None
        raise
    if old_state is not None and not old_state != before:
        # known state, unchanged: nothing to reload
        return None, None
    # first poll or changed state
    result = reload_file(path)
    try:
        after = FileState(path)
    except OSError as e:
        if missing_ok and e.errno == errno.ENOENT:
            return None, None
        raise
    # only trust the reload if the file did not change underneath it
    if after == before:
        return before, result
    return None, None
383
+
384
@decorator
def file_based(
    func,
    attr_name=None,
    filename=None,
    poll_delay=None,
    sig_func=None,
    **dkw
):
    ''' A decorator which caches a value obtained from a file.

        In addition to all the keyword arguments for `@cs.deco.cachedmethod`,
        this decorator also accepts the following arguments:
        * `attr_name`: the name for the associated attribute, used as
          the basis for the internal cache value attribute;
          default `func.__name__`
        * `filename`: the filename to monitor.
          Default from the `._`*attr_name*`__filename` attribute.
          This value will be passed to the method as the `filename` keyword
          parameter.
        * `poll_delay`: delay between file polls, default `DEFAULT_POLL_INTERVAL`.
        * `sig_func`: signature function used to encapsulate the relevant
          information about the file, called with the instance;
          default `cs.filestate.FileState(filename,missing_ok=True)`.

        If the decorated function raises `OSError` with `errno == ENOENT`,
        this returns `None`. Other exceptions are reraised.
    '''
    if attr_name is None:
        attr_name = func.__name__
    filename_attr = '_' + attr_name + '__filename'
    filename0 = filename
    if poll_delay is None:
        poll_delay = DEFAULT_POLL_INTERVAL
    # Note: sig_func is a named parameter and so can never appear in dkw;
    # the former dkw.pop('sig_func', None) always discarded the caller's
    # function in favour of the default below.
    if sig_func is None:

        def sig_func(self):
            ''' The default signature function: `FileState(filename,missing_ok=True)`.
            '''
            filename = filename0
            if filename is None:
                filename = getattr(self, filename_attr)
            return FileState(filename, missing_ok=True)

    def wrap0(self, *a, **kw):
        ''' Inner wrapper for `func`:
            fill in the `filename` keyword argument
            and turn a missing file into a `None` result.
        '''
        filename = kw.pop('filename', None)
        if filename is None:
            if filename0 is None:
                filename = getattr(self, filename_attr)
            else:
                filename = filename0
        kw['filename'] = filename
        try:
            return func(self, *a, **kw)
        except OSError as e:
            if e.errno == errno.ENOENT:
                return None
            raise

    dkw['attr_name'] = attr_name
    dkw['poll_delay'] = poll_delay
    dkw['sig_func'] = sig_func
    return cachedmethod(**dkw)(wrap0)
449
+
450
@decorator
def file_property(func, **dkw):
    ''' A decorator producing a property
        whose value is reloaded if a file changes.

        The keyword arguments are passed to `file_based`,
        and the resulting method is wrapped in a `property`.
    '''
    cached_method = file_based(func, **dkw)
    return property(cached_method)
455
+
456
def files_property(func):
    ''' A property whose value reloads if any of a list of files changes.

        Note: this is just `make_files_property()` in its default mode.

        The decorated function `func` is called with the instance
        and the current list of file paths
        and returns the new list of file paths and the associated value.

        The underlying attribute name is `'_'+func.__name__`,
        the default from `make_files_property()`;
        call that *attr_name*.
        The attribute *attr_name*`_lock` is a mutex controlling access
        to the property; the instance must provide it.
        The attribute *attr_name*`_paths` holds the current list of paths;
        the instance must provide the initial list.
        The attribute *attr_name*`_filestates` tracks the associated file states.
        The attribute *attr_name*`_lastpoll` tracks the last poll time.

        One example use would be a configuration file with recursive
        include operations; the inner function would parse the first
        file in the list, and the parse would accumulate this filename
        and those of any included files so that they can be monitored,
        triggering a fresh parse if one changes.

        Example:

            class C(object):
              def __init__(self):
                self._foo_lock = Lock()
                self._foo_paths = ['.foorc']
              @files_property
              def foo(self, paths):
                new_paths, result = parse(paths[0])
                return new_paths, result

        The load function is called on the first access and on
        later accesses where at least the poll interval (1s)
        has elapsed since the previous poll
        and an associated file's `FileState` has changed.
        An attempt at avoiding races is made by
        ignoring reloads that raise exceptions and ignoring reloads
        where files that were stat()ed during the change check have
        changed state after the load.
    '''
    make_property = make_files_property()
    return make_property(func)
497
+
498
+ # pylint: disable=too-many-statements
499
@fmtdoc
def make_files_property(
    attr_name=None, unset_object=None, poll_rate=DEFAULT_POLL_INTERVAL
):
    ''' Construct a decorator that watches multiple associated files.

        Parameters:
        * `attr_name`: the underlying attribute, default: `'_'+func.__name__`
        * `unset_object`: the sentinel value for "uninitialised", default: `None`
        * `poll_rate`: how often in seconds to poll the file for changes,
          default from `DEFAULT_POLL_INTERVAL`: `{DEFAULT_POLL_INTERVAL}`

        The attribute *attr_name*`_lock` controls access to the property.
        The attributes *attr_name*`_filestates` and *attr_name*`_paths` track the
        associated files' state.
        The attribute *attr_name*`_lastpoll` tracks the last poll time.

        The decorated function is passed the current list of files
        and returns the new list of files and the associated value.

        One example use would be a configuration file with recursive
        include operations; the inner function would parse the first
        file in the list, and the parse would accumulate this filename
        and those of any included files so that they can be monitored,
        triggering a fresh parse if one changes.

        Example:

            class C(object):
              def __init__(self):
                self._foo_lock = Lock()
                self._foo_paths = ['.foorc']
              @files_property
              def foo(self, paths):
                new_paths, result = parse(paths[0])
                return new_paths, result

        The load function is called on the first access and on
        later accesses where at least `poll_rate` seconds
        have elapsed since the previous poll
        and an associated file's `FileState` has changed.

        An attempt at avoiding races is made by
        ignoring reloads that raise exceptions and ignoring reloads
        where files that were `os.stat()`ed during the change check have
        changed state after the load.
    '''

    # pylint: disable=too-many-statements
    def made_files_property(func):
        ''' The decorator proper: wrap `func` in a polling property.
        '''
        value_attr = '_' + func.__name__ if attr_name is None else attr_name
        lock_attr = value_attr + '_lock'
        filestates_attr = value_attr + '_filestates'
        paths_attr = value_attr + '_paths'
        lastpoll_attr = value_attr + '_lastpoll'

        # pylint: disable=too-many-statements,too-many-branches
        def getprop(self):
            ''' Try to reload the property value from the file if the property value
                is stale and the file has been modified since the last reload.
            '''
            with getattr(self, lock_attr):
                now = time.time()
                last_poll = getattr(self, lastpoll_attr, None)
                if last_poll is None or last_poll + poll_rate <= now:
                    setattr(self, lastpoll_attr, now)
                    old_paths = getattr(self, paths_attr)
                    old_filestates = getattr(self, filestates_attr, None)
                    # states seen before the load, for the stability check
                    preload_states = {}
                    changed = old_filestates is None
                    if not changed:
                        # We deliberately stat every path instead of stopping
                        # at the first change found, to maximise the coverage
                        # of the stability check after the load.
                        for path, old_state in zip(old_paths, old_filestates):
                            try:
                                current_state = FileState(path)
                            except OSError:
                                changed = True
                                continue
                            preload_states[path] = current_state
                            if old_state != current_state:
                                changed = True
                    if changed:
                        try:
                            new_paths, new_value = func(self, old_paths)
                            new_filestates = [
                                FileState(new_path) for new_path in new_paths
                            ]
                        except (NameError, AttributeError):
                            # these look like programming errors: always raise
                            raise
                        except Exception as e:  # pylint: disable=broad-except
                            # keep the cached value if there is one
                            cached = getattr(self, value_attr, unset_object)
                            if cached is unset_object:
                                raise
                            debug(
                                "exception reloading .%s, keeping cached value: %s",
                                value_attr, e
                            )
                        else:
                            # examine new filestates in case they changed during load
                            # _if_ we knew about them from the earlier load
                            unstable = any(
                                path in preload_states
                                and preload_states[path] != new_state
                                for path, new_state in
                                zip(new_paths, new_filestates)
                            )
                            if not unstable:
                                setattr(self, value_attr, new_value)
                                setattr(self, paths_attr, new_paths)
                                setattr(self, filestates_attr, new_filestates)
                return getattr(self, value_attr, unset_object)

        return property(getprop)

    return made_files_property
620
+
621
+ # pylint: disable=too-many-branches
622
@uses_runstate
@pfx
def makelockfile(
    path,
    *,
    ext=None,
    poll_interval=None,
    timeout=None,
    runstate: RunState,
    keepopen=False,
    max_interval=37,
):
    ''' Create a lockfile and return its path.

        The lockfile can be removed with `os.remove`.
        This is the core functionality supporting the `lockfile()`
        context manager.

        Parameters:
        * `path`: the base associated with the lock file,
          often the filesystem object whose access is being managed.
        * `ext`: the extension to the base used to construct the lockfile name.
          Default: ".lock"
        * `timeout`: maximum time to wait before failing.
          Default: `None` (wait forever).
          Note that zero is an accepted value
          and requires the lock to succeed on the first attempt.
        * `poll_interval`: polling frequency when timeout is not 0.
          Default: `DEFAULT_POLL_INTERVAL`
        * `runstate`: optional `RunState` duck instance supporting cancellation.
          Note that if a cancelled `RunState` is provided
          no attempt will be made to make the lockfile.
        * `keepopen`: optional flag, default `False`:
          if true, do not close the lockfile and return `(lockpath,lockfd)`
          being the lock file path and the open file descriptor
        * `max_interval`: upper bound in seconds on the interval
          between "waited" warnings, default `37`

        Raises `ValueError` for a negative `timeout`,
        `TimeoutError` if the lock is not obtained in time
        and `CancellationError` if the `runstate` is cancelled.
    '''
    poll_interval = (
        DEFAULT_POLL_INTERVAL if poll_interval is None else poll_interval
    )
    ext = '.lock' if ext is None else ext
    if timeout is not None and timeout < 0:
        raise ValueError("timeout should be None or >= 0, not %r" % (timeout,))
    lockpath = path + ext
    # time of the first failed attempt, None until then
    start = None
    with Pfx("makelockfile: %r", lockpath):
        while True:
            if runstate.cancelled:
                warning(
                    "%s cancelled; pid %d waited %ds", runstate, os.getpid(),
                    0 if start is None else time.time() - start
                )
                raise CancellationError("lock acquisition cancelled")
            try:
                # exclusive create: fails with EEXIST if the lock is held
                lockfd = os.open(
                    lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0
                )
            except OSError as e:
                if e.errno != errno.EEXIST:
                    raise
                if timeout is not None and timeout <= 0:
                    # immediate failure
                    # pylint: disable=raise-missing-from
                    raise TimeoutError(
                        "pid %d timed out" % (os.getpid(),), timeout
                    )
                now = time.time()
                # post: timeout is None or timeout > 0
                if start is None:
                    # first try - set up counters
                    start = complaint_last = now
                    complaint_interval = 2 * max(
                        DEFAULT_POLL_INTERVAL, poll_interval
                    )
                elif now - complaint_last >= complaint_interval:
                    # complain, then back off the complaints up to max_interval
                    warning("pid %d waited %ds", os.getpid(), now - start)
                    complaint_last = now
                    complaint_interval = min(
                        complaint_interval * 2, max_interval
                    )
                # post: start is set
                if timeout is None:
                    sleep_for = poll_interval
                else:
                    # do not sleep past the deadline
                    sleep_for = min(poll_interval, start + timeout - now)
                    # test for timeout
                    if sleep_for <= 0:
                        # pylint: disable=raise-missing-from
                        raise TimeoutError(
                            "pid %d timed out" % (os.getpid(),), timeout
                        )
                time.sleep(sleep_for)
            else:
                # we made the lock file
                break
    if keepopen:
        return lockpath, lockfd
    os.close(lockfd)
    return lockpath
711
+
712
@contextmanager
@uses_runstate
def lockfile(
    path, *, ext=None, poll_interval=None, timeout=None, runstate: RunState
):
    ''' A context manager which takes and holds a lock file.
        An open file descriptor is kept for the lock file as well
        to aid locating the process holding the lock file using eg `lsof`.

        The context manager yields the path of the lock file.
        On exit the lock file is removed and the file descriptor closed;
        a lock file which has already been removed is logged as a warning.

        Parameters:
        * `path`: the base associated with the lock file.
        * `ext`: the extension to the base used to construct the lock file name.
          Default: `'.lock'`
        * `timeout`: maximum time to wait before failing.
          Default: `None` (wait forever).
        * `poll_interval`: polling frequency when timeout is not `0`.
        * `runstate`: optional `RunState` duck instance supporting cancellation.
    '''
    lockpath, lockfd = makelockfile(
        path,
        ext=ext,
        poll_interval=poll_interval,
        timeout=timeout,
        runstate=runstate,
        keepopen=True,
    )
    try:
        yield lockpath
    finally:
        try:
            try:
                pfx_call(os.remove, lockpath)
            except FileNotFoundError as e:
                warning("lock file already removed: %s", e)
        finally:
            # always release the descriptor, even if the remove failed
            # for some other reason such as a permission error
            pfx_call(os.close, lockfd)
746
+
747
def crop_name(name, ext=None, name_max=255):
    ''' Shorten the file basename `name` to at most `name_max` characters
        by trimming the part before the file extension.
        Return `name` unchanged if the part before the extension
        already fits.

        Parameters:
        * `name`: the file basename to crop
        * `ext`: optional file extension;
          if omitted, or if `cutsuffix(name, ext)` leaves `name` as is,
          the extension is inferred with `os.path.splitext`.
        * `name_max`: optional maximum length, default: `255`

        Raises `ValueError` if the extension alone is longer than `name_max`.
    '''
    if ext is not None:
        base = cutsuffix(name, ext)
        if base is name:
            # the supplied ext is not in play: infer one instead
            ext = None
    if ext is None:
        base, ext = splitext(name)
    room = name_max - len(ext)
    if room < 0:
        raise ValueError(
            "cannot crop name before ext %r to <=%s: name=%r" %
            (ext, name_max, name)
        )
    return name if len(base) <= room else base[:room] + ext
774
+
775
def max_suffix(dirpath, prefix):
    ''' Compute the highest existing numeric suffix
        for names in the directory `dirpath` starting with `prefix`.
        Return `None` if there are no such names.

        Only names consisting of `prefix` followed by one or more digits
        are considered.

        This is generally used as a starting point for picking
        a new numeric suffix.
    '''
    prefix = ustr(prefix)
    maxn = None
    pfxlen = len(prefix)
    for e in os.listdir(dirpath):
        e = ustr(e)
        if len(e) <= pfxlen or not e.startswith(prefix):
            continue
        tail = e[pfxlen:]
        if not tail.isdigit():
            continue
        try:
            n = int(tail)
        except ValueError:
            # isdigit() accepts characters such as superscript digits
            # which int() refuses: not a numeric suffix, skip the name
            continue
        if maxn is None or maxn < n:
            maxn = n
    return maxn
797
+
798
+ # pylint: disable=too-many-branches
799
def mkdirn(path, sep=''):
    ''' Create a new directory named `path+sep+n`,
        where `n` exceeds any name already present.
        Return the path of the new directory,
        or `None` (after logging an error) if the parent is not a directory
        or the `os.mkdir` fails for a reason other than `EEXIST`.

        Parameters:
        * `path`: the basic directory path.
          An empty `path` makes the directory in the current directory
          and returns just its basename.
          A `path` ending in `os.sep` makes a purely numeric name
          inside the directory `path`.
        * `sep`: a separator between `path` and `n`.
          Default: `''`

        Raises `ValueError` if `sep` contains `os.sep`
        or if a nonempty `sep` accompanies a `path` ending in `os.sep`.
    '''
    with Pfx("mkdirn(path=%r, sep=%r)", path, sep):
        if os.sep in sep:
            raise ValueError("sep contains os.sep (%r)" % (os.sep,))
        # remember the path as supplied, to shape the return value
        opath = path
        if not path:
            path = '.' + os.sep

        # work out the parent directory and the name prefix to scan for
        if not path.endswith(os.sep):
            dirpath = dirname(path) or '.'
            prefix = basename(path) + sep
        elif sep:
            raise ValueError(
                "mkdirn(path=%r, sep=%r): using non-empty sep"
                " with a trailing %r seems nonsensical" % (path, sep, os.sep)
            )
        else:
            dirpath = path[:-len(os.sep)]
            prefix = ''

        if not isdir(dirpath):
            error("parent not a directory: %r", dirpath)
            return None

        # do a quick scan of the directory to find
        # if any names of the desired form already exist
        # in order to start after them
        newn = max_suffix(dirpath, prefix) or 0

        while True:
            newn += 1
            newpath = path + sep + str(newn)
            try:
                os.mkdir(newpath)
            except OSError as e:
                if e.errno != errno.EEXIST:
                    error("mkdir(%s): %s", newpath, e)
                    return None
                # taken, try new value
                continue
            return newpath if opath else basename(newpath)
856
+
857
def tmpdir():
    ''' Return the pathname of the default temporary directory for scratch data:
        the value of the environment variable `$TMPDIR` if set,
        otherwise `'/tmp'`.
    '''
    try:
        return os.environ['TMPDIR']
    except KeyError:
        return '/tmp'
862
+
863
def tmpdirn(tmp=None):
    ''' Make a new temporary directory with a numeric suffix
        and return the result of `mkdirn()`.

        The directory is made inside `tmp` (default from `tmpdir()`)
        and named after the basename of `sys.argv[0]`.
    '''
    parent = tmpdir() if tmp is None else tmp
    stem = basename(sys.argv[0])
    return mkdirn(joinpath(parent, stem))
869
+
870
def find(path, select=None, sort_names=True):
    ''' Walk the directory tree at `path` with `os.walk`,
        yielding the paths accepted by `select`.

        Parameters:
        * `path`: the top of the tree to walk
        * `select`: optional callable accepting a path
          and returning true if the path should be yielded;
          the default accepts every path
        * `sort_names`: if true (the default) visit the names
          in each directory in sorted order

        In each directory the directory itself is yielded first,
        then its selected files.

        Note: not selecting a directory prunes all its descendants.
    '''
    if select is None:

        def select(_):
            return True

    for dirpath, dirnames, filenames in os.walk(path):
        if not select(dirpath):
            # prune: empty the list in place so that os.walk does not descend
            del dirnames[:]
            continue
        yield dirpath
        if sort_names:
            dirnames.sort()
            filenames.sort()
        for filename in filenames:
            filepath = joinpath(dirpath, filename)
            if select(filepath):
                yield filepath
        # only descend into selected subdirectories
        dirnames[:] = [
            subdir for subdir in dirnames if select(joinpath(dirpath, subdir))
        ]
894
+
895
def findup(path, test, first=False):
    ''' Generator testing `abspath(path)` and then each of its ancestors
        in turn against the callable `test`,
        yielding the paths for which `test` is true.

        If `first` is true (default `False`)
        this function always yields exactly one value,
        either the first path satisfying the test or `None`.
        This mode supports a use such as:

            matched_path = next(findup(path, test, first=True))
            # post condition: matched_path will be `None` on no match
            # otherwise the first matching path
    '''
    current = abspath(path)
    while True:
        if test(current):
            yield current
            if first:
                return
        parent = dirname(current)
        if parent == current:
            # reached the top of the tree
            if first:
                yield None
            return
        current = parent
921
+
922
def common_path_prefix(*paths):
    ''' Return the common path prefix of the `paths`.

        This is the common string prefix of the paths
        cut back to the end of its last complete path component,
        unless all the paths are identical.

        Note that the common prefix of `'/a/b/c1'` and `'/a/b/c2'`
        is `'/a/b/'`, _not_ `'/a/b/c'`.

        Callers may find it useful to preadjust the supplied paths
        with `normpath`, `abspath` or `realpath` from `os.path`;
        see the `os.path` documentation for the various caveats
        which go with those functions.

        Examples:

            >>> # the obvious
            >>> common_path_prefix('', '')
            ''
            >>> common_path_prefix('/', '/')
            '/'
            >>> common_path_prefix('a', 'a')
            'a'
            >>> common_path_prefix('a', 'b')
            ''
            >>> # nonempty directory path prefixes end in os.sep
            >>> common_path_prefix('/', '/a')
            '/'
            >>> # identical paths include the final basename
            >>> common_path_prefix('p/a', 'p/a')
            'p/a'
            >>> # the comparison does not normalise paths
            >>> common_path_prefix('p//a', 'p//a')
            'p//a'
            >>> common_path_prefix('p//a', 'p//b')
            'p//'
            >>> common_path_prefix('p//a', 'p/a')
            'p/'
            >>> common_path_prefix('p/a', 'p/b')
            'p/'
            >>> # the comparison strips complete unequal path components
            >>> common_path_prefix('p/a1', 'p/a2')
            'p/'
            >>> common_path_prefix('p/a/b1', 'p/a/b2')
            'p/a/'
            >>> # contrast with cs.lex.common_prefix
            >>> common_prefix('abc/def', 'abc/def1')
            'abc/def'
            >>> common_path_prefix('abc/def', 'abc/def1')
            'abc/'
            >>> common_prefix('abc/def', 'abc/def1', 'abc/def2')
            'abc/def'
            >>> common_path_prefix('abc/def', 'abc/def1', 'abc/def2')
            'abc/'
    '''
    prefix = common_prefix(*paths)
    if prefix.endswith(os.sep):
        # already ends on a component boundary
        return prefix
    first = paths[0]
    if all(path == first for path in paths):
        # identical paths: the whole path is the prefix
        return prefix
    # strip the partial final component from the prefix
    return prefix[:-len(basename(prefix))]
982
+
983
class Pathname(str):
    ''' A `str` subclass for filesystem paths
        whose properties are convenient inside format strings.
    '''

    _default_prefixes = (('$HOME/', '~/'),)

    def __format__(self, fmt_spec):
        ''' `format(<Pathname>, fmt_spec)` uses `fmt_spec`
            as a new style format string
            whose single positional parameter is `self`.
            An empty `fmt_spec` produces the plain string.
        '''
        if fmt_spec != '':
            return fmt_spec.format(self)
        return str(self)

    @property
    def dirname(self):
        ''' The directory part of this Pathname, as a Pathname.
        '''
        parent = dirname(self)
        return Pathname(parent)

    @property
    def basename(self):
        ''' The final component of this Pathname, as a Pathname.
        '''
        leaf = basename(self)
        return Pathname(leaf)

    @property
    def abs(self):
        ''' This Pathname made absolute with `abspath`, as a Pathname.
        '''
        absolute = abspath(self)
        return Pathname(absolute)

    @property
    def isabs(self):
        ''' Whether this Pathname is an absolute path.
        '''
        return isabspath(self)

    @property
    def short(self):
        ''' The shortened form of this Pathname: `self.shorten()`.
        '''
        return self.shorten()

    def shorten(self, prefixes=None):
        ''' Return this path shortened by `shortpath`,
            which is passed `prefixes` unchanged.
        '''
        return shortpath(self, prefixes=prefixes)
1032
+
1033
def iter_fd(fd, **kw):
    ''' Generator yielding data chunks from the file descriptor `fd`.
        Keyword arguments are passed to `CornuCopyBuffer.from_fd`.
    '''
    bfr = CornuCopyBuffer.from_fd(fd, **kw)
    for chunk in bfr:
        yield chunk
1038
+
1039
def iter_file(f, **kw):
    ''' Generator yielding data chunks from the file `f`.
        Keyword arguments are passed to `CornuCopyBuffer.from_file`.
    '''
    bfr = CornuCopyBuffer.from_file(f, **kw)
    for chunk in bfr:
        yield chunk
1044
+
1045
def byteses_as_fd(bss, **kw):
    ''' Present the iterable of bytes `bss` as a readable file descriptor
        and return that file descriptor.
        Keyword arguments are passed to `CornuCopyBuffer.as_fd`.

        Example:

            # present a passphrase for use as an input file descriptor
            # for a subprocess
            rfd = byteses_as_fd([(passphrase + '\n').encode()])
    '''
    bfr = CornuCopyBuffer(bss)
    return bfr.as_fd(**kw)
1057
+
1058
def datafrom_fd(fd, offset=None, readsize=None, aligned=True, maxlength=None):
    ''' General purpose reader for file descriptors yielding data from `offset`.
        **Note**: This does not move the file descriptor position
        **if** the file is seekable, because `pread` is used.
        If the file is not seekable the data are obtained with plain
        sequential `read` calls and `offset` is used only for alignment.

        Parameters:
        * `fd`: the file descriptor from which to read.
        * `offset`: the offset from which to read.
          If omitted, use the current file descriptor position
          (taken to be `0` if the descriptor is not seekable).
        * `readsize`: the read size, default: `DEFAULT_READSIZE`
        * `aligned`: if true (the default), the first read is sized
          so that the following read starts at a multiple of `readsize`.
        * `maxlength`: if specified yield no more than this many bytes of data.

        The generator finishes at end of file
        or once `maxlength` bytes have been yielded.
    '''
    try:
        cur_offset = os.lseek(fd, 0, SEEK_CUR)
    except OSError:
        # lseek failed: treat the descriptor as a sequential stream
        cur_offset = 0  # guess
        is_seekable = False
    else:
        is_seekable = True
    if offset is None:
        offset = cur_offset
    if readsize is None:
        readsize = DEFAULT_READSIZE
    # the size of the next read request
    nextsize = readsize
    if aligned:
        misalignment = offset % readsize
        if misalignment > 0:
            # Shorten the first read so that it ENDS on a multiple of readsize.
            # Reading just `misalignment` bytes would not do that:
            # offset=3, readsize=10 needs a 7 byte read, not a 3 byte read.
            nextsize = readsize - misalignment
    while maxlength is None or maxlength > 0:
        if maxlength is not None:
            nextsize = min(nextsize, maxlength)
        if is_seekable:
            bs = pread(fd, nextsize, offset)
        else:
            bs = read(fd, nextsize)
        if not bs:
            # end of file
            return
        yield bs
        bslen = len(bs)
        offset += bslen
        if maxlength is not None:
            maxlength -= bslen
        # every read after the first is a full sized read
        nextsize = readsize
1107
+
1108
@strable(open_func=lambda filename: os.open(filename, flags=O_RDONLY))
def datafrom(f, offset=None, readsize=None, maxlength=None):
    ''' General purpose reader for files yielding data from `offset`.

        *WARNING*: this function might move the file pointer.

        Parameters:
        * `f`: the file from which to read data;
          if a string, it is a filename which the `@strable` decorator
          opens read only with `os.open`
          (presumably handing the resulting file descriptor to this function);
          if an int, treated as an OS file descriptor;
          otherwise presumed to be a file-like object.
          If that object has a working `.fileno()` method
          referring to a regular file,
          treat that as an OS file descriptor and use it.
        * `offset`: starting offset for the data;
          if omitted, start from the current position of `f`
        * `maxlength`: optional maximum amount of data to yield
        * `readsize`: read size, default DEFAULT_READSIZE.

        For file-like objects, the read1 method is used in preference
        to read if available. The file pointer is briefly moved during
        fetches and restored after each fetch.
    '''
    if readsize is None:
        readsize = DEFAULT_READSIZE
    if isinstance(f, int):
        # operating system file descriptor
        fd = f
    else:
        # see if the file has a usable fileno for a regular file
        try:
            fd = f.fileno()
        except (AttributeError, OSError):
            # no fileno method, or one which refuses:
            # io.BytesIO().fileno() raises io.UnsupportedOperation,
            # which is an OSError subclass
            fd = None
        else:
            if not stat.S_ISREG(os.fstat(fd).st_mode):
                fd = None
    if fd is not None:
        for data in datafrom_fd(fd, offset=offset, readsize=readsize,
                                maxlength=maxlength):
            yield data
        return
    # presume a file-like object
    try:
        read1 = f.read1
    except AttributeError:
        read1 = f.read
    tell = f.tell
    seek = f.seek
    if offset is None:
        # seek(None) is an error: default to the current position
        offset = tell()
    while maxlength is None or maxlength > 0:
        n = readsize if maxlength is None else min(readsize, maxlength)
        offset0 = tell()
        seek(offset, SEEK_SET)
        try:
            bs = read1(n)
        finally:
            # put the file pointer back even if the read fails
            seek(offset0, SEEK_SET)
        if not bs:
            break
        yield bs
        offset += len(bs)
        if maxlength is not None:
            maxlength -= len(bs)
            assert maxlength >= 0
1171
+
1172
class ReadMixin(object):
    ''' Useful read methods to accommodate modes not necessarily available in a class.

        Note that this mixin presumes that the attribute `self._lock`
        is a threading.RLock like context manager.

        The methods here also use `self.tell()`, `self.seek()`
        and `len(self)`, which the class must supply.

        Classes using this mixin must implement the `.datafrom` method.
    '''

    def datafrom(self, offset, readsize=None):
        ''' Yield data from the specified `offset` onward in some
            approximation of the "natural" chunk size.

            *NOTE*: UNLIKE the global datafrom() function, this method
            MUST NOT move the logical file position. Implementors may need
            to save and restore the file pointer within a lock around
            the I/O if they do not use a direct access method like
            os.pread.

            The aspiration here is to read data with only a single call
            to the underlying storage, and to return the chunks in
            natural sizes instead of some default read size.

            Classes using this mixin must implement this method;
            this default implementation raises `NotImplementedError`.
        '''
        raise NotImplementedError(
            "return an iterator which does not change the file offset"
        )

    def bufferfrom(self, offset):
        ''' Return a CornuCopyBuffer of `self.datafrom(offset)`
            whose starting offset is `offset`.
        '''
        return CornuCopyBuffer(self.datafrom(offset), offset=offset)

    # pylint: disable=too-many-branches
    def read(self, size=-1, offset=None, longread=False):
        ''' Read up to `size` bytes, honouring the "single system call"
            spirit unless `longread` is true.
            Return `b''` at end of file.

            Parameters:
            * `size`: the number of bytes requested. A size of -1 requests
              all bytes to the end of the file.
              A size of 0 returns `b''` immediately
              without touching the file position.
            * `offset`: the starting point of the read; if None, use the
              offset of the buffer left by the previous read if there
              is one, otherwise the current file position.
            * `longread`: switch from "single system call" to "as many
              as required to obtain `size` bytes"; short data will still
              be returned if the file is too short.

            After each chunk is taken the file position is set
            to the offset following that chunk.
        '''
        bfr = getattr(self, '_reading_bfr', None)
        if offset is None:
            offset = self.tell() if bfr is None else bfr.offset
        if size == -1:
            size = max(len(self) - offset, 0)
        if size == 0:
            return b''
        # Collected chunks. This is initialised in both modes so that
        # reaching EOF in short read mode falls through to the
        # empty result below instead of hitting an unbound name.
        bss = []
        while size > 0:
            with self._lock:
                # We need to retest on each iteration because other reads
                # may be interleaved, interfering with the buffer.
                if bfr is None or bfr.offset != offset:
                    self._reading_bfr = bfr = self.bufferfrom(offset)
                bfr.extend(1, short_ok=True)
                if not bfr.buf:
                    # end of file
                    break
                consume = min(size, len(bfr.buf))
                assert consume > 0
                chunk = bfr.take(consume)
                offset += consume
                self.seek(offset)
            assert len(chunk) == consume
            if not longread:
                # short read mode: the first chunk is the result
                return chunk
            bss.append(chunk)
            size -= consume
        if not bss:
            return b''
        if len(bss) == 1:
            return bss[0]
        return b''.join(bss)

    def read_n(self, n):
        ''' Read exactly `n` bytes of data and return them as a `bytearray`.

            Unlike traditional file.read(), RawIOBase.read() may return short
            data, thus this workalike.

            Raises `ValueError` if `n` is less than 1.
            Raises `RuntimeError` if fewer than `n` bytes were obtained,
            which includes running into the end of the file.
        '''
        if n < 1:
            raise ValueError("n too low, expected >=1, got %r" % (n,))
        data = bytearray(n)
        nread = self.readinto(data)
        if nread != len(data):
            raise RuntimeError(
                "  WRONG NUMBER OF BYTES(%d): data=%s" % (nread, data)
            )
        return data

    @locked
    def readinto(self, barray):
        ''' Read data into the bytearray `barray` from the current
            file position, filling at most `len(barray)` bytes.
            Return the number of bytes stored.

            NOTE(review): this method does not call `seek`,
            so it does not advance the file position itself.
        '''
        needed = len(barray)
        boff = 0
        if needed > 0:
            for bs in self.datafrom(self.tell()):
                if not bs:
                    break
                if len(bs) > needed:
                    bs = memoryview(bs)[:needed]
                bs_len = len(bs)
                boff2 = boff + bs_len
                barray[boff:boff2] = bs
                boff = boff2
                needed -= bs_len
                if needed <= 0:
                    # the buffer is full: do not drain datafrom() to EOF
                    break
        return boff
1298
+
1299
class BackedFile(ReadMixin):
    ''' A RawIOBase duck type
        which uses a backing file for initial data
        and writes new data to a front scratch file.

        Writes go to `self.front_file` and the written spans
        are recorded in `self.front_range`;
        reads take data from the front file for those spans
        and from `self.back_file` elsewhere.
    '''

    def __init__(self, back_file, dirpath=None):
        ''' Initialise the BackedFile using `back_file` for the backing data.

            Parameters:
            * `back_file`: the file supplying the initial data
            * `dirpath`: optional directory for the front scratch file,
              passed to `TemporaryFile` as `dir`
        '''
        self._offset = 0
        self._dirpath = dirpath
        self._lock = RLock()
        self.back_file = back_file
        self.front_file = TemporaryFile(dir=dirpath, buffering=0)
        self.front_range = Range()
        self.read_only = False

    def __len__(self):
        ''' The length of the file: the greater of the end of the
            front range and the length of the back file.
            If `len(back_file)` is not supported the back file length
            is measured by seeking to its end, then its position is restored.
        '''
        back_file = self.back_file
        try:
            back_len = len(back_file)
        except TypeError:
            back_pos = back_file.tell()
            back_len = back_file.seek(0, SEEK_END)
            back_file.seek(back_pos, SEEK_SET)
        return max(self.front_range.end, back_len)

    @locked
    def switch_back_file(self, new_back_file):
        ''' Switch out one back file for another. Return the old back file.
        '''
        old_back_file = self.back_file
        self.back_file = new_back_file
        return old_back_file

    def __enter__(self):
        ''' BackedFile instances offer a context manager that take the lock,
            allowing synchronous use of the file
            without implementing a suite of special methods like pread/pwrite.
            Return the BackedFile so that `with bf as f:` binds it.
        '''
        self._lock.acquire()
        return self

    def __exit__(self, *e):
        ''' Release the lock taken by `__enter__`.
        '''
        self._lock.release()

    def close(self):
        ''' Close the BackedFile: close the front file and forget it.
            Closing an already closed BackedFile does nothing.
            The back file is not closed.
        '''
        front_file = self.front_file
        if front_file is not None:
            front_file.close()
            self.front_file = None

    def tell(self):
        ''' Report the current file pointer offset.
        '''
        return self._offset

    @locked
    def seek(self, pos, whence=SEEK_SET):
        ''' Adjust the current file pointer offset
            and return the new offset.

            `whence` may be `SEEK_SET` (the default), `SEEK_CUR`
            or `SEEK_END`, where the end is `len(self)`.
            Raises `ValueError` for any other `whence`.
        '''
        if whence == SEEK_SET:
            new_offset = pos
        elif whence == SEEK_CUR:
            new_offset = self._offset + pos
        elif whence == SEEK_END:
            # len(self) covers both the back file and the front range
            # and leaves the back file's own position alone
            new_offset = len(self) + pos
        else:
            raise ValueError("unsupported whence value %r" % (whence,))
        self._offset = new_offset
        return new_offset

    def datafrom(self, offset):
        ''' Generator yielding natural chunks from the file commencing at offset.
            Spans recorded in the front range are read from the front file,
            the other spans from the back file.
        '''
        # the module level datafrom function, not this method
        global_datafrom = globals()['datafrom']
        front_file = self.front_file
        try:
            front_datafrom = front_file.datafrom
        except AttributeError:
            front_datafrom = partial(global_datafrom, front_file)
        back_file = self.back_file
        try:
            back_datafrom = back_file.datafrom
        except AttributeError:
            back_datafrom = partial(global_datafrom, back_file)
        for in_front, span in self.front_range.slices(offset, len(self)):
            consume = len(span)
            assert consume > 0
            if in_front:
                chunks = front_datafrom(span.start)
            else:
                chunks = back_datafrom(span.start)
            for bs in chunks:
                assert len(bs) > 0
                if len(bs) > consume:
                    # do not run past the end of this span
                    bs = memoryview(bs)[:consume]
                yield bs
                consume -= len(bs)
                if consume <= 0:
                    break

    @locked
    def write(self, b):
        ''' Write the data `b` to the front_file at the current offset,
            record the written span in the front range
            and return the number of bytes written.
            Raises `RuntimeError` if the BackedFile is read only.

            Note that the file pointer offset is not advanced.
        '''
        if self.read_only:
            raise RuntimeError("write to read-only BackedFile")
        front_file = self.front_file
        start = self._offset
        front_file.seek(start)
        written = front_file.write(b)
        if written is None:
            warning(
                "front_file.write() returned None, assuming %d bytes written, data=%r",
                len(b), b
            )
            written = len(b)
        self.front_range.add_span(start, start + written)
        return written
1422
+
1423
# pylint: disable=too-few-public-methods,protected-access
class BackedFile_TestMethods(object):
    ''' Mixin providing a test of `BackedFile` subclasses.
        It tests `self.backed_fp` against `self.backing_text`.
    '''

    # pylint: disable=no-member
    def _eq(self, a, b, opdesc):
        ''' Call `assertEqual(a, b)` with a failure message
            naming the operation `opdesc`.
        '''
        message = "%s: got %r, expected %r" % (opdesc, a, b)
        self.assertEqual(a, b, message)

    # pylint: disable=no-member
    def test_BackedFile(self):
        ''' Exercise a `BackedFile`; for use in unit test suites.
        '''
        from random import randint  # pylint: disable=import-outside-toplevel
        expected_text = self.backing_text
        backed = self.backed_fp
        # the whole file should match the backing text
        backed.seek(0)
        whole_text = backed.read_n(len(backed))
        self._eq(expected_text, whole_text, "backing_text vs bfp_text")
        # so should the first 512 bytes read on their own
        backed.seek(0)
        leading_text = backed.read_n(512)
        self._eq(
            expected_text[:512], leading_text,
            "leading 512 bytes of backing_text vs bfp_leading_text"
        )
        # overwrite 256 bytes at offset 512 with random data
        random_chunk = bytes(randint(0, 255) for _ in range(256))
        backed.seek(512)
        backed.write(random_chunk)
        # the front file should now hold exactly one span: 512 to 768
        front_file = backed.front_file
        front_range = backed.front_range
        self.assertIsNotNone(front_file)
        self.assertIsNotNone(front_range)
        self.assertEqual(
            len(front_range._spans), 1,
            "fr._spans = %r" % (front_range._spans,)
        )
        only_span = front_range._spans[0]
        self.assertEqual(only_span.start, 512)
        self.assertEqual(front_range._spans[0].end, 768)
        # the random data should come back directly from the front file
        front_file.seek(512)
        from_front = front_file.read(256)
        self.assertEqual(random_chunk, from_front)
        # and also from the BackedFile
        backed.seek(512)
        from_backed = backed.read_n(256)
        self.assertEqual(from_backed, random_chunk)
        # a read straddling the old data and the new data
        backed.seek(256)
        overlap_chunk = backed.read_n(512)
        overlap_desc = "overlap_chunk not 512 bytes: %d:%s" % (
            len(overlap_chunk), bytes(overlap_chunk)
        )
        self.assertEqual(len(overlap_chunk), 512, overlap_desc)
        self.assertEqual(overlap_chunk, expected_text[256:512] + random_chunk)
1483
+
1484
class Tee(object):
    ''' A file-like object whose `.write`, `.flush` and `.close` methods
        are applied to each of several output files.
    '''

    def __init__(self, *fps):
        ''' Initialise the Tee with the output file objects `fps`.
        '''
        self._fps = [*fps]

    def add(self, output):
        ''' Append `output` to the outputs.
        '''
        outputs = self._fps
        outputs.append(output)

    def write(self, data):
        ''' Write `data` to every output in turn.
            Note: short writes are neither detected nor accommodated.
        '''
        for output in self._fps:
            output.write(data)

    def flush(self):
        ''' Flush every output in turn.
        '''
        for output in self._fps:
            output.flush()

    def close(self):
        ''' Close every output in turn, then forget the outputs.
        '''
        for output in self._fps:
            output.close()
        self._fps = None
1518
+
1519
@contextmanager
def tee(fp, fp2):
    ''' Context manager during which calls to `fp.write` and `fp.flush`
        are first applied to `fp2` and then to `fp`.
        The return value of each call is that of the original `fp` method.
    '''

    def duplicated(method_name):
        ''' Make a stand in for `fp`'s method named `method_name`
            which calls `fp2`'s method of that name first.
        '''
        fp_method = getattr(fp, method_name)

        def both(*a, **kw):
            getattr(fp2, method_name)(*a, **kw)
            return fp_method(*a, **kw)

        return both

    with stackattrs(fp, write=duplicated('write'), flush=duplicated('flush')):
        yield
1536
+
1537
class NullFile(object):
    ''' A writable file-like object which throws away what is written.

        Note that this is _not_ an open of `os.devnull`;
        there is no underlying file descriptor,
        the writes are simply discarded.
    '''

    def __init__(self):
        ''' Start with a file offset of 0.
        '''
        self.offset = 0

    def write(self, data):
        ''' Discard `data` and advance the file offset by its length.
            Return the length of `data`.
        '''
        nbytes = len(data)
        self.offset = self.offset + nbytes
        return nbytes

    def flush(self):
        ''' Flush buffered data to the subsystem: nothing to do here.
        '''
1559
+
1560
def file_data(fp, nbytes=None, rsize=None):
    ''' Generator yielding chunks of data as read from `fp`
        until `nbytes` bytes have been read or EOF is reached.

        Parameters:
        * `nbytes`: number of bytes to read; if None read until EOF.
        * `rsize`: read size, default DEFAULT_READSIZE.

        If `nbytes` was specified and EOF arrives after some data
        but before `nbytes` bytes, a warning is issued.
    '''
    if rsize is None:
        rsize = DEFAULT_READSIZE
    # prefer the "short read" flavour of read if the file has one
    try:
        reader = fp.read1
    except AttributeError:
        reader = fp.read
    limited = nbytes is not None
    total = 0
    while not limited or nbytes > 0:
        request = min(nbytes, rsize) if limited else rsize
        data = reader(request)
        if not data:
            # EOF: an immediate EOF is valid and draws no warning
            if limited and total > 0:
                warning(
                    "early EOF: only %d bytes read, %d still to go", total, nbytes
                )
            return
        yield data
        got = len(data)
        total += got
        if limited:
            nbytes -= got
1591
+
1592
def copy_data(fpin, fpout, nbytes, rsize=None):
    ''' Copy data from `fpin` to `fpout` using `file_data`,
        returning the number of bytes copied.

        Parameters:
        * `nbytes`: number of bytes to copy.
          If `None`, copy until EOF.
        * `rsize`: read size, default `DEFAULT_READSIZE`.
    '''
    total = 0
    for data in file_data(fpin, nbytes, rsize):
        fpout.write(data)
        total = total + len(data)
    return total
1606
+
1607
def read_data(fp, nbytes, rsize=None):
    ''' Read data from `fp` using `file_data` and return it as one value.

        Parameters:
        * `nbytes`: number of bytes to read.
          If `None`, read until EOF.
        * `rsize`: read size, default `DEFAULT_READSIZE`.

        A single chunk is returned as is;
        otherwise the chunks are joined into a `bytes`,
        which is `b''` if there was no data.
    '''
    chunks = [*file_data(fp, nbytes, rsize)]
    if len(chunks) == 1:
        return chunks[0]
    # zero chunks join to b''
    return b''.join(chunks)
1621
+
1622
def read_from(fp, rsize=None, tail_mode=False, tail_delay=None):
    ''' Generator yielding the text or data chunks obtained from
        successive `fp.read(rsize)` calls on an open file until EOF.

        Parameters:
        * `rsize`: read size, default: DEFAULT_READSIZE
        * `tail_mode`: if true, yield an empty chunk at EOF, allowing resumption
          if the file grows.
        * `tail_delay`: the pause in seconds after yielding the empty
          chunk in tail mode, default: DEFAULT_TAIL_PAUSE.
          Supplying it without `tail_mode` raises `ValueError`.
    '''
    if rsize is None:
        rsize = DEFAULT_READSIZE
    if tail_delay is None:
        tail_delay = DEFAULT_TAIL_PAUSE
    elif not tail_mode:
        raise ValueError(
            "tail_mode=%r but tail_delay=%r" % (tail_mode, tail_delay)
        )
    while True:
        chunk = fp.read(rsize)
        if chunk:
            yield chunk
            continue
        # at EOF
        if not tail_mode:
            return
        # indicate EOF to the consumer, then pause before retrying
        yield chunk
        time.sleep(tail_delay)
1649
+
1650
def lines_of(fp, partials=None):
    ''' Return the lines of the file `fp` until EOF,
        as produced by `as_lines` from the chunks of `read_from(fp)`.
        Intended for file-like objects that lack a line iteration API.

        `partials` is passed to `as_lines`;
        a new empty list is used if it is omitted.
    '''
    return as_lines(read_from(fp), [] if partials is None else partials)
1657
+
1658
# pylint: disable=redefined-builtin
@contextmanager
def atomic_filename(
    filename,
    exists_ok=False,
    placeholder=False,
    dir=None,
    prefix=None,
    suffix=None,
    rename_func=rename,
    **kw
):
    ''' A context manager to create `filename` atomically on completion.
        This yields a `NamedTemporaryFile` to use to create the file contents.
        On completion the temporary file is renamed to the target name `filename`.

        Parameters:
        * `filename`: the file name to create
        * `exists_ok`: default `False`;
          if true it is not an error if `filename` already exists,
          otherwise `FileExistsError` is raised in that case
        * `placeholder`: create a placeholder file at `filename`
          while the real contents are written to the temporary file
        * `dir`: passed to `NamedTemporaryFile`, specifies the directory
          to hold the temporary file; the default is `dirname(filename)`
          to ensure the rename is atomic
        * `prefix`: passed to `NamedTemporaryFile`, specifies a prefix
          for the temporary file; the default is a dot (`'.'`) plus the prefix
          from `splitext(basename(filename))` plus a dash (`'-'`)
        * `suffix`: passed to `NamedTemporaryFile`, specifies a suffix
          for the temporary file; the default is the extension obtained
          from `splitext(basename(filename))`
        * `rename_func`: a callable accepting `(tempname,filename)`
          used to rename the temporary file to the final name; the
          default is `os.rename` and this parameter exists to accept
          something such as `FSTags.move`
        Other keyword arguments are passed to the `NamedTemporaryFile` constructor.

        Example:

            >>> import os
            >>> from os.path import exists as existspath
            >>> fn = 'test_atomic_filename'
            >>> with atomic_filename(fn, mode='w') as f:
            ...     assert not existspath(fn)
            ...     print('foo', file=f)
            ...     assert not existspath(fn)
            ...
            >>> assert existspath(fn)
            >>> assert open(fn).read() == 'foo\\n'
            >>> os.remove(fn)
    '''
    if dir is None:
        dir = dirname(filename)
    stem, ext = splitext(basename(filename))
    if prefix is None:
        prefix = '.' + stem + '-'
    if suffix is None:
        suffix = ext
    if not exists_ok:
        if existspath(filename):
            raise FileExistsError(
                errno.EEXIST, os.strerror(errno.EEXIST), filename
            )
    with NamedTemporaryFile(dir=dir, prefix=prefix, suffix=suffix, **kw) as T:
        if placeholder:
            # reserve the target name while the contents are being written
            placeholder_mode = 'ab' if exists_ok else 'xb'
            with open(filename, placeholder_mode):
                pass
        yield T
        tmppath = T.name
        mtime = pfx_call(os.stat, tmppath).st_mtime
        try:
            pfx_call(shutil.copystat, filename, tmppath)
        except FileNotFoundError:
            # no existing file or placeholder to copy attributes from
            pass
        except OSError as e:
            warning(
                "defaut modes not copied from from placeholder %r: %s", filename, e
            )
        else:
            # the attributes now match the original: put the new mtime back
            try:
                atime = pfx_call(os.stat, filename).st_atime
            except FileNotFoundError:
                atime = mtime
            pfx_call(os.utime, tmppath, (atime, mtime))
        pfx_call(rename_func, tmppath, filename)
        # recreate the temp file so that it can be cleaned up
        with pfx_call(open, tmppath, 'xb'):
            pass
1744
+
1745
class RWFileBlockCache(object):
    ''' A scratch file for storing data:
        `put` appends a block and returns its offset,
        `get` retrieves a block by offset and length.
    '''

    def __init__(self, pathname=None, dirpath=None, suffix=None, lock=None):
        ''' Initialise the file.

            Parameters:
            * `pathname`: path of file. If None, create a new file with
              tempfile.mkstemp using dir=`dirpath` and unlink that file once
              opened.
            * `dirpath`: location for the file if made by mkstemp as above.
            * `suffix`: suffix for the file name if made by mkstemp as above.
            * `lock`: an object to use as a mutex, allowing sharing with
              some outer system. A Lock will be allocated if omitted.

            The file is opened for read/write in append mode;
            `self.pathname` is `None` for a file made by mkstemp.
        '''
        open_flags = os.O_RDWR | os.O_APPEND
        if pathname is None:
            tmpfd, tmppath = mkstemp(dir=dirpath, suffix=suffix)
            try:
                self.fd = os.open(tmppath, open_flags)
            finally:
                # the scratch file is anonymous: remove its name and release
                # the mkstemp descriptor whether or not the reopen worked
                try:
                    os.remove(tmppath)
                finally:
                    os.close(tmpfd)
            self.pathname = None
        else:
            self.fd = os.open(pathname, open_flags)
            self.pathname = pathname
        if lock is None:
            lock = Lock()
        self._lock = lock

    def __str__(self):
        return "%s(pathname=%s)" % (type(self).__name__, self.pathname)

    def close(self):
        ''' Close the file descriptor.
            A warning is issued if it is already closed.
        '''
        with Pfx("%s.close", self):
            fd = self.fd
            if fd is None:
                warning("fd already closed")
            else:
                os.close(fd)
                self.fd = None

    @property
    def closed(self):
        ''' Test whether the file descriptor has been closed.
        '''
        return self.fd is None

    def put(self, data):
        ''' Store `data` at the end of the file, return its offset.
            Empty `data` writes nothing and returns the current end offset.
        '''
        fd = self.fd
        with self._lock:
            # The descriptor is in append mode so the data always land at
            # the end of the file, wherever the file position happens to
            # be: the block's offset is therefore the end of the file.
            offset = os.lseek(fd, 0, os.SEEK_END)
            if len(data) > 0:
                remaining = memoryview(data)
                while len(remaining) > 0:
                    # os.write may write less than requested
                    written = os.write(fd, remaining)
                    if written <= 0:
                        raise OSError(
                            "os.write wrote %r bytes, %d bytes unwritten" %
                            (written, len(remaining))
                        )
                    remaining = remaining[written:]
        return offset

    def get(self, offset, length):
        ''' Get data from `offset` of length `length`.
            `length` must be greater than 0
            and the file must hold that much data from `offset`.
        '''
        assert length > 0
        fd = self.fd
        chunks = []
        remaining = length
        while remaining > 0:
            # os.pread may return less than requested
            chunk = os.pread(fd, remaining, offset)
            if not chunk:
                # end of file
                break
            chunks.append(chunk)
            offset += len(chunk)
            remaining -= len(chunk)
        data = chunks[0] if len(chunks) == 1 else b''.join(chunks)
        assert len(data) == length
        return data
1815
+
1816
@contextmanager
def gzifopen(path, mode='r', *a, **kw):
    ''' Context manager to open a file which may be a plain file or a gzipped file.

        If `path` ends with `'.gz'` then the filesystem paths attempted
        are `path` and `path` without the extension, otherwise the
        filesystem paths attempted are `path` and `path+'.gz'`. In
        this way a path ending in `'.gz'` indicates a preference for
        a gzipped file otherwise an uncompressed file.

        However, if exactly one of the paths exists already then only
        that path will be used.

        Note that the single character modes `'r'`, `'a'`, `'w'` and `'x'`
        are text mode for both uncompressed and gzipped opens,
        like the builtin `open` and *unlike* `gzip.open`.
        This is to ensure equivalent behaviour.

        Parameters:
        * `path`: the filesystem path of the file
        * `mode`: the open mode, default `'r'`
        * `compresslevel`: optional keyword argument, default `9`,
          passed to `gzip.open` for gzipped files
        Other arguments are passed to `open` or `gzip.open`.

        `FileNotFoundError` is raised if none of the paths can be opened.
        Exceptions raised by the caller inside the `with` suite
        propagate unchanged and never cause the other path to be tried.
    '''
    compresslevel = kw.pop('compresslevel', 9)
    path0 = path
    path, ext = splitext(path)
    if ext == '.gz':
        # gzip preferred
        path1, path2 = path0, path
    else:
        # unzipped has precedence
        path1, path2 = path0, path0 + '.gz'
    # if exactly one of the files exists, try only that file
    exists1 = existspath(path1)
    exists2 = existspath(path2)
    if exists1 and not exists2:
        paths = path1,
    elif exists2 and not exists1:
        paths = path2,
    else:
        paths = path1, path2
    for openpath in paths:
        # Only the open itself is guarded: a FileNotFoundError coming from
        # the caller's suite must not be taken for a missing file.
        try:
            if openpath.endswith('.gz'):
                gzmode = mode + 't' if mode in ('r', 'a', 'w', 'x') else mode
                f = gzip.open(
                    openpath, gzmode, *a, compresslevel=compresslevel, **kw
                )
            else:
                f = open(openpath, mode, *a, **kw)
        except FileNotFoundError:
            # last path to try
            if openpath == paths[-1]:
                raise
            # not present, try the other file
            continue
        # open succeeded, we're done after the caller's suite
        with f:
            yield f
        return
    raise RuntimeError("NOTREACHED")
1868
+
1869
if __name__ == '__main__':
    # run the module self tests, handing them the command line
    from cs.fileutils_tests import selftest
    selftest(sys.argv)