cs-fileutils 20250429__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cs/fileutils.py ADDED
@@ -0,0 +1,1835 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Assorted convenience functions for files and filenames/pathnames.
4
+ # - Cameron Simpson <cs@cskk.id.au>
5
+
6
+ ''' My grab bag of convenience functions for files and filenames/pathnames.
7
+ '''
8
+
9
+ # pylint: disable=too-many-lines
10
+
11
+ from __future__ import with_statement, print_function, absolute_import
12
+ from contextlib import contextmanager
13
+ import errno
14
+ from functools import partial
15
+ import gzip
16
+ import os
17
+ from os import SEEK_CUR, SEEK_END, SEEK_SET, O_RDONLY, read
18
+ try:
19
+ from os import pread
20
+ except ImportError:
21
+ pread = None
22
+ from os.path import (
23
+ abspath,
24
+ basename,
25
+ dirname,
26
+ exists as existspath,
27
+ isabs as isabspath,
28
+ isdir,
29
+ join as joinpath,
30
+ splitext,
31
+ )
32
+ import shutil
33
+ import stat
34
+ import sys
35
+ from tempfile import TemporaryFile, NamedTemporaryFile, mkstemp
36
+ from threading import Lock, RLock
37
+ import time
38
+
39
+ from cs.buffer import CornuCopyBuffer
40
+ from cs.context import stackattrs
41
+ from cs.deco import fmtdoc, OBSOLETE, strable
42
+ from cs.filestate import FileState
43
+ from cs.fs import shortpath
44
+ from cs.gimmicks import TimeoutError # pylint: disable=redefined-builtin
45
+ from cs.lex import as_lines, cutsuffix, common_prefix
46
+ from cs.logutils import error, warning, debug
47
+ from cs.pfx import Pfx, pfx, pfx_call
48
+ from cs.progress import Progress, progressbar
49
+ from cs.py3 import ustr, bytes, pread # pylint: disable=redefined-builtin
50
+ from cs.range import Range
51
+ from cs.resources import RunState, uses_runstate
52
+ from cs.result import CancellationError
53
+ from cs.threads import locked, NRLock
54
+ from cs.units import BINARY_BYTES_SCALE
55
+
56
# Release identifier for this module; the value looks like a YYYYMMDD date.
__version__ = '20250429'

# Packaging metadata for this module.
# NOTE(review): the keywords and classifiers advertise Python 2 support, but
# this module uses Python 3 only syntax (keyword-only parameters and
# annotations in `makelockfile`) - confirm whether Python 2 is still intended.
DISTINFO = {
    'keywords': ["python2", "python3"],
    'classifiers': [
        "Programming Language :: Python",
        "Programming Language :: Python :: 2",
        "Programming Language :: Python :: 3",
    ],
    # One entry per cs.* package imported at the top of this module.
    # NOTE(review): several entries use a feature name instead of a version
    # number after `>=` (eg 'cs.fs>=shortpath'); presumably this is resolved
    # by the release tooling - confirm before editing.
    'install_requires': [
        'cs.buffer',
        'cs.context',
        'cs.deco',
        'cs.filestate',
        'cs.fs>=shortpath',
        'cs.gimmicks>=TimeoutError',
        'cs.lex>=20200914',
        'cs.logutils',
        'cs.pfx>=pfx_call',
        'cs.progress',
        'cs.py3',
        'cs.range',
        'cs.resources',
        'cs.result',
        'cs.threads',
        'cs.units',
    ],
}
84
+
85
# Default interval, in seconds, between polls
# (file state checks and lock file retries).
DEFAULT_POLL_INTERVAL = 1.0
# Default read size in bytes: 128KiB.
DEFAULT_READSIZE = 128 * 1024
# NOTE(review): presumably the pause in seconds between polls when tailing
# a file; no user of this constant is visible in this part of the module.
DEFAULT_TAIL_PAUSE = 0.25
88
+
89
def seekable(fp):
  ''' Return whether the file-like object `fp` appears to be seekable.

      If `fp` has a `seekable` attribute, return the result of calling it.
      Otherwise, if `fp` has a `fileno` attribute, call it,
      `os.fstat()` the resulting file descriptor
      and report whether it is a regular file.
      Otherwise return `False`.
  '''
  try:
    is_seekable = fp.seekable
  except AttributeError:
    pass
  else:
    return is_seekable()
  try:
    fileno = fp.fileno
  except AttributeError:
    # neither .seekable nor .fileno: we cannot tell, say no
    return False
  fd_stat = os.fstat(fileno())
  return stat.S_ISREG(fd_stat.st_mode)
105
+
106
def rename_excl(oldpath, newpath):
  ''' Safely rename `oldpath` to `newpath`.
      Raise `FileExistsError` if `newpath` already exists.

      This first claims `newpath` by exclusively creating an empty
      placeholder file there (open mode `'xb'`)
      and then renames `oldpath` over the placeholder.
      If the rename itself fails the placeholder is removed again
      so that a failed call does not leave an empty `newpath` behind;
      the original exception is reraised.
  '''
  # claim newpath: the exclusive create fails if it already exists
  with pfx_call(open, newpath, 'xb'):
    pass
  try:
    pfx_call(os.rename, oldpath, newpath)
  except OSError:
    # best effort removal of our placeholder, then report the real failure
    try:
      os.remove(newpath)
    except OSError:
      pass
    raise
113
+
114
@OBSOLETE("rename_excl")
def saferename(oldpath, newpath):
  ''' Rename a path using `os.rename()`,
      but raise an exception if the target path already exists.
      Note: slightly racey, because the existence check
      and the rename are separate steps.

      If `newpath` exists an `OSError` with `errno.EEXIST` is raised
      (this is a `FileExistsError`), naming `newpath`.
      Any failure of `os.lstat()` other than `ENOENT` is reraised,
      as is any failure of `os.rename()`.
  '''
  try:
    os.lstat(newpath)
  except OSError as e:
    if e.errno != errno.ENOENT:
      raise
  else:
    # A single argument OSError leaves .errno unset,
    # so supply errno, message and filename explicitly.
    raise OSError(errno.EEXIST, os.strerror(errno.EEXIST), newpath)
  os.rename(oldpath, newpath)
127
+
128
def trysaferename(oldpath, newpath):
  ''' Attempt `saferename(oldpath, newpath)`.
      Return `True` if it succeeded,
      `False` if it raised an `OSError`.
      Other exceptions propagate.
  '''
  try:
    saferename(oldpath, newpath)
  except OSError:
    renamed = False
  else:
    renamed = True
  return renamed
139
+
140
def compare(f1, f2, mode="rb"):
  ''' Compare the contents of two file-like objects `f1` and `f2` for equality.
      Return `True` if the contents are equal, `False` otherwise.

      If `f1` or `f2` is a filesystem path
      (a `str` or an `os.PathLike` such as a `pathlib.Path`),
      open the named file using `mode` (default: `"rb"`)
      and close it again after the comparison.

      Note that the full contents of both files are read into memory,
      and that file objects supplied by the caller are read
      from their current position.
  '''
  path_types = (str, os.PathLike)
  if isinstance(f1, path_types):
    with open(f1, mode) as f1fp:
      return compare(f1fp, f2, mode)
  if isinstance(f2, path_types):
    with open(f2, mode) as f2fp:
      return compare(f1, f2fp, mode)
  return f1.read() == f2.read()
153
+
154
+ # pylint: disable=too-many-locals,too-many-branches,too-many-statements
155
@contextmanager
def NamedTemporaryCopy(f, progress=False, progress_label=None, **nt_kw):
  ''' A context manager yielding a temporary copy of `f`
      as returned by `NamedTemporaryFile(**nt_kw)`.
      The temporary file is removed when the context exits.

      Parameters:
      * `f`: the name of the file to copy, or an open binary file,
        or a file descriptor (an `int`), or a `CornuCopyBuffer`
      * `progress`: an optional progress indicator, default `False`;
        if a `bool`, show a progress bar for the copy phase if true;
        if an `int`, show a progress bar for the copy phase
        if the file size equals or exceeds the value
        or if the size cannot be determined;
        otherwise it should be a `cs.progress.Progress` instance
        which is advanced by the number of bytes written
      * `progress_label`: optional progress bar label,
        only used if a progress bar is made
      Other keyword parameters are passed to `tempfile.NamedTemporaryFile`;
      if no `prefix` is supplied the copy made from a file object,
      file descriptor or buffer uses the prefix `'NamedTemporaryCopy'`.

      Note that the source buffer is closed once the copy is complete.
  '''
  if isinstance(f, str):
    # copy named file
    filename = f
    progress_label = (
        "copy " + repr(filename) if progress_label is None else progress_label
    )
    # should we use shutil.copy() and display no progress?
    # NOTE(review): when `progress` is not False a regular file still takes
    # the fast path below and so shows no progress - confirm this is intended.
    if progress is False:
      fast_mode = True
    else:
      with Pfx("stat(%r)", filename):
        S = os.stat(filename)
      fast_mode = stat.S_ISREG(S.st_mode)
    if fast_mode:
      with NamedTemporaryFile(**nt_kw) as T:
        with Pfx("shutil.copy(%r,%r)", filename, T.name):
          shutil.copy(filename, T.name)
        yield T
    else:
      # open the file and recurse with the open file
      with Pfx("open(%r)", filename):
        with open(filename, 'rb') as f2:
          with NamedTemporaryCopy(f2, progress=progress,
                                  progress_label=progress_label,
                                  **nt_kw) as T:
            yield T
    return
  prefix = nt_kw.pop('prefix', None)
  if prefix is None:
    prefix = 'NamedTemporaryCopy'
  # prepare the buffer and try to infer the length
  if isinstance(f, CornuCopyBuffer):
    length = None
    bfr = f
  else:
    if isinstance(f, int):
      fd = f
      bfr = CornuCopyBuffer.from_fd(fd)
    else:
      bfr = CornuCopyBuffer.from_file(f)
      try:
        fd = f.fileno()
      except (AttributeError, OSError):
        # no .fileno method, or a file with no underlying descriptor
        # (eg io.BytesIO raises io.UnsupportedOperation, an OSError)
        fd = None
    if fd is None:
      length = None
    else:
      S = os.fstat(fd)
      # only a regular file has a meaningful size
      length = S.st_size if stat.S_ISREG(S.st_mode) else None
  # determine whether we need a progress bar
  if isinstance(progress, bool):
    need_bar = progress
    progress = None
  elif isinstance(progress, int):
    need_bar = length is None or length >= progress
    progress = None
  else:
    # a caller supplied Progress: update it instead of drawing a bar
    need_bar = False
    assert isinstance(progress, Progress)
  with NamedTemporaryFile(prefix=prefix, **nt_kw) as T:
    # wrap the buffer in a progress bar only when one was requested
    it = (
        progressbar(
            bfr,
            label=progress_label,
            total=length,
            itemlenfunc=len,
            units_scale=BINARY_BYTES_SCALE,
        ) if need_bar else bfr
    )
    nbs = 0
    for bs in it:
      # keep writing until the whole chunk has been accepted
      while bs:
        nwritten = T.write(bs)
        if progress is not None:
          progress += nwritten
        if nwritten != len(bs):
          warning(
              "NamedTemporaryCopy: %r.write(%d bytes) => %d",
              T.name,
              len(bs),
              nwritten,
          )
          bs = bs[nwritten:]
        else:
          bs = b''
        nbs += nwritten
    bfr.close()
    # make the data visible to anyone opening T.name
    T.flush()
    if length is not None and nbs != length:
      warning(
          "NamedTemporaryCopy: given length=%s, wrote %d bytes to %r",
          length,
          nbs,
          T.name,
      )
    yield T
266
+
267
+ # pylint: disable=too-many-arguments
268
def rewrite(
    filepath,
    srcf,
    mode='w',
    backup_ext=None,
    do_rename=False,
    do_diff=None,
    empty_ok=False,
    overwrite_anyway=False
):
  ''' Rewrite the file `filepath` with data from the file object `srcf`.
      Return `True` if the content was changed, `False` if unchanged.

      The new content is first written to a temporary file
      in the same directory as `filepath`.
      The temporary file is removed before return
      unless it has been renamed to `filepath`.

      Parameters:
      * `filepath`: the name of the file to rewrite.
      * `srcf`: the source file containing the new content.
      * `mode`: the write-mode for the file, default `'w'` (for text);
        use `'wb'` for binary data.
      * `empty_ok`: if true (default `False`),
        do not raise `ValueError` if the new data are empty.
      * `overwrite_anyway`: if true (default `False`),
        skip the content check and overwrite unconditionally
        (the check is still made if `do_diff` is supplied).
      * `backup_ext`: if a nonempty string,
        take a backup of the original at `filepath + backup_ext`.
      * `do_diff`: if not `None`, call `do_diff(filepath,tempfile)`
        when the content differs.
      * `do_rename`: if true (default `False`),
        rename the temp file to `filepath`
        after copying the permission bits.
        Otherwise (default), copy the tempfile to `filepath`;
        this preserves the file's inode and permissions etc.

      Raises `ValueError` if the new data are empty and `empty_ok` is false.
  '''
  with Pfx("rewrite(%r)", filepath):
    # delete=False: in do_rename mode the temp file is renamed away,
    # and before Python 3.12 an auto-deleting NamedTemporaryFile raises
    # FileNotFoundError on close if its path is gone.
    # We remove the temp file ourselves below instead.
    with NamedTemporaryFile(dir=dirname(filepath), mode=mode,
                            delete=False) as T:
      tmp_renamed = False
      try:
        T.write(srcf.read())
        T.flush()
        if not empty_ok:
          st = os.stat(T.name)
          if st.st_size == 0:
            raise ValueError("no data in temp file")
        if do_diff or not overwrite_anyway:
          # need to compare data
          if compare(T.name, filepath):
            # data the same, do nothing
            return False
          if do_diff:
            # call the supplied differ
            do_diff(filepath, T.name)
        if do_rename:
          # rename new file into old path
          # tries to preserve perms, but does nothing for other metadata
          shutil.copymode(filepath, T.name)
          if backup_ext:
            os.link(filepath, filepath + backup_ext)
          os.rename(T.name, filepath)
          tmp_renamed = True
        else:
          # overwrite old file - preserves perms, ownership, hard links
          if backup_ext:
            shutil.copy2(filepath, filepath + backup_ext)
          shutil.copyfile(T.name, filepath)
      finally:
        if not tmp_renamed:
          # the temp file is still ours: clean it up
          try:
            os.remove(T.name)
          except FileNotFoundError:
            pass
  return True
328
+
329
@contextmanager
def rewrite_cmgr(filepath, mode='w', **kw):
  ''' Rewrite a file, presented as a context manager.

      The context manager yields a scratch temporary file opened with `mode`.
      When the managed body completes without an exception
      the scratch file is flushed, reopened for binary read
      and handed to `rewrite()` as the new content for `filepath`.
      If the body raises, `rewrite()` is not called.

      Parameters:
      * `filepath`: the name of the file to rewrite.
      * `mode`: file write mode, defaulting to "w" for text.

      Other keyword parameters are passed to `rewrite()`.

      Example:

          with rewrite_cmgr(pathname, do_rename=True) as f:
              ... write new content to f ...
  '''
  with NamedTemporaryFile(mode=mode) as scratch:
    yield scratch
    scratch.flush()
    with open(scratch.name, 'rb') as new_content:
      # the result of rewrite() cannot be returned from a context manager
      rewrite(filepath, mode='wb', srcf=new_content, **kw)
348
+
349
def abspath_from_file(path, from_file):
  ''' Resolve `path` relative to the directory containing `from_file`,
      as one might do for an include file.

      An absolute `path` is returned unchanged.
      Otherwise `from_file` is made absolute if necessary
      and `path` is joined to its directory name.
  '''
  if isabspath(path):
    return path
  anchor = from_file if isabspath(from_file) else abspath(from_file)
  return joinpath(dirname(anchor), path)
358
+
359
def poll_file(path, old_state, reload_file, missing_ok=False):
  ''' Poll the file `path` for modification using its `FileState()`.

      If `old_state` is `None` or differs from the current state,
      call `reload_file(path)` and then take the state again.
      Return `(new_state,result)`, where `result` is the value
      from `reload_file(path)`, if the state was the same
      before and after the reload.
      Otherwise (no change, or the file changed during the reload)
      return `(None,None)`.

      This may raise an `OSError` if `FileState(path)` fails
      and of course any exception raised by `reload_file`.

      If `missing_ok` is true then an `OSError` with `ENOENT`
      from `FileState(path)` will just return `(None,None)`.
  '''
  try:
    state_before = FileState(path)
  except OSError as e:
    if missing_ok and e.errno == errno.ENOENT:
      return None, None
    raise
  if not (old_state is None or old_state != state_before):
    # state is unchanged since the caller's last poll
    return None, None
  # first poll or changed state: reload
  result = reload_file(path)
  try:
    state_after = FileState(path)
  except OSError as e:
    if missing_ok and e.errno == errno.ENOENT:
      return None, None
    raise
  # only trust the reload if the file was stable across it
  if state_after == state_before:
    return state_before, result
  return None, None
394
+
395
def files_property(func):
  ''' A property whose value reloads if any of a list of files changes.

      Note: this is just the default mode for `make_files_property`,
      equivalent to `make_files_property()(func)`.

      The decorated function `func` is passed the current list of file paths
      and returns the new list of file paths and the associated value.

      The underlying attribute name is `'_'+func.__name__`,
      the default from `make_files_property()`.
      With that name as *attr_name*:
      the attribute *attr_name*`_lock` is a mutex controlling access
      to the property,
      the attributes *attr_name*`_filestates` and *attr_name*`_paths`
      track the associated file states and paths,
      and the attribute *attr_name*`_lastpoll` tracks the last poll time.
      The *attr_name*`_lock` and *attr_name*`_paths` attributes
      must be set up by the class before the property is first used.

      One example use would be a configuration file with recursive
      include operations; the inner function would parse the first
      file in the list, and the parse would accumulate this filename
      and those of any included files so that they can be monitored,
      triggering a fresh parse if one changes.

      Example:

          class C(object):
            def __init__(self):
              self._foo_lock = Lock()
              self._foo_paths = ['.foorc']
            @files_property
            def foo(self, paths):
              new_paths, result = parse(paths[0])
              return new_paths, result

      The load function is called on the first access and on every
      access thereafter where an associated file's `FileState` has
      changed and the time since the last poll is at least
      the default poll rate. An attempt at avoiding races is made by
      ignoring reloads that raise exceptions and ignoring reloads
      where files that were stat()ed during the change check have
      changed state after the load.
  '''
  decorator = make_files_property()
  return decorator(func)
436
+
437
+ # pylint: disable=too-many-statements
438
@fmtdoc
def make_files_property(
    attr_name=None, unset_object=None, poll_rate=DEFAULT_POLL_INTERVAL
):
  ''' Construct a decorator that watches multiple associated files.

      Parameters:
      * `attr_name`: the underlying attribute, default: `'_'+func.__name__`
      * `unset_object`: the sentinel value for "uninitialised", default: `None`
      * `poll_rate`: how often in seconds to poll the file for changes,
        default from `DEFAULT_POLL_INTERVAL`: `{DEFAULT_POLL_INTERVAL}`

      The attribute *attr_name*`_lock` controls access to the property.
      The attributes *attr_name*`_filestates` and *attr_name*`_paths` track the
      associated files' state.
      The attribute *attr_name*`_lastpoll` tracks the last poll time.
      The *attr_name*`_lock` and *attr_name*`_paths` attributes
      must be set up by the class before the property is first used.

      The decorated function is passed the current list of files
      and returns the new list of files and the associated value.

      One example use would be a configuration file with recursive
      include operations; the inner function would parse the first
      file in the list, and the parse would accumulate this filename
      and those of any included files so that they can be monitored,
      triggering a fresh parse if one changes.

      Example:

          class C(object):
            def __init__(self):
              self._foo_lock = Lock()
              self._foo_paths = ['.foorc']
            @make_files_property()
            def foo(self, paths):
              new_paths, result = parse(paths[0])
              return new_paths, result

      The load function is called on the first access and on every
      access thereafter where an associated file's `FileState` has
      changed and the time since the last poll is at least
      the `poll_rate`.

      An attempt at avoiding races is made by
      ignoring reloads that raise exceptions and ignoring reloads
      where files that were `os.stat()`ed during the change check have
      changed state after the load.
  '''

  # pylint: disable=too-many-statements
  def made_files_property(func):
    # names of the instance attributes holding the property state
    value_attr = '_' + func.__name__ if attr_name is None else attr_name
    lock_attr = value_attr + '_lock'
    filestates_attr = value_attr + '_filestates'
    paths_attr = value_attr + '_paths'
    lastpoll_attr = value_attr + '_lastpoll'

    # pylint: disable=too-many-statements,too-many-branches
    def getprop(self):
      ''' Try to reload the property value from the files if the property
          value is stale and a file has been modified since the last reload.
      '''
      with getattr(self, lock_attr):
        now = time.time()
        last_poll = getattr(self, lastpoll_attr, None)
        if last_poll is None or last_poll + poll_rate <= now:
          setattr(self, lastpoll_attr, now)
          known_paths = getattr(self, paths_attr)
          known_states = getattr(self, filestates_attr, None)
          # file states observed before the load, keyed by path
          prestates = {}
          # no recorded states means we have never loaded
          changed = known_states is None
          if not changed:
            # Deliberately no early exit on the first change found:
            # every path is stat()ed to maximise the coverage
            # of the stability check after the load.
            for path, known_state in zip(known_paths, known_states):
              try:
                current_state = FileState(path)
              except OSError:
                changed = True
              else:
                prestates[path] = current_state
                if known_state != current_state:
                  changed = True
          if changed:
            try:
              new_paths, new_value = func(self, known_paths)
              new_states = [FileState(new_path) for new_path in new_paths]
            except (NameError, AttributeError):
              # these always propagate
              raise
            except Exception as e:  # pylint: disable=broad-except
              # keep serving the cached value if there is one
              cached = getattr(self, value_attr, unset_object)
              if cached is unset_object:
                raise
              debug(
                  "exception reloading .%s, keeping cached value: %s",
                  value_attr, e
              )
            else:
              # examine new filestates in case they changed during load
              # _if_ we knew about them from the earlier load
              unstable = any(
                  path in prestates and prestates[path] != new_state
                  for path, new_state in zip(new_paths, new_states)
              )
              if not unstable:
                setattr(self, value_attr, new_value)
                setattr(self, paths_attr, new_paths)
                setattr(self, filestates_attr, new_states)
      return getattr(self, value_attr, unset_object)

    return property(getprop)

  return made_files_property
559
+
560
+ # pylint: disable=too-many-branches
561
@uses_runstate
@pfx
def makelockfile(
    path,
    *,
    ext=None,
    poll_interval=None,
    timeout=None,
    runstate: RunState,
    keepopen=False,
    max_interval=37,
):
  ''' Create a lockfile and return its path.
      If `keepopen`, return a `(lockpath,lockfd)` 2-tuple.

      The lockfile can be removed with `os.remove`.
      This is the core functionality supporting the `lockfile()`
      context manager.

      Parameters:
      * `path`: the base associated with the lock file,
        often the filesystem object whose access is being managed.
      * `ext`: the extension to the base used to construct the lockfile name.
        Default: ".lock"
      * `timeout`: maximum time to wait before failing.
        Default: `None` (wait forever).
        Note that zero is an accepted value
        and requires the lock to succeed on the first attempt.
      * `poll_interval`: polling frequency when timeout is not 0,
        default from `DEFAULT_POLL_INTERVAL`.
      * `runstate`: optional `RunState` duck instance supporting cancellation.
        Note that if a cancelled `RunState` is provided
        no attempt will be made to make the lockfile.
      * `keepopen`: optional flag, default `False`:
        if true, do not close the lockfile and return `(lockpath,lockfd)`
        being the lock file path and the open file descriptor
      * `max_interval`: the upper bound, in seconds, on the growing interval
        between "waited" warnings while polling. Default: `37`

      Raises `ValueError` for a negative `timeout`,
      `TimeoutError` if the lock file cannot be made within `timeout`,
      `CancellationError` if the `runstate` is cancelled,
      and any `OSError` other than `EEXIST` from creating the lock file.
  '''
  if timeout is not None and timeout < 0:
    raise ValueError("timeout should be None or >= 0, not %r" % (timeout,))
  if ext is None:
    ext = '.lock'
  if poll_interval is None:
    poll_interval = DEFAULT_POLL_INTERVAL
  lockpath = path + ext
  # the time of the first failed attempt, None until then
  start = None
  with Pfx("makelockfile: %r", lockpath):
    while True:
      if runstate.cancelled:
        waited = 0 if start is None else time.time() - start
        warning(
            "%s cancelled; pid %d waited %ds", runstate, os.getpid(), waited
        )
        raise CancellationError("lock acquisition cancelled")
      try:
        # exclusive create: fails with EEXIST while someone holds the lock
        lockfd = os.open(lockpath, os.O_CREAT | os.O_EXCL | os.O_RDWR, 0)
      except OSError as e:
        if e.errno != errno.EEXIST:
          raise
        if timeout is not None and timeout <= 0:
          # immediate failure
          # pylint: disable=raise-missing-from
          raise TimeoutError("pid %d timed out" % (os.getpid(),), timeout)
        now = time.time()
        # post: timeout is None or timeout > 0
        if start is None:
          # first try - set up counters
          start = now
          complaint_last = start
          complaint_interval = 2 * max(DEFAULT_POLL_INTERVAL, poll_interval)
        elif now - complaint_last >= complaint_interval:
          # complain, then back off the complaints up to max_interval
          warning("pid %d waited %ds", os.getpid(), now - start)
          complaint_last = now
          complaint_interval = min(complaint_interval * 2, max_interval)
        # post: start is set
        if timeout is None:
          sleep_for = poll_interval
        else:
          # never sleep past the deadline
          sleep_for = min(poll_interval, start + timeout - now)
          # test for timeout
          if sleep_for <= 0:
            # pylint: disable=raise-missing-from
            raise TimeoutError("pid %d timed out" % (os.getpid(),), timeout)
        time.sleep(sleep_for)
        continue
      else:
        break
    if keepopen:
      return lockpath, lockfd
    os.close(lockfd)
    return lockpath
650
+
651
@contextmanager
def lockfile(path, _lockmap={}, _lockmap_lock=Lock(), **makelockfile_kw):
  ''' A context manager which takes and holds a lock file,
      yielding the lock file path.
      An open file descriptor is kept for the lock file as well
      to aid locating the process holding the lock file using eg `lsof`.
      This is just a context manager shim for `makelockfile`
      and all keyword arguments are plumbed through.

      On exit the lock file is removed and the file descriptor is closed;
      the descriptor is closed even if removing the lock file fails.
      A lock file which has already been removed only issues a warning.

      The `_lockmap` and `_lockmap_lock` defaults are deliberately shared
      between calls: they hold one `NRLock` per `path` for this process,
      which is held for the duration of the context.
  '''
  with _lockmap_lock:
    try:
      nrlock = _lockmap[path]
    except KeyError:
      nrlock = _lockmap[path] = NRLock(path)
  with nrlock:
    lockpath, lockfd = makelockfile(path, keepopen=True, **makelockfile_kw)
    try:
      yield lockpath
    finally:
      try:
        try:
          pfx_call(os.remove, lockpath)
        except FileNotFoundError as e:
          warning("lock file already removed: %s", e)
      finally:
        # always release the descriptor, whatever happened to the remove
        pfx_call(os.close, lockfd)
674
+
675
def crop_name(name, ext=None, name_max=255):
  ''' Crop a file basename so as not to exceed `name_max` in length.
      Return the original `name` if it is already short enough.
      Otherwise crop `name` before the file extension
      to make it short enough.

      Parameters:
      * `name`: the file basename to crop
      * `ext`: optional file extension;
        the default is to infer the extension with `os.path.splitext`.
      * `name_max`: optional maximum length, default: `255`

      Raises `ValueError` if the extension alone is longer than `name_max`.
  '''
  if ext is not None:
    stem = cutsuffix(name, ext)
    if stem is name:
      # cutsuffix handed back `name` itself, presumably because `name`
      # does not end in `ext`: fall back to inferring the extension
      ext = None
  if ext is None:
    stem, ext = splitext(name)
  room = name_max - len(ext)
  if room < 0:
    raise ValueError(
        "cannot crop name before ext %r to <=%s: name=%r" %
        (ext, name_max, name)
    )
  return name if len(stem) <= room else stem[:room] + ext
702
+
703
def max_suffix(dirpath, prefix):
  ''' Compute the highest existing numeric suffix
      for names in the directory `dirpath` starting with `prefix`.
      Return the highest suffix as an `int`,
      or `None` if no entry is `prefix` followed only by decimal digits.

      This is generally used as a starting point for picking
      a new numeric suffix.
  '''
  prefix = ustr(prefix)
  pfxlen = len(prefix)
  maxn = None
  for e in os.listdir(dirpath):
    e = ustr(e)
    if len(e) <= pfxlen or not e.startswith(prefix):
      continue
    tail = e[pfxlen:]
    # isdecimal(), not isdigit(): isdigit() also accepts characters
    # such as superscript digits, which int() rejects with ValueError
    if not tail.isdecimal():
      continue
    n = int(tail)
    if maxn is None or maxn < n:
      maxn = n
  return maxn
725
+
726
+ # pylint: disable=too-many-branches
727
def mkdirn(path, sep=''):
  ''' Create a new directory named `path+sep+n`,
      where `n` exceeds any name already present.
      Return the path of the new directory,
      or `None` (after logging an error) if the parent is not a directory
      or the `os.mkdir` fails for a reason other than `EEXIST`.

      Parameters:
      * `path`: the basic directory path.
        If empty, the new directory is made in the current directory
        and just its basename is returned.
        If `path` ends in `os.sep` the new directory is named `n`
        inside `path`.
      * `sep`: a separator between `path` and `n`.
        Default: `''`

      Raises `ValueError` if `sep` contains `os.sep`
      or if a nonempty `sep` is used with a `path` ending in `os.sep`.
  '''
  with Pfx("mkdirn(path=%r, sep=%r)", path, sep):
    if os.sep in sep:
      raise ValueError("sep contains os.sep (%r)" % (os.sep,))
    # remember whether the caller supplied an empty path
    opath = path
    if not path:
      path = '.' + os.sep

    # work out the parent directory and the name prefix to scan for
    if not path.endswith(os.sep):
      dirpath = dirname(path) or '.'
      prefix = basename(path) + sep
    elif sep:
      raise ValueError(
          "mkdirn(path=%r, sep=%r): using non-empty sep"
          " with a trailing %r seems nonsensical" % (path, sep, os.sep)
      )
    else:
      dirpath = path[:-len(os.sep)]
      prefix = ''

    if not isdir(dirpath):
      error("parent not a directory: %r", dirpath)
      return None

    # do a quick scan of the directory to find
    # if any names of the desired form already exist
    # in order to start after them
    maxn = max_suffix(dirpath, prefix)
    newn = 0 if maxn is None else maxn

    while True:
      newn += 1
      newpath = path + sep + str(newn)
      try:
        os.mkdir(newpath)
      except OSError as e:
        if e.errno == errno.EEXIST:
          # taken, try new value
          continue
        error("mkdir(%s): %s", newpath, e)
        return None
      # an empty original path gets a bare name back, without the './'
      return newpath if opath else basename(newpath)
784
+
785
def tmpdir():
  ''' Return the pathname of the default temporary directory for scratch data:
      the value of the environment variable `$TMPDIR` if it is set,
      otherwise `'/tmp'`.
  '''
  try:
    return os.environ['TMPDIR']
  except KeyError:
    return '/tmp'
790
+
791
def tmpdirn(tmp=None):
  ''' Make a new temporary directory with a numeric suffix
      and return the result of `mkdirn()`.

      The directory is made inside `tmp` (default from `tmpdir()`)
      and is named after the basename of `sys.argv[0]`
      followed by the numeric suffix.
  '''
  parent = tmpdir() if tmp is None else tmp
  cmd = basename(sys.argv[0])
  return mkdirn(joinpath(parent, cmd))
797
+
798
def find(path, select=None, sort_names=True):
  ''' Walk the directory tree at `path` with `os.walk`,
      yielding the selected paths.

      Parameters:
      * `path`: the top of the tree to walk
      * `select`: optional callable accepting a path
        and returning true if the path is wanted;
        the default selects every path
      * `sort_names`: if true (the default), visit the files
        and subdirectories of each directory in sorted order

      Note: not selecting a directory prunes all its descendants.
  '''
  if select is None:
    select = lambda _: True
  for dirpath, dirnames, filenames in os.walk(path):
    if not select(dirpath):
      # unwanted directory: yield nothing and do not descend
      dirnames[:] = []
      continue
    yield dirpath
    if sort_names:
      # sorted in place: os.walk descends using this same list
      dirnames.sort()
      filenames.sort()
    for filepath in (joinpath(dirpath, filename) for filename in filenames):
      if select(filepath):
        yield filepath
    # prune the unselected subdirectories from the walk
    dirnames[:] = [
        subdir for subdir in dirnames if select(joinpath(dirpath, subdir))
    ]
822
+
823
def findup(path, test, first=False):
  ''' Test the pathname `abspath(path)` and each of its ancestors
      against the callable `test`,
      yielding paths satisfying the test,
      nearest first.

      If `first` is true (default `False`)
      this function always yields exactly one value,
      either the first path satisfying the test or `None`.
      This mode supports a use such as:

          matched_path = next(findup(path, test, first=True))
          # post condition: matched_path will be `None` on no match
          # otherwise the first matching path
  '''
  candidate = None
  parent = abspath(path)
  # stop once dirname() no longer changes the path, ie at the root
  while parent != candidate:
    candidate = parent
    if test(candidate):
      yield candidate
      if first:
        return
    parent = dirname(candidate)
  if first:
    # no match: still yield exactly one value
    yield None
849
+
850
def common_path_prefix(*paths):
  ''' Return the common path prefix of the `paths`.

      Note that the common prefix of `'/a/b/c1'` and `'/a/b/c2'`
      is `'/a/b/'`, _not_ `'/a/b/c'`.

      Callers may find it useful to preadjust the supplied paths
      with `normpath`, `abspath` or `realpath` from `os.path`;
      see the `os.path` documentation for the various caveats
      which go with those functions.

      Examples:

          >>> # the obvious
          >>> common_path_prefix('', '')
          ''
          >>> common_path_prefix('/', '/')
          '/'
          >>> common_path_prefix('a', 'a')
          'a'
          >>> common_path_prefix('a', 'b')
          ''
          >>> # nonempty directory path prefixes end in os.sep
          >>> common_path_prefix('/', '/a')
          '/'
          >>> # identical paths include the final basename
          >>> common_path_prefix('p/a', 'p/a')
          'p/a'
          >>> # the comparison does not normalise paths
          >>> common_path_prefix('p//a', 'p//a')
          'p//a'
          >>> common_path_prefix('p//a', 'p//b')
          'p//'
          >>> common_path_prefix('p//a', 'p/a')
          'p/'
          >>> common_path_prefix('p/a', 'p/b')
          'p/'
          >>> # the comparison strips complete unequal path components
          >>> common_path_prefix('p/a1', 'p/a2')
          'p/'
          >>> common_path_prefix('p/a/b1', 'p/a/b2')
          'p/a/'
          >>> # contrast with cs.lex.common_prefix
          >>> common_prefix('abc/def', 'abc/def1')
          'abc/def'
          >>> common_path_prefix('abc/def', 'abc/def1')
          'abc/'
          >>> common_prefix('abc/def', 'abc/def1', 'abc/def2')
          'abc/def'
          >>> common_path_prefix('abc/def', 'abc/def1', 'abc/def2')
          'abc/'
  '''
  prefix = common_prefix(*paths)
  if prefix.endswith(os.sep):
    # already ends on a component boundary
    return prefix
  first_path = paths[0]
  if all(path == first_path for path in paths):
    # identical paths: the whole path is the common prefix
    return prefix
  # the prefix ends part way through a component: strip the partial basename
  partial = basename(prefix)
  return prefix[:len(prefix) - len(partial)]
910
+
911
+ class Pathname(str):
912
+ ''' Subclass of str presenting convenience properties useful for
913
+ format strings related to file paths.
914
+ '''
915
+
916
+ _default_prefixes = (('$HOME/', '~/'),)
917
+
918
+ def __format__(self, fmt_spec):
919
+ ''' Calling format(<Pathname>, fmt_spec) treat `fmt_spec` as a new style
920
+ formatting string with a single positional parameter of `self`.
921
+ '''
922
+ if fmt_spec == '':
923
+ return str(self)
924
+ return fmt_spec.format(self)
925
+
926
+ @property
927
+ def dirname(self):
928
+ ''' The dirname of the Pathname.
929
+ '''
930
+ return Pathname(dirname(self))
931
+
932
+ @property
933
+ def basename(self):
934
+ ''' The basename of this Pathname.
935
+ '''
936
+ return Pathname(basename(self))
937
+
938
+ @property
939
+ def abs(self):
940
+ ''' The absolute form of this Pathname.
941
+ '''
942
+ return Pathname(abspath(self))
943
+
944
+ @property
945
+ def isabs(self):
946
+ ''' Whether this Pathname is an absolute Pathname.
947
+ '''
948
+ return isabspath(self)
949
+
950
+ @property
951
+ def short(self):
952
+ ''' The shortened form of this Pathname.
953
+ '''
954
+ return self.shorten()
955
+
956
+ def shorten(self, prefixes=None):
957
+ ''' Shorten a Pathname using ~ and ~user.
958
+ '''
959
+ return shortpath(self, prefixes=prefixes)
960
+
961
def iter_fd(fd, **kw):
  ''' A generator yielding the data chunks
      from `CornuCopyBuffer.from_fd(fd, **kw)`
      for the file descriptor `fd`.
  '''
  bfr = CornuCopyBuffer.from_fd(fd, **kw)
  for chunk in bfr:
    yield chunk
966
+
967
def iter_file(f, **kw):
  ''' Generator yielding the chunks of data read from the file `f`.
      Keyword arguments are passed to `CornuCopyBuffer.from_file`.
  '''
  yield from CornuCopyBuffer.from_file(f, **kw)
972
+
973
def byteses_as_fd(bss, **kw):
  ''' Present the iterable of bytes `bss` as a readable file descriptor
      and return that file descriptor.
      Keyword arguments are passed to `CornuCopyBuffer.as_fd`.

      Example:

          # present a passphrase for use as an input file descriptor
          # for a subprocess
          rfd = byteses_as_fd([(passphrase + '\n').encode()])
  '''
  bfr = CornuCopyBuffer(bss)
  return bfr.as_fd(**kw)
985
+
986
def datafrom_fd(fd, offset=None, readsize=None, aligned=True, maxlength=None):
  ''' General purpose reader for file descriptors yielding data from `offset`.
      **Note**: This does not move the file descriptor position
      **if** the file is seekable.

      Parameters:
      * `fd`: the file descriptor from which to read.
      * `offset`: the offset from which to read.
        If omitted, use the current file descriptor position.
      * `readsize`: the read size, default: `DEFAULT_READSIZE`
      * `aligned`: if true (the default), the first read is sized
        to bring the offset up to the next multiple of `readsize`
        so that subsequent full reads start on `readsize` boundaries.
      * `maxlength`: if specified yield no more than this many bytes of data.

      Seekable descriptors are read with `pread` at explicit offsets;
      unseekable descriptors are read sequentially with `read`
      and `offset` is then only used for the alignment arithmetic.
      The generator ends at the first empty read (EOF)
      or once `maxlength` bytes have been yielded.
  '''
  try:
    cur_offset = os.lseek(fd, 0, SEEK_CUR)
  except OSError:
    # unseekable: fall back to sequential reads
    cur_offset = 0  # guess
    is_seekable = False
  else:
    is_seekable = True
  if offset is None:
    offset = cur_offset
  if readsize is None:
    readsize = DEFAULT_READSIZE
  # size of the next read: normally readsize, but the first read is
  # shortened to the distance to the next readsize boundary
  nextsize = readsize
  if aligned:
    misalignment = offset % readsize
    if misalignment > 0:
      nextsize = readsize - misalignment
  while maxlength is None or maxlength > 0:
    size = nextsize if maxlength is None else min(nextsize, maxlength)
    bs = pread(fd, size, offset) if is_seekable else read(fd, size)
    if not bs:
      return
    yield bs
    bslen = len(bs)
    offset += bslen
    if maxlength is not None:
      maxlength -= bslen
    # only the first read is adjusted for alignment
    nextsize = readsize
1035
+
1036
@strable(open_func=lambda filename: os.open(filename, flags=O_RDONLY))
def datafrom(f, offset=None, readsize=None, maxlength=None):
  ''' General purpose reader for files yielding data from `offset`.

      *WARNING*: this function might move the file pointer.

      Parameters:
      * `f`: the file from which to read data;
        if a string, the file is opened read only with `os.open`
        by the `@strable` decorator;
        if an int, treated as an OS file descriptor;
        otherwise presumed to be a file-like object.
        If that object has a `.fileno()` method which returns
        the file descriptor of a regular file,
        read via that file descriptor using `datafrom_fd`.
      * `offset`: starting offset for the data;
        if omitted, start from the current file position
      * `maxlength`: optional maximum amount of data to yield
      * `readsize`: read size, default DEFAULT_READSIZE.

      For file-like objects, the read1 method is used in preference
      to read if available. The file pointer is briefly moved during
      fetches: each fetch seeks to the read offset, reads,
      then seeks back to where the file pointer was.
  '''
  if readsize is None:
    readsize = DEFAULT_READSIZE
  if isinstance(f, int):
    # operating system file descriptor
    yield from datafrom_fd(
        f, offset=offset, readsize=readsize, maxlength=maxlength
    )
    return
  # see if the file has a usable fileno; if so use datafrom_fd
  try:
    get_fileno = f.fileno
  except AttributeError:
    fd = None
  else:
    try:
      fd = get_fileno()
    except OSError:
      # files with no file descriptor, such as io.BytesIO,
      # raise io.UnsupportedOperation, an OSError subclass
      fd = None
  if fd is not None and stat.S_ISREG(os.fstat(fd).st_mode):
    yield from datafrom_fd(
        fd, offset=offset, readsize=readsize, maxlength=maxlength
    )
    return
  # presume a file-like object
  try:
    read1 = f.read1
  except AttributeError:
    read1 = f.read
  tell = f.tell
  seek = f.seek
  if offset is None:
    # no explicit offset: start from the current position
    offset = tell()
  while maxlength is None or maxlength > 0:
    # save the position, fetch from our own offset, restore the position
    offset0 = tell()
    seek(offset, SEEK_SET)
    n = readsize
    if maxlength is not None:
      n = min(n, maxlength)
    bs = read1(n)
    seek(offset0)
    if not bs:
      break
    yield bs
    offset += len(bs)
    if maxlength is not None:
      maxlength -= len(bs)
      assert maxlength >= 0
1099
+
1100
class ReadMixin(object):
  ''' Useful read methods to accommodate modes not necessarily available in a class.

      Note that this mixin presumes that the attribute `self._lock`
      is a threading.RLock like context manager.
      The methods here also call `self.tell()`, `self.seek()`
      and (for `read(size=-1)`) `len(self)`,
      which the class using the mixin must provide.

      Classes using this mixin should consider overriding the default
      .datafrom method with something more efficient or direct.
  '''

  def datafrom(self, offset, readsize=None):
    ''' Yield data from the specified `offset` onward in some
        approximation of the "natural" chunk size.

        *NOTE*: UNLIKE the global datafrom() function, this method
        MUST NOT move the logical file position. Implementors may need
        to save and restore the file pointer within a lock around
        the I/O if they do not use a direct access method like
        os.pread.

        The aspiration here is to read data with only a single call
        to the underlying storage, and to return the chunks in
        natural sizes instead of some default read size.

        Classes using this mixin must implement this method;
        this default implementation raises `NotImplementedError`.
    '''
    raise NotImplementedError(
        "return an iterator which does not change the file offset"
    )

  def bufferfrom(self, offset):
    ''' Return a CornuCopyBuffer from the specified `offset`,
        fed by `self.datafrom(offset)`.
    '''
    return CornuCopyBuffer(self.datafrom(offset), offset=offset)

  # pylint: disable=too-many-branches
  def read(self, size=-1, offset=None, longread=False):
    ''' Read up to `size` bytes, honouring the "single system call"
        spirit unless `longread` is true.
        The file position is moved to just past the data returned.

        Parameters:
        * `size`: the number of bytes requested. A size of -1 requests
          all bytes to the end of the file.
        * `offset`: the starting point of the read; if None, use the
          current file position as reported by `self.tell()`.
        * `longread`: switch from "single system call" to "as many
          as required to obtain `size` bytes"; short data will still
          be returned if the file is too short.

        Returns `b''` if `size` is 0 or if there is no data at `offset`.
    '''
    bfr = getattr(self, '_reading_bfr', None)
    if offset is None:
      # Always start from the logical file position so that a seek()
      # since the previous read is honoured; a cached buffer
      # at a different offset is discarded below.
      offset = self.tell()
    if size == -1:
      size = max(len(self) - offset, 0)
    if size == 0:
      return b''
    # collected chunks; stays empty if we hit EOF immediately
    bss = []
    while size > 0:
      with self._lock:
        # We need to retest on each iteration because other reads
        # may be interleaved, interfering with the buffer.
        if bfr is None or bfr.offset != offset:
          self._reading_bfr = bfr = self.bufferfrom(offset)
        bfr.extend(1, short_ok=True)
        if not bfr.buf:
          # EOF
          break
        consume = min(size, len(bfr.buf))
        assert consume > 0
        chunk = bfr.take(consume)
        offset += consume
        self.seek(offset)
      assert len(chunk) == consume
      if not longread:
        # single fetch mode: return whatever the first fetch obtained
        return chunk
      bss.append(chunk)
      size -= consume
    if not bss:
      return b''
    if len(bss) == 1:
      return bss[0]
    return b''.join(bss)

  def read_n(self, n):
    ''' Read exactly `n` bytes of data and return them as a `bytearray`.

        Unlike traditional file.read(), RawIOBase.read() may return short
        data, thus this workalike, which fills a buffer of `n` bytes
        using `self.readinto`.

        Raises `ValueError` if `n` is less than 1.
        Raises `RuntimeError` if fewer than `n` bytes could be read,
        for example on hitting EOF.
    '''
    if n < 1:
      raise ValueError("n two low, expected >=1, got %r" % (n,))
    data = bytearray(n)
    nread = self.readinto(data)
    if nread != len(data):
      raise RuntimeError(
          "  WRONG NUMBER OF BYTES(%d): data=%s" % (nread, data)
      )
    return data

  @locked
  def readinto(self, barray):
    ''' Read data into the bytearray `barray`
        starting from the current file position.
        Return the number of bytes stored,
        which is less than `len(barray)` only if the data ran out.
        The file position is advanced by the number of bytes stored.
    '''
    needed = len(barray)
    boff = 0
    if needed > 0:
      offset = self.tell()
      for bs in self.datafrom(offset):
        if not bs:
          break
        if len(bs) > needed:
          # only take what still fits
          bs = memoryview(bs)[:needed]
        bs_len = len(bs)
        boff2 = boff + bs_len
        barray[boff:boff2] = bs
        boff = boff2
        needed -= bs_len
        if needed <= 0:
          # buffer full: do not keep draining the data source
          break
      # datafrom() does not move the file position, so do it here
      self.seek(offset + boff)
    return boff
1226
+
1227
class BackedFile(ReadMixin):
  ''' A RawIOBase duck type
      which uses a backing file for initial data
      and writes new data to a front scratch file.

      Attributes set up by `__init__`:
      * `back_file`: the backing file
      * `front_file`: an unbuffered `TemporaryFile` receiving written data
      * `front_range`: a `Range` recording the spans which have been written
      * `read_only`: if set true, `write()` raises `RuntimeError`
  '''

  def __init__(self, back_file, dirpath=None):
    ''' Initialise the BackedFile using `back_file` for the backing data.
        The optional `dirpath` is the directory
        in which to create the front scratch file.
    '''
    self._offset = 0
    self._dirpath = dirpath
    self._lock = RLock()
    self.back_file = back_file
    self.front_file = TemporaryFile(dir=dirpath, buffering=0)
    self.front_range = Range()
    self.read_only = False

  def __len__(self):
    ''' The length of the file:
        the larger of the backing file length
        and the end of the written front data.
        If the backing file does not support `len()`
        its length is measured by seeking to its end,
        after which its position is restored.
    '''
    back_file = self.back_file
    try:
      back_len = len(back_file)
    except TypeError:
      back_pos = back_file.tell()
      back_len = back_file.seek(0, SEEK_END)
      back_file.seek(back_pos, SEEK_SET)
    return max(self.front_range.end, back_len)

  @locked
  def switch_back_file(self, new_back_file):
    ''' Switch out one back file for another. Return the old back file.
    '''
    old_back_file = self.back_file
    self.back_file = new_back_file
    return old_back_file

  def __enter__(self):
    ''' BackedFile instances offer a context manager that take the lock,
        allowing synchronous use of the file
        without implementing a suite of special methods like pread/pwrite.
        Returns the BackedFile itself for use in `with ... as f:`.
    '''
    self._lock.acquire()
    return self

  def __exit__(self, *e):
    ''' Release the lock taken by `__enter__`.
    '''
    self._lock.release()

  def close(self):
    ''' Close the BackedFile:
        close the front file and forget it.
        The backing file is left alone.
    '''
    self.front_file.close()
    self.front_file = None

  def tell(self):
    ''' Report the current file pointer offset.
    '''
    return self._offset

  @locked
  def seek(self, pos, whence=SEEK_SET):
    ''' Adjust the current file pointer offset
        and return the new absolute offset.

        `whence` may be `SEEK_SET` (the default), `SEEK_CUR`
        or `SEEK_END`; for `SEEK_END` the new offset is `pos`
        relative to `len(self)`.
        Raises `ValueError` for any other `whence`.
    '''
    if whence == SEEK_SET:
      new_offset = pos
    elif whence == SEEK_CUR:
      new_offset = self._offset + pos
    elif whence == SEEK_END:
      # len(self) accounts for both the backing file and the front data
      new_offset = len(self) + pos
    else:
      raise ValueError("unsupported whence value %r" % (whence,))
    self._offset = new_offset
    return new_offset

  def datafrom(self, offset):
    ''' Generator yielding natural chunks from the file commencing at offset.

        The span from `offset` to the end of the file is divided
        using `self.front_range.slices()` into spans
        which are or are not in the front file;
        data for each span come from the front file or the backing file
        accordingly, using the file's own `.datafrom` method if present,
        otherwise the module level `datafrom()` function.
    '''
    # the module level function, not this method
    global_datafrom = globals()['datafrom']
    front_file = self.front_file
    try:
      front_datafrom = front_file.datafrom
    except AttributeError:
      front_datafrom = partial(global_datafrom, front_file)
    back_file = self.back_file
    try:
      back_datafrom = back_file.datafrom
    except AttributeError:
      back_datafrom = partial(global_datafrom, back_file)
    for in_front, span in self.front_range.slices(offset, len(self)):
      consume = len(span)
      assert consume > 0
      if in_front:
        chunks = front_datafrom(span.start)
      else:
        chunks = back_datafrom(span.start)
      for bs in chunks:
        assert len(bs) > 0
        if len(bs) > consume:
          # do not run past the end of this span
          bs = memoryview(bs)[:consume]
        yield bs
        consume -= len(bs)
        if consume <= 0:
          break

  @locked
  def write(self, b):
    ''' Write the data `b` to the front_file at the current offset,
        record the written span in `front_range`,
        advance the file pointer past the written data
        and return the number of bytes written.
        Raises `RuntimeError` if the BackedFile is marked read only.
    '''
    if self.read_only:
      raise RuntimeError("write to read-only BackedFile")
    front_file = self.front_file
    start = self._offset
    front_file.seek(start)
    written = front_file.write(b)
    if written is None:
      warning(
          "front_file.write() returned None, assuming %d bytes written, data=%r",
          len(b), b
      )
      written = len(b)
    end = start + written
    self.front_range.add_span(start, end)
    # like an ordinary file, leave the pointer after the written data
    self._offset = end
    return written
1350
+
1351
# pylint: disable=too-few-public-methods,protected-access
class BackedFile_TestMethods(object):
  ''' Mixin providing a test for subclasses of BackedFile.
      The test exercises `self.backed_fp`,
      whose expected initial content is `self.backing_text`.
  '''

  # pylint: disable=no-member
  def _eq(self, a, b, opdesc):
    ''' Convenience wrapper for assertEqual
        which composes a failure message from `opdesc` and the values.
    '''
    message = "%s: got %r, expected %r" % (opdesc, a, b)
    self.assertEqual(a, b, message)

  # pylint: disable=no-member
  def test_BackedFile(self):
    ''' Test function for a BackedFile to use in unit test suites.
    '''
    from random import randint  # pylint: disable=import-outside-toplevel
    expected_text = self.backing_text
    backed = self.backed_fp
    # the whole file should read back as the backing text
    backed.seek(0)
    whole_text = backed.read_n(len(backed))
    self._eq(expected_text, whole_text, "backing_text vs bfp_text")
    # likewise just the leading 512 bytes
    backed.seek(0)
    leading_text = backed.read_n(512)
    self._eq(
        expected_text[:512], leading_text,
        "leading 512 bytes of backing_text vs bfp_leading_text"
    )
    # overwrite 256 bytes at offset 512 with random data
    random_chunk = bytes(randint(0, 255) for _ in range(256))
    backed.seek(512)
    backed.write(random_chunk)
    # the front file should now hold exactly one span: 512..768
    front = backed.front_file
    written = backed.front_range
    self.assertIsNotNone(front)
    self.assertIsNotNone(written)
    spans = written._spans
    self.assertEqual(len(spans), 1, "fr._spans = %r" % (spans,))
    self.assertEqual(written._spans[0].start, 512)
    self.assertEqual(written._spans[0].end, 768)
    # the random data should be in the front file itself
    front.seek(512)
    self.assertEqual(random_chunk, front.read(256))
    # and should be what the BackedFile presents at that offset
    backed.seek(512)
    self.assertEqual(backed.read_n(256), random_chunk)
    # a read straddling the old data and the new data
    backed.seek(256)
    overlap_chunk = backed.read_n(512)
    self.assertEqual(
        len(overlap_chunk), 512, "overlap_chunk not 512 bytes: %d:%s" %
        (len(overlap_chunk), bytes(overlap_chunk))
    )
    self.assertEqual(overlap_chunk, expected_text[256:512] + random_chunk)
1411
+
1412
class Tee(object):
  ''' A file-like fan out:
      its `.write`, `.flush` and `.close` methods
      are applied to each of several output files in turn.
  '''

  def __init__(self, *fps):
    ''' Initialise the Tee; any arguments are taken to be output file objects.
    '''
    self._fps = [*fps]

  def add(self, output):
    ''' Add `output` to the outputs.
    '''
    self._fps.append(output)

  def write(self, data):
    ''' Write `data` to every output.
        Note: does not detect or accommodate short writes.
    '''
    for output in self._fps:
      output.write(data)

  def flush(self):
    ''' Flush every output.
    '''
    for output in self._fps:
      output.flush()

  def close(self):
    ''' Close every output, then discard the list of outputs.
    '''
    for output in self._fps:
      output.close()
    self._fps = None
1446
+
1447
@contextmanager
def tee(fp, fp2):
  ''' Context manager which, for its duration,
      replaces `fp.write` and `fp.flush` with versions
      which first apply the same call to `fp2`.
  '''
  old_write, old_flush = fp.write, fp.flush

  def _write(*a, **kw):
    # copy to fp2 first, then return the result of the real write
    fp2.write(*a, **kw)
    return old_write(*a, **kw)

  def _flush(*a, **kw):
    # flush fp2 first, then return the result of the real flush
    fp2.flush(*a, **kw)
    return old_flush(*a, **kw)

  replacements = dict(write=_write, flush=_flush)
  with stackattrs(fp, **replacements):
    yield
1464
+
1465
class NullFile(object):
  ''' A writable file-like object which throws its input away.

      Note that this is _not_ an open of `os.devnull`;
      it just discards writes and is not the underlying file descriptor.
  '''

  def __init__(self):
    ''' Start with a file offset of 0.
    '''
    self.offset = 0

  def write(self, data):
    ''' Discard `data`, advancing the file offset by its length.
        Return the length of `data`.
    '''
    nbytes = len(data)
    self.offset = self.offset + nbytes
    return nbytes

  def flush(self):
    ''' Flush buffered data to the subsystem: there is none, so do nothing.
    '''
1487
+
1488
def file_data(fp, nbytes=None, rsize=None):
  ''' Generator reading `nbytes` of data from `fp`,
      yielding the chunks as they are read.

      Parameters:
      * `nbytes`: number of bytes to read; if None read until EOF.
      * `rsize`: read size, default DEFAULT_READSIZE.

      A warning is issued if EOF arrives after some data
      but before `nbytes` bytes were obtained.
  '''
  if rsize is None:
    rsize = DEFAULT_READSIZE
  # prefer the "short read" flavour of read if the file has one
  try:
    fetch = fp.read1
  except AttributeError:
    fetch = fp.read
  bounded = nbytes is not None
  copied = 0
  while not bounded or nbytes > 0:
    data = fetch(min(nbytes, rsize) if bounded else rsize)
    if data:
      yield data
      dlen = len(data)
      copied += dlen
      if bounded:
        nbytes -= dlen
      continue
    # EOF: an immediate EOF is valid and silent,
    # an EOF part way through a bounded read is worth a warning
    if bounded and copied > 0:
      warning("early EOF: only %d bytes read, %d still to go", copied, nbytes)
    break
1519
+
1520
def copy_data(fpin, fpout, nbytes, rsize=None):
  ''' Copy `nbytes` of data from `fpin` to `fpout`
      using the chunks from `file_data()`;
      return the number of bytes copied.

      Parameters:
      * `nbytes`: number of bytes to copy.
        If `None`, copy until EOF.
      * `rsize`: read size, default `DEFAULT_READSIZE`.
  '''
  total = 0
  chunks = file_data(fpin, nbytes, rsize)
  for data in chunks:
    fpout.write(data)
    total = total + len(data)
  return total
1534
+
1535
def read_data(fp, nbytes, rsize=None):
  ''' Read `nbytes` of data from `fp` using `file_data()`
      and return the data as a single value.

      Parameters:
      * `nbytes`: number of bytes to copy.
        If `None`, copy until EOF.
      * `rsize`: read size, default `DEFAULT_READSIZE`.
  '''
  chunks = [chunk for chunk in file_data(fp, nbytes, rsize)]
  if len(chunks) == 1:
    # a lone chunk is returned as is, without copying
    return chunks[0]
  # zero chunks join to b''
  return b''.join(chunks)
1549
+
1550
def read_from(fp, rsize=None, tail_mode=False, tail_delay=None):
  ''' Generator to present text or data from an open file until EOF.

      Parameters:
      * `rsize`: read size, default: DEFAULT_READSIZE
      * `tail_mode`: if true, yield an empty chunk at EOF, allowing resumption
        if the file grows.
      * `tail_delay`: the pause in seconds after each EOF in tail mode,
        default: DEFAULT_TAIL_PAUSE; it is a `ValueError` to supply
        this without `tail_mode`.
  '''
  if rsize is None:
    rsize = DEFAULT_READSIZE
  if tail_delay is None:
    tail_delay = DEFAULT_TAIL_PAUSE
  elif not tail_mode:
    raise ValueError(
        "tail_mode=%r but tail_delay=%r" % (tail_mode, tail_delay)
    )
  while True:
    chunk = fp.read(rsize)
    if chunk:
      yield chunk
    elif tail_mode:
      # hand the empty chunk to the caller to indicate EOF, then pause
      yield chunk
      time.sleep(tail_delay)
    else:
      return
1577
+
1578
def lines_of(fp, partials=None):
  ''' Return a generator yielding lines from the file `fp` until EOF,
      made by passing the chunks from `read_from(fp)` to `as_lines`.
      Intended for file-like objects that lack a line iteration API.

      The optional `partials` is passed to `as_lines`;
      a new empty list is used if it is omitted.
  '''
  chunks = read_from(fp)
  return as_lines(chunks, [] if partials is None else partials)
1585
+
1586
# pylint: disable=redefined-builtin
@contextmanager
def atomic_filename(
    filename,
    exists_ok=False,
    placeholder=False,
    dir=None,
    prefix=None,
    suffix=None,
    rename_func=None,
    **tempfile_kw
):
  ''' A context manager to create `filename` atomically on completion.
      This yields a `NamedTemporaryFile` to use to create the file contents.
      On completion the temporary file is renamed to the target name `filename`.

      If the caller decides to _not_ create the target they may remove the
      temporary file. This is not considered an error.

      Parameters:
      * `filename`: the file name to create
      * `exists_ok`: default `False`;
        if true it is not an error if `filename` already exists
      * `placeholder`: create a placeholder file at `filename`
        while the real contents are written to the temporary file
      * `dir`: passed to `NamedTemporaryFile`, specifies the directory
        to hold the temporary file; the default is `dirname(filename)`
        to ensure the rename is atomic
      * `prefix`: passed to `NamedTemporaryFile`, specifies a prefix
        for the temporary file; the default is a dot (`'.'`) plus the prefix
        from `splitext(basename(filename))` plus a dash (`'-'`)
      * `suffix`: passed to `NamedTemporaryFile`, specifies a suffix
        for the temporary file; the default is the extension obtained
        from `splitext(basename(filename))`
      * `rename_func`: a callable accepting `(tempname,filename)`
        used to rename the temporary file to the final name; the
        default is `os.rename` if `exists_ok` or `placeholder`,
        otherwise `rename_excl`.
        This parameter exists to accept something such as `FSTags.move`.
      Other keyword arguments are passed to the `NamedTemporaryFile` constructor.

      Raises `FileExistsError` if `filename` exists and `exists_ok` is false,
      checked both on entry and again just before the rename.

      Example:

          >>> import os
          >>> from os.path import exists as existspath
          >>> fn = 'test_atomic_filename'
          >>> with atomic_filename(fn, mode='w') as f:
          ...     assert not existspath(fn)
          ...     print('foo', file=f)
          ...     assert not existspath(fn)
          ...
          >>> assert existspath(fn)
          >>> assert open(fn).read() == 'foo\\n'
          >>> os.remove(fn)
  '''
  if dir is None:
    dir = dirname(filename)
  fprefix, fsuffix = splitext(basename(filename))
  if prefix is None:
    prefix = '.' + fprefix + '-'
  if suffix is None:
    suffix = fsuffix
  if rename_func is None:
    if exists_ok or placeholder:
      rename_func = os.rename
    else:
      rename_func = rename_excl
  if not exists_ok and existspath(filename):
    raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), filename)
  with NamedTemporaryFile(
      dir=dir,
      prefix=prefix,
      suffix=suffix,
      **tempfile_kw,
  ) as T:
    if placeholder:
      # create a placeholder file
      with open(filename, 'ab' if exists_ok else 'xb'):
        pass
    yield T
    # if the caller removed the temp file
    # do not create/replace the target
    if existspath(T.name):
      # Push any buffered data out to the temp file now, otherwise
      # the target could appear under its final name with incomplete
      # contents until the temporary file object is closed.
      if not T.closed:
        T.flush()
      mtime = pfx_call(os.stat, T.name).st_mtime
      try:
        # make the new file's modes etc like those of the existing target
        pfx_call(shutil.copystat, filename, T.name)
      except FileNotFoundError:
        pass
      except OSError as e:
        warning(
            "defaut modes not copied from from placeholder %r: %s", filename, e
        )
      else:
        # we make the attribute like the original, now bump the mtime
        try:
          atime = pfx_call(os.stat, filename).st_atime
        except FileNotFoundError:
          atime = mtime
        pfx_call(os.utime, T.name, (atime, mtime))
      # just in case something made the file
      if not placeholder and not exists_ok and existspath(filename):
        raise FileExistsError(
            errno.EEXIST, os.strerror(errno.EEXIST), filename
        )
      pfx_call(rename_func, T.name, filename)
      # recreate the temp file so that it can be cleaned up by NamedTemporaryFile
      with pfx_call(open, T.name, 'xb'):
        pass
1694
+
1695
def atomic_copy2(srcpath, dstpath, *, follow_symlinks=True, **af_kw):
  ''' Copy `srcpath` to `dstpath` with `shutil.copy2`
      by way of a temporary file obtained from `atomic_filename`;
      return the result of `shutil.copy2`.
      This differs from `shutil.copy2` in 2 ways:
      - it is an error if `dstpath` already exists unless you supply
        `exists_ok=True`
      - the new copy appears atomically when the copy is complete
        instead of being visible partially complete during the copy
      The `follow_symlinks=True` parameter is passed to `shutil.copy2`.
      Other keyword parameters are passed to `atomic_filename`.
  '''
  with atomic_filename(dstpath, **af_kw) as tmpf:
    copied = shutil.copy2(srcpath, tmpf.name, follow_symlinks=follow_symlinks)
    return copied
1708
+
1709
class RWFileBlockCache(object):
  ''' A scratch file for storing data:
      `put(data)` appends data and returns its offset,
      `get(offset, length)` fetches data back.
  '''

  def __init__(self, pathname=None, dirpath=None, suffix=None, lock=None):
    ''' Initialise the file.

        Parameters:
        * `pathname`: path of file. If None, create a new file with
          tempfile.mkstemp using dir=`dirpath` and unlink that file once
          opened.
        * `dirpath`: location for the file if made by mkstemp as above.
        * `suffix`: filename suffix for the file if made by mkstemp as above.
        * `lock`: an object to use as a mutex, allowing sharing with
          some outer system. A Lock will be allocated if omitted.

        The file is opened for read/write in append mode
        and the file descriptor is kept as `self.fd`.
        `self.pathname` is `None` for an unlinked scratch file.
    '''
    if pathname is None:
      tmpfd, tmppath = mkstemp(dir=dirpath, suffix=suffix)
      try:
        self.fd = os.open(tmppath, os.O_RDWR | os.O_APPEND)
      finally:
        # the scratch file is anonymous: always drop its name and
        # the mkstemp file descriptor, even if the reopen failed
        try:
          os.remove(tmppath)
        finally:
          os.close(tmpfd)
      self.pathname = None
    else:
      self.fd = os.open(pathname, os.O_RDWR | os.O_APPEND)
      self.pathname = pathname
    if lock is None:
      lock = Lock()
    self._lock = lock

  def __str__(self):
    return "%s(pathname=%s)" % (type(self).__name__, self.pathname)

  def close(self):
    ''' Close the file descriptor.
        A warning is issued if it is already closed.
    '''
    with Pfx("%s.close", self):
      fd = self.fd
      if fd is None:
        warning("fd already closed")
      else:
        os.close(fd)
        self.fd = None

  @property
  def closed(self):
    ''' Test whether the file descriptor has been closed.
    '''
    return self.fd is None

  def put(self, data):
    ''' Store `data` at the end of the file,
        return the offset at which it was stored.
    '''
    fd = self.fd
    with self._lock:
      # The file is open in append mode so the write lands at the end
      # of the file regardless of the current position:
      # the offset of the data is therefore the current end of file.
      offset = os.lseek(fd, 0, os.SEEK_END)
      if len(data) == 0:
        length = 0
      else:
        length = os.write(fd, data)
    assert length == len(data)
    return offset

  def get(self, offset, length):
    ''' Get data from `offset` of length `length`.
        `length` must be greater than 0
        and all of the data must be present in the file.
    '''
    assert length > 0
    fd = self.fd
    data = os.pread(fd, length, offset)
    assert len(data) == length
    return data
1779
+
1780
@contextmanager
def gzifopen(path, mode='r', *a, **kw):
  ''' Context manager to open a file which may be a plain file or a gzipped file.

      If `path` ends with `'.gz'` then the filesystem paths attempted
      are `path` and `path` without the extension, otherwise the
      filesystem paths attempted are `path` and `path+'.gz'`. In
      this way a path ending in `'.gz'` indicates a preference for
      a gzipped file otherwise an uncompressed file.

      However, if exactly one of the paths exists already then only
      that path will be used.

      Note that the single character modes `'r'`, `'a'`, `'w'` and `'x'`
      are text mode for both uncompressed and gzipped opens,
      like the builtin `open` and *unlike* `gzip.open`.
      This is to ensure equivalent behaviour.

      The optional keyword parameter `compresslevel` (default `9`)
      is passed to `gzip.open`;
      other positional and keyword arguments are passed
      to whichever of `gzip.open` or `open` is used.

      Raises `FileNotFoundError` if none of the candidate paths can be opened.
      Exceptions raised by the caller's `with` body,
      including `FileNotFoundError`, propagate unchanged.
  '''
  compresslevel = kw.pop('compresslevel', 9)
  path0 = path
  path, ext = splitext(path)
  if ext == '.gz':
    # gzip preferred
    gzpath = path0
    path1, path2 = gzpath, path
  else:
    # unzipped has precedence
    gzpath = path0 + '.gz'
    path1, path2 = path0, gzpath
  # if exactly one of the files exists, try only that file
  exists1 = existspath(path1)
  exists2 = existspath(path2)
  if exists1 and not exists2:
    paths = (path1,)
  elif exists2 and not exists1:
    paths = (path2,)
  else:
    paths = (path1, path2)
  for openpath in paths:
    # Only the open itself is inside the try/except so that
    # a FileNotFoundError from the caller's with-body
    # is not mistaken for a failure to open this path.
    try:
      if openpath.endswith('.gz'):
        gzmode = mode + 't' if mode in ('r', 'a', 'w', 'x') else mode
        f = gzip.open(openpath, gzmode, *a, compresslevel=compresslevel, **kw)
      else:
        f = open(openpath, mode, *a, **kw)
    except FileNotFoundError:
      # last path to try
      if openpath == paths[-1]:
        raise
      # not present, try the other file
      continue
    # open succeeded, yield the file and we're done
    with f:
      yield f
    return
  raise RuntimeError("NOTREACHED")
1832
+
1833
if __name__ == '__main__':
  # when run as a script, hand the command line to the self test
  # entry point of the companion cs.fileutils_tests module
  import cs.fileutils_tests
  cs.fileutils_tests.selftest(sys.argv)