cs-tarutils 20260531__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cs/tarutils.py ADDED
@@ -0,0 +1,335 @@
1
+ #!/usr/bin/env python3
2
+ #
3
+ # Tar utilities. - Cameron Simpson <cs@cskk.id.au>
4
+ #
5
+
6
+ ''' Assorted tar related things, including a fast tar-based copy.
7
+
8
+ My most heavily used use for this is my `cpdir` script which
9
+ does a high performance directory copy by piping 2 `tar`s
10
+ together.
11
+ It runs this:
12
+
13
+ from cs.tarutils import traced_cpdir
14
+ sys.exit(traced_cpdir(*sys.argv[1:]))
15
+
16
+ '''
17
+
18
+ import os
19
+ from os import mkdir, lstat
20
+ from os.path import (
21
+ exists as existspath,
22
+ join as joinpath,
23
+ isabs as isabspath,
24
+ isdir as isdirpath,
25
+ )
26
+ from stat import S_ISREG
27
+ from subprocess import Popen, DEVNULL, PIPE
28
+ from threading import Thread
29
+ from time import sleep
30
+ from typing import List
31
+
32
+ from cs.deco import fmtdoc
33
+ from cs.fs import shortpath
34
+ from cs.gimmicks import warning
35
+ from cs.pfx import pfx_call
36
+ from cs.progress import progressbar
37
+ from cs.queues import IterableQueue, QueueIterator
38
+ from cs.units import BINARY_BYTES_SCALE
39
+ from cs.upd import Upd, uses_upd # pylint: disable=redefined-builtin
40
+
41
# Release version string of this module.
__version__ = '20260531'

# Distribution metadata: keywords, trove classifiers and the
# packages this module requires at install time.
DISTINFO = {
    'keywords': ["python3"],
    'classifiers': [
        "Programming Language :: Python",
        "Programming Language :: Python :: 3",
    ],
    'install_requires': [
        'cs.deco',
        'cs.fs',
        'cs.gimmicks',
        'cs.pfx',
        'cs.progress',
        'cs.queues',
        'cs.units',
        'cs.upd',
    ],
}

# Default `tar` executable name.
TAR_EXE = 'tar'

# Default tar blocking factor (passed to `tar -b`), in 512 byte units.
DEFAULT_BCOUNT = 2048
63
+
64
@uses_upd
def _warning(msg, *a, upd: Upd):
    ''' Issue `warning(msg,*a)` from inside the `upd.above()` context.
        The `upd` is supplied by the `@uses_upd` decorator if omitted.
        Presumably `upd.above()` keeps the message clear of the
        status lines - TODO confirm against `cs.upd`.
    '''
    above_display = upd.above()
    with above_display:
        warning(msg, *a)
68
+
69
def _stat_diff(fspath: str, old_size: int):
    ''' Return how much the regular file at `fspath` has grown
        since it was `old_size` bytes long, using `lstat(fspath)`.

        A missing file, or anything which is not a regular file,
        reports a difference of `0`.
        Any other `lstat` failure issues a warning and reports `0`.
        A negative difference (the file shrank) is warned about
        and still returned.
    '''
    try:
        st = lstat(fspath)
    except FileNotFoundError:
        # not there (yet, or any more): nothing to account for
        return 0
    except OSError as e:
        _warning("lstat(%r): %s", fspath, e)
        return 0
    if not S_ISREG(st.st_mode):
        # not a regular file - ignore the size
        return 0
    growth = st.st_size - old_size
    if growth < 0:
        _warning("%r: file shrank! %d => %d", fspath, old_size, st.st_size)
    return growth
89
+
90
+ # pylint: disable=too-many-branches
91
def _watch_filenames(
    filenames_qit: QueueIterator, chdirpath: str, *, poll_interval=0.3
):
    ''' Generator consuming filenames from `filenames_qit`,
        a `QueueIterator` as obtained from `IterableQueue`,
        and yielding `(filename,diff)` 2-tuples where `diff` is the
        number of bytes the file has grown since it was last polled.

        A relative filename is joined to `chdirpath`
        unless `chdirpath` is `'.'`.

        A filename may arrive before or after its file has been written,
        so each file is polled when its name arrives and polled again
        when the next name arrives (or at the end).
        While the queue is empty the current file is polled every
        `poll_interval` seconds.
        A filename is therefore yielded at least twice.
    '''
    watched = None
    watched_size = None
    while True:
        if filenames_qit.empty():
            # nothing queued: poll the file in progress, then wait for a name
            if watched is not None:
                delta = _stat_diff(watched, watched_size)
                yield watched, delta
                watched_size += delta
            if filenames_qit.closed:
                break
            sleep(poll_interval)
            continue
        try:
            name = next(filenames_qit)
        except StopIteration:
            break
        if chdirpath != '.' and not isabspath(name):
            name = joinpath(chdirpath, name)
        if name != watched:
            # moving on: last poll of the previous file,
            # then start the new file from size 0
            if watched is not None:
                yield watched, _stat_diff(watched, watched_size)
            watched, watched_size = name, 0
        delta = _stat_diff(watched, watched_size)
        watched_size += delta
        yield watched, delta
    # final poll
    if watched is not None:
        yield watched, _stat_diff(watched, watched_size)
138
+
139
def _read_tar_stdout_filenames(f, filenames_q):
    ''' Read lines from `f`, each expected to be a bare filename,
        and put each (minus its trailing newline) onto `filenames_q`.
        Close `filenames_q` once `f` is exhausted.
    '''
    for raw_line in f:
        filenames_q.put(raw_line.rstrip('\n'))
    filenames_q.close()
144
+
145
def _read_tar_stderr(f, filenames_q):
    ''' Read lines from `f`, the text stream of a `tar` standard error.

        Lines of the form `x filename` have the filename put onto
        `filenames_q`; every other line is reported as a warning.
        `filenames_q` is closed when `f` is exhausted,
        or if reading fails part way.
    '''
    try:
        for errline in f:
            errline = errline.rstrip('\n')
            if errline.startswith("x "):
                filenames_q.put(errline[2:])
            else:
                # Pass the line as a format argument: the message used to
                # contain a stray "%s" with nothing to fill it, and splicing
                # raw tar output into the format string is unsafe if the
                # output contains "%".
                _warning("tar: err: %s", errline)
    finally:
        # NOTE(review): _read_tar_stdout_filenames closes this same queue
        # when tar's stdout ends - confirm IterableQueue tolerates both
        # closes and a put() after the first.
        filenames_q.close()
153
+
154
+ # pylint: disable=too-many-locals
155
@uses_upd
@fmtdoc
def traced_untar(
    tarfd,
    *,
    chdirpath='.',
    label=None,
    tar_exe=TAR_EXE,
    bcount=DEFAULT_BCOUNT,
    total=None,
    _stat_fd=False,
    upd
):
    ''' Read tar data from `tarfd` and extract.
        Return the `tar` exit code.

        Parameters:
        * `tarfd`: the source tar data,
          suitable for `subprocess.Popen`'s `stdin` parameter;
          a `str` is taken to be the filesystem path of a tar file,
          which is opened for read
        * `chdirpath`: optional directory to which to `chdir` before extracting
        * `label`: optional label for the progress bar
        * `tar_exe`: optional `tar` executable, default from `TAR_EXE`: `{TAR_EXE}`
        * `bcount`: blocking factor in 512 byte units,
          default from `DEFAULT_BCOUNT`: `{DEFAULT_BCOUNT}`
        * `total`: optional expected total for the progress bar;
          if omitted and `_stat_fd` is true,
          the size of `tarfd` is used if it is a regular file
    '''
    if isinstance(tarfd, str):
        # a filesystem path: open it and recurse with the open file,
        # honouring the caller's label and total if supplied
        if label is None:
            label = f'untar {shortpath(tarfd)} -> {chdirpath}'
        with pfx_call(open, tarfd, 'rb') as tarf:
            return traced_untar(
                tarf,
                chdirpath=chdirpath,
                label=label,
                tar_exe=tar_exe,
                bcount=bcount,
                total=total,
                # only a plain .tar file's size is used as the total
                _stat_fd=tarfd.endswith('.tar'),
                upd=upd,
            )
    if label is None:
        label = f'untar {tarfd} -> {chdirpath}'
    if total is None and _stat_fd:
        # stat the file to get its size
        if isinstance(tarfd, int):
            fd = tarfd
        else:
            try:
                fd = tarfd.fileno()
            except AttributeError:
                # no .fileno()
                fd = -1
        if fd >= 0:
            try:
                S = os.fstat(fd)
            except OSError as e:
                _warning("os.fstat(%r): %s", tarfd, e)
            else:
                if S_ISREG(S.st_mode):
                    total = S.st_size
    # pylint: disable=consider-using-with
    P = Popen(
        [
            tar_exe,
            '-x',
            '-v',
            '-C',
            chdirpath,
            '-b',
            str(bcount),
            '-f',
            '-',
        ],
        stdin=tarfd,
        stdout=PIPE,
        stderr=PIPE,
        text=True,
    )
    with upd.insert(0) as filename_proxy:
        filenames_q = IterableQueue()
        readers = [
            # consumer of tar stdout, expects bare filenames
            Thread(
                target=_read_tar_stdout_filenames,
                args=(P.stdout, filenames_q),
            ),
            # consumer of tar stderr, recognising "x filename" lines:
            # copies the filename to filenames_q,
            # issues warnings for other messages
            Thread(target=_read_tar_stderr, args=(P.stderr, filenames_q)),
        ]
        for reader in readers:
            reader.start()
        # consume filenames->(filename,diff) generator
        for filename, _ in progressbar(
            _watch_filenames(filenames_q, chdirpath),
            label=label,
            itemlenfunc=lambda f_d: f_d[1],
            total=total,
            upd=upd,
            report_print=True,
            units_scale=BINARY_BYTES_SCALE,
        ):
            filename_proxy.text = filename
    returncode = P.wait()
    # the readers finish at EOF on their pipes; collect them and
    # release the pipe file objects instead of leaving them to the GC
    for reader in readers:
        reader.join()
    P.stdout.close()
    P.stderr.close()
    return returncode
254
+
255
@fmtdoc
def tar(
    *srcpaths: str,
    chdirpath='.',
    output,
    tar_exe=TAR_EXE,
    bcount=DEFAULT_BCOUNT
):
    ''' Tar up the contents of `srcpaths` to `output`.
        Return the `Popen` object for the `tar` command.

        Parameters:
        * `srcpaths`: source filesystem paths
        * `chdirpath`: optional directory to which to `chdir` before accessing `srcpaths`
        * `output`: where to write the tar data:
          a `str` is the filesystem path of the tar file to create,
          which must not already exist;
          any other value is passed as the `stdout` of the `tar` subprocess
        * `tar_exe`: optional `tar` executable, default from `TAR_EXE`: `{TAR_EXE}`
        * `bcount`: blocking factor in 512 byte units,
          default from `DEFAULT_BCOUNT`: `{DEFAULT_BCOUNT}`

        Raises `ValueError` if no `srcpaths` are supplied
        or if `output` is a path which already exists.
    '''
    if not srcpaths:
        raise ValueError("empty srcpaths")
    to_path = isinstance(output, str)
    if to_path and existspath(output):
        raise ValueError(f'path already exists: {output!r}')
    return Popen(
        [
            tar_exe,
            '-c',
            '-C',
            chdirpath,
            '-b',
            str(bcount),
            '-f',
            # tar writes the named file itself, otherwise to its stdout
            (output if to_path else '-'),
            # end of options: srcpaths may start with a dash
            '--',
            *srcpaths,
        ],
        stdin=DEVNULL,
        stdout=(None if to_path else output),
    )
294
+
295
@uses_upd
@fmtdoc
def traced_cpdir(
    srcdirpath,
    dstdirpath,
    *,
    label=None,
    tar_exe=TAR_EXE,
    bcount=DEFAULT_BCOUNT,
    upd
):
    ''' Copy a directory to a new place using piped tars with progress reporting.
        Return `0` if both tars succeed, nonzero otherwise.

        Parameters:
        * `srcdirpath`: the source directory filesystem path
        * `dstdirpath`: the destination directory filesystem path,
          which must not already exist
        * `label`: optional label for the progress bar
        * `tar_exe`: optional `tar` executable, default from `TAR_EXE`: `{TAR_EXE}`
        * `bcount`: blocking factor in 512 byte units,
          default from `DEFAULT_BCOUNT`: `{DEFAULT_BCOUNT}`

        Raises `ValueError` if `srcdirpath` is not a directory.
        `dstdirpath` is created with `mkdir`,
        whose failure (for example because the path exists) propagates.
    '''
    if label is None:
        label = f'cpdir {shortpath(srcdirpath)} {shortpath(dstdirpath)}'
    if not isdirpath(srcdirpath):
        raise ValueError(f'not a directory: {srcdirpath!r}')
    pfx_call(mkdir, dstdirpath)
    # the source tar writes to a pipe which the extracting tar reads
    tarP = tar(
        '.', chdirpath=srcdirpath, output=PIPE, tar_exe=tar_exe, bcount=bcount
    )
    try:
        untar_returncode = traced_untar(
            tarP.stdout,
            chdirpath=dstdirpath,
            label=label,
            tar_exe=tar_exe,
            bcount=bcount,
            upd=upd,
        )
    finally:
        # Close our copy of the pipe's read end: if the extracting tar
        # exited early, the source tar must get EPIPE/SIGPIPE instead of
        # blocking forever on a full pipe and hanging the wait() below.
        tarP.stdout.close()
    tar_returncode = tarP.wait()
    return tar_returncode or untar_returncode
@@ -0,0 +1,87 @@
1
+ Metadata-Version: 2.4
2
+ Name: cs-tarutils
3
+ Version: 20260531
4
+ Summary: Assorted tar related things, including a fast tar-based copy.
5
+ Keywords: python3
6
+ Author-email: Cameron Simpson <cs@cskk.id.au>
7
+ Description-Content-Type: text/markdown
8
+ Classifier: Programming Language :: Python
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
14
+ Classifier: License :: OSI Approved :: GNU General Public License v3 or later (GPLv3+)
15
+ Requires-Dist: cs.deco>=20260525
16
+ Requires-Dist: cs.fs>=20260526
17
+ Requires-Dist: cs.gimmicks>=20260311
18
+ Requires-Dist: cs.pfx>=20250914
19
+ Requires-Dist: cs.progress>=20260531
20
+ Requires-Dist: cs.queues>=20260531
21
+ Requires-Dist: cs.units>=20260526
22
+ Requires-Dist: cs.upd>=20260526
23
+ Project-URL: MonoRepo Commits, https://bitbucket.org/cameron_simpson/css/commits/branch/main
24
+ Project-URL: Monorepo Git Mirror, https://github.com/cameron-simpson/css
25
+ Project-URL: Monorepo Hg/Mercurial Mirror, https://hg.sr.ht/~cameron-simpson/css
26
+ Project-URL: Source, https://github.com/cameron-simpson/css/blob/main/lib/python/cs/tarutils.py
27
+
28
+ Assorted tar related things, including a fast tar-based copy.
29
+
30
+ *Latest release 20260531*:
31
+ Update because an IterableQueue no longer has a .open() method.
32
+
33
+ My most heavily used use for this is my `cpdir` script which
34
+ does a high performance directory copy by piping 2 `tar`s
35
+ together.
36
+ It runs this:
37
+
38
+ from cs.tarutils import traced_cpdir
39
+ sys.exit(traced_cpdir(*sys.argv[1:]))
40
+
41
+ Short summary:
42
+ * `tar`: Tar up the contents of `srcpaths` to `output`. Return the `Popen` object for the `tar` command.
43
+ * `traced_cpdir`: Copy a directory to a new place using piped tars with progress reporting. Return `0` if both tars succeed, nonzero otherwise.
44
+ * `traced_untar`: Read tar data from `tarfd` and extract. Return the `tar` exit code.
45
+
46
+ Module contents:
47
+ - <a name="tar"></a>`tar(*srcpaths: List[str], chdirpath='.', output, tar_exe='tar', bcount=2048)`: Tar up the contents of `srcpaths` to `output`.
48
+ Return the `Popen` object for the `tar` command.
49
+
50
+ Parameters:
51
+ * `srcpaths`: source filesystem paths
52
+ * `chdirpath`: optional directory to which to `chdir` before accessing `srcpaths`
53
+ * `tar_exe`: optional `tar` executable, default from `TAR_EXE`: `tar`
54
+ * `bcount`: blocking factor in 512 byte units,
55
+ default from `DEFAULT_BCOUNT`: `2048`
56
+ - <a name="traced_cpdir"></a>`traced_cpdir(*a, upd: Optional[cs.upd.Upd] = <function uses_upd.<locals>.<lambda> at 0x10b0b7920>, **kw)`: Copy a directory to a new place using piped tars with progress reporting.
57
+ Return `0` if both tars succeed, nonzero otherwise.
58
+
59
+ Parameters:
60
+ * `srcdirpath`: the source directory filesystem path
61
+ * `dstdirpath`: the destination directory filesystem path,
62
+ which must not already exist
63
+ * `label`: optional label for the progress bar
64
+ * `tar_exe`: optional `tar` executable, default from `TAR_EXE`: `tar`
65
+ * `bcount`: blocking factor in 512 byte units,
66
+ default from `DEFAULT_BCOUNT`: `2048`
67
+ - <a name="traced_untar"></a>`traced_untar(*a, upd: Optional[cs.upd.Upd] = <function uses_upd.<locals>.<lambda> at 0x10b0b7560>, **kw)`: Read tar data from `tarfd` and extract.
68
+ Return the `tar` exit code.
69
+
70
+ Parameters:
71
+ * `tarfd`: the source tar data,
72
+ suitable for `subprocess.Popen`'s `stdin` parameter
73
+ * `chdirpath`: optional directory to which to `chdir` before extracting
74
+ * `label`: optional label for the progress bar
75
+ * `tar_exe`: optional `tar` executable, default from `TAR_EXE`: `tar`
76
+ * `bcount`: blocking factor in 512 byte units,
77
+ default from `DEFAULT_BCOUNT`: `2048`
78
+
79
+ # Release Log
80
+
81
+
82
+
83
+ *Release 20260531*:
84
+ Update because an IterableQueue no longer has a .open() method.
85
+
86
+ *Release 20240318*:
87
+ Initial PyPI release with nice traced_cpdir() function.
@@ -0,0 +1,4 @@
1
+ cs/tarutils.py,sha256=cNa5IPn4pGqavgAc61vXjno7K9z5cjJG_Z81whf4Kek,9462
2
+ cs_tarutils-20260531.dist-info/WHEEL,sha256=Dyt6SBfaasWElUrURkknVFAZDHSTwxg3PaTza7RSbkY,100
3
+ cs_tarutils-20260531.dist-info/METADATA,sha256=gGMHkIj3c2i7g2XvUppWHPS85bj5xPR5z36NI4_e6MY,3915
4
+ cs_tarutils-20260531.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: flit 3.12.0
3
+ Root-Is-Purelib: true
4
+ Tag: py2-none-any
5
+ Tag: py3-none-any