python-misc-utils 0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (117) hide show
  1. py_misc_utils/__init__.py +0 -0
  2. py_misc_utils/abs_timeout.py +12 -0
  3. py_misc_utils/alog.py +311 -0
  4. py_misc_utils/app_main.py +179 -0
  5. py_misc_utils/archive_streamer.py +112 -0
  6. py_misc_utils/assert_checks.py +118 -0
  7. py_misc_utils/ast_utils.py +121 -0
  8. py_misc_utils/async_manager.py +189 -0
  9. py_misc_utils/break_control.py +63 -0
  10. py_misc_utils/buffered_iterator.py +35 -0
  11. py_misc_utils/cached_file.py +507 -0
  12. py_misc_utils/call_limiter.py +26 -0
  13. py_misc_utils/call_result_selector.py +13 -0
  14. py_misc_utils/cleanups.py +85 -0
  15. py_misc_utils/cmd.py +97 -0
  16. py_misc_utils/compression.py +116 -0
  17. py_misc_utils/cond_waiter.py +13 -0
  18. py_misc_utils/context_base.py +18 -0
  19. py_misc_utils/context_managers.py +67 -0
  20. py_misc_utils/core_utils.py +577 -0
  21. py_misc_utils/daemon_process.py +252 -0
  22. py_misc_utils/data_cache.py +46 -0
  23. py_misc_utils/date_utils.py +90 -0
  24. py_misc_utils/debug.py +24 -0
  25. py_misc_utils/dyn_modules.py +50 -0
  26. py_misc_utils/dynamod.py +103 -0
  27. py_misc_utils/env_config.py +35 -0
  28. py_misc_utils/executor.py +239 -0
  29. py_misc_utils/file_overwrite.py +29 -0
  30. py_misc_utils/fin_wrap.py +77 -0
  31. py_misc_utils/fp_utils.py +47 -0
  32. py_misc_utils/fs/__init__.py +0 -0
  33. py_misc_utils/fs/file_fs.py +127 -0
  34. py_misc_utils/fs/ftp_fs.py +242 -0
  35. py_misc_utils/fs/gcs_fs.py +196 -0
  36. py_misc_utils/fs/http_fs.py +241 -0
  37. py_misc_utils/fs/s3_fs.py +417 -0
  38. py_misc_utils/fs_base.py +133 -0
  39. py_misc_utils/fs_utils.py +207 -0
  40. py_misc_utils/gcs_fs.py +169 -0
  41. py_misc_utils/gen_indices.py +54 -0
  42. py_misc_utils/gfs.py +371 -0
  43. py_misc_utils/git_repo.py +77 -0
  44. py_misc_utils/global_namespace.py +110 -0
  45. py_misc_utils/http_async_fetcher.py +139 -0
  46. py_misc_utils/http_server.py +196 -0
  47. py_misc_utils/http_utils.py +143 -0
  48. py_misc_utils/img_utils.py +20 -0
  49. py_misc_utils/infix_op.py +20 -0
  50. py_misc_utils/inspect_utils.py +205 -0
  51. py_misc_utils/iostream.py +21 -0
  52. py_misc_utils/iter_file.py +117 -0
  53. py_misc_utils/key_wrap.py +46 -0
  54. py_misc_utils/lazy_import.py +25 -0
  55. py_misc_utils/lockfile.py +164 -0
  56. py_misc_utils/mem_size.py +64 -0
  57. py_misc_utils/mirror_from.py +72 -0
  58. py_misc_utils/mmap.py +16 -0
  59. py_misc_utils/module_utils.py +196 -0
  60. py_misc_utils/moving_average.py +19 -0
  61. py_misc_utils/msgpack_streamer.py +26 -0
  62. py_misc_utils/multi_wait.py +24 -0
  63. py_misc_utils/multiprocessing.py +102 -0
  64. py_misc_utils/named_array.py +224 -0
  65. py_misc_utils/no_break.py +46 -0
  66. py_misc_utils/no_except.py +32 -0
  67. py_misc_utils/np_ml_framework.py +184 -0
  68. py_misc_utils/np_utils.py +346 -0
  69. py_misc_utils/ntuple_utils.py +38 -0
  70. py_misc_utils/num_utils.py +54 -0
  71. py_misc_utils/obj.py +73 -0
  72. py_misc_utils/object_cache.py +100 -0
  73. py_misc_utils/object_tracker.py +88 -0
  74. py_misc_utils/ordered_set.py +71 -0
  75. py_misc_utils/osfd.py +27 -0
  76. py_misc_utils/packet.py +22 -0
  77. py_misc_utils/parquet_streamer.py +69 -0
  78. py_misc_utils/pd_utils.py +254 -0
  79. py_misc_utils/periodic_task.py +61 -0
  80. py_misc_utils/pickle_wrap.py +121 -0
  81. py_misc_utils/pipeline.py +98 -0
  82. py_misc_utils/remap_pickle.py +50 -0
  83. py_misc_utils/resource_manager.py +155 -0
  84. py_misc_utils/rnd_utils.py +56 -0
  85. py_misc_utils/run_once.py +19 -0
  86. py_misc_utils/scheduler.py +135 -0
  87. py_misc_utils/select_params.py +300 -0
  88. py_misc_utils/signal.py +141 -0
  89. py_misc_utils/skl_utils.py +270 -0
  90. py_misc_utils/split.py +147 -0
  91. py_misc_utils/state.py +53 -0
  92. py_misc_utils/std_module.py +56 -0
  93. py_misc_utils/stream_dataframe.py +176 -0
  94. py_misc_utils/streamed_file.py +144 -0
  95. py_misc_utils/tempdir.py +79 -0
  96. py_misc_utils/template_replace.py +51 -0
  97. py_misc_utils/tensor_stream.py +269 -0
  98. py_misc_utils/thread_context.py +33 -0
  99. py_misc_utils/throttle.py +30 -0
  100. py_misc_utils/time_trigger.py +18 -0
  101. py_misc_utils/timegen.py +11 -0
  102. py_misc_utils/traceback.py +49 -0
  103. py_misc_utils/tracking_executor.py +91 -0
  104. py_misc_utils/transform_array.py +42 -0
  105. py_misc_utils/uncompress.py +35 -0
  106. py_misc_utils/url_fetcher.py +157 -0
  107. py_misc_utils/utils.py +538 -0
  108. py_misc_utils/varint.py +50 -0
  109. py_misc_utils/virt_array.py +52 -0
  110. py_misc_utils/weak_call.py +33 -0
  111. py_misc_utils/work_results.py +100 -0
  112. py_misc_utils/writeback_file.py +43 -0
  113. python_misc_utils-0.2.dist-info/METADATA +36 -0
  114. python_misc_utils-0.2.dist-info/RECORD +117 -0
  115. python_misc_utils-0.2.dist-info/WHEEL +5 -0
  116. python_misc_utils-0.2.dist-info/licenses/LICENSE +13 -0
  117. python_misc_utils-0.2.dist-info/top_level.txt +1 -0
py_misc_utils/split.py ADDED
@@ -0,0 +1,147 @@
1
+ import collections
2
+ import re
3
+
4
+ from . import assert_checks as tas
5
+
6
+
7
+ class _Skipper:
8
+
9
+ def __init__(self, quote_rx):
10
+ self.quote_rx = quote_rx
11
+ self.next_pos = 0
12
+
13
+ def skip(self, data, pos):
14
+ next_pos = self.next_pos - pos
15
+ if next_pos <= 0:
16
+ m = re.search(self.quote_rx, data)
17
+ next_pos = m.start() if m else len(data)
18
+ self.next_pos = pos + next_pos
19
+
20
+ return next_pos
21
+
22
+
23
+ def _chars_regex(chars):
24
+ rexs = bytearray(b'[\\')
25
+ for c in sorted(chars):
26
+ rexs.extend((ord('\\'), c))
27
+
28
+ rexs.append(ord(']'))
29
+
30
+ return re.compile(bytes(rexs))
31
+
32
+
33
def _specials_regex(qmap):
  # One character class covering every open- and close-quote character.
  return _chars_regex(set(qmap.keys()) | set(qmap.values()))
35
+
36
+
37
def _split_forward(data, pos, split_rx, skipper, seq):
  # Advance from `pos` to the next split-regex match or the next quote
  # character, whichever comes first, appending the bytes skipped over to
  # `seq` (mutated in place).
  #
  # Returns (new_pos, is_split): is_split is True when the stop reason was a
  # split-regex match (the matched separator bytes are consumed, not copied
  # into `seq`).
  pdata = data[pos:]

  xm = re.search(split_rx, pdata)
  if xm:
    seq_pos, next_pos = xm.start(), xm.end()
  else:
    seq_pos = next_pos = len(pdata)

  # NOTE(review): skipper.skip() receives the window `pdata` together with the
  # absolute offset `pos`; _Skipper caches absolute positions and returns a
  # window-relative one — confirm against _Skipper.skip() if either changes.
  skip_pos = skipper.skip(pdata, pos)
  if skip_pos < seq_pos:
    # A quote character appears before the split match: stop right before it.
    seq_pos = next_pos = skip_pos
    xm = None

  seq.extend(pdata[: seq_pos])

  return pos + next_pos, xm is not None
54
+
55
+
56
# map: open-quote byte -> close-quote byte; quote_rx matches any open quote
# (or backslash); quote_sprx matches any open or close quote (or backslash).
SplitContext = collections.namedtuple('SplitContext', 'map, quote_rx, quote_sprx')

def make_context(quote_map):
  """Compile a mapping of single-character quote pairs into a SplitContext."""
  byte_map = {ord(open_c): ord(close_c) for open_c, close_c in quote_map.items()}

  return SplitContext(
      map=byte_map,
      quote_rx=_chars_regex(byte_map.keys()),
      quote_sprx=_specials_regex(byte_map),
  )
64
+
65
+
66
+ def _to_bytes(data, split_rx):
67
+ if isinstance(data, str):
68
+ data = data.encode()
69
+ if isinstance(split_rx, str):
70
+ split_rx = split_rx.encode()
71
+
72
+ split_rx = re.compile(split_rx) if isinstance(split_rx, bytes) else split_rx
73
+
74
+ return memoryview(data), split_rx
75
+
76
+
77
# Default quote pairs: symmetric string quotes plus the bracket pairs.
# Symmetric quotes (key == value) do not nest; bracket pairs do.
_QUOTE_MAP = {
    '"': '"',
    "'": "'",
    '`': '`',
    '(': ')',
    '{': '}',
    '[': ']',
    '<': '>',
}
_QUOTE_CTX = make_context(_QUOTE_MAP)

# closec: expected closing byte; pos: where the quote was opened;
# nest_ok: whether further quotes may open inside this one (False for
# symmetric quotes, since open and close are indistinguishable).
_Quote = collections.namedtuple('Quote', 'closec, pos, nest_ok')
89
+
90
def split(data, split_rx, quote_ctx=None):
  """Split `data` on `split_rx`, ignoring separators inside quoted regions.

  Backslash escapes the following character; quote characters are kept in
  the output parts (see unquote() to strip them).

  Args:
    data: A str or bytes input.
    split_rx: The separator, as a str/bytes pattern or compiled bytes regex.
    quote_ctx: Optional SplitContext (defaults to the module _QUOTE_CTX).

  Returns:
    A tuple of str parts when `data` is a str, otherwise a tuple of
    bytearray parts.

  Raises:
    Whatever assert_checks raises, when quotes are left unbalanced.
  """
  qctx = quote_ctx or _QUOTE_CTX

  bdata, bsplit_rx = _to_bytes(data, split_rx)
  skipper = _Skipper(qctx.quote_rx)

  sval = ord('\\')
  pos, qstack, parts, seq = 0, [], [], bytearray()
  while pos < len(bdata):
    if seq and seq[-1] == sval:
      # Previous byte was a backslash: take this byte verbatim (escape).
      seq.append(bdata[pos])
      pos += 1
    elif qstack:
      # Inside a quoted region: only quote open/close (or escape) bytes matter.
      m = re.search(qctx.quote_sprx, bdata[pos:])
      if not m:
        break

      seq.extend(bdata[pos: pos + m.start()])
      pos += m.start()
      c = bdata[pos]
      tq = qstack[-1]
      if c == tq.closec:
        qstack.pop()
      elif tq.nest_ok and (cc := qctx.map.get(c)) is not None:
        # Nested opening quote (only inside asymmetric, bracket-style quotes).
        qstack.append(_Quote(cc, pos, c != cc))
      seq.append(c)
      pos += 1
    else:
      # Unquoted region: advance to the next separator or quote character.
      kpos, is_split = _split_forward(bdata, pos, bsplit_rx, skipper, seq)
      if is_split:
        parts.append(seq)
        seq = bytearray()
      elif kpos < len(bdata):
        c = bdata[kpos]
        if (cc := qctx.map.get(c)) is not None:
          # Opening quote at the stop position: push it, keep it in output.
          qstack.append(_Quote(cc, kpos, c != cc))
          seq.append(c)
          kpos += 1
      # Guarantee forward progress even when nothing was consumed.
      pos = max(kpos, pos + 1)

  tas.check_eq(len(qstack), 0, msg=f'Unmatched quotes during split: "{data}"\n {qstack}')
  if seq or parts:
    parts.append(seq)

  return tuple(p.decode() for p in parts) if isinstance(data, str) else tuple(parts)
135
+
136
+
137
def unquote(data, quote_map=None):
  """Strip one level of surrounding quotes from `data`, if present.

  Args:
    data: The string to unquote.
    quote_map: Optional open->close quote mapping (defaults to _QUOTE_MAP).

  Returns:
    The input without its surrounding quote pair; for symmetric quotes the
    escaped inner quotes (backslash + quote) are unescaped as well. Data
    which is not quoted is returned unchanged.
  """
  if len(data) >= 2:
    quote_map = quote_map or _QUOTE_MAP
    cc = quote_map.get(data[0])
    if cc == data[-1]:
      udata = data[1: -1]

      # Use a plain string replace instead of re.sub(): building the pattern
      # from the quote character breaks (or silently over-matches) when that
      # character is a regex metacharacter (e.g. '*' yielded pattern '\\*',
      # i.e. "zero or more backslashes", matching everywhere).
      return udata.replace(f'\\{cc}', cc) if cc == data[0] else udata

  return data
147
+
py_misc_utils/state.py ADDED
@@ -0,0 +1,53 @@
1
+ import pickle
2
+
3
+ from . import gfs
4
+
5
+
6
+ _STATE_KEY = '__SB_STATE__'
7
+
8
+
9
+ def _kname(cls, name):
10
+ return f'{cls.__name__}.{name}'
11
+
12
+
13
class StateBase:
  """Mixin giving objects control over their serialized state.

  Subclasses may override _get_state()/_set_state() to transform the state
  on save/load, and can stash per-class values via _store_state() and read
  them back with _load_state(); values live in a side dict keyed by
  "ClassName.name" so multiple classes in a hierarchy do not collide.
  """

  def _get_state(self, state):
    # Hook: called with a copy of __dict__ before serialization.
    return state

  def _set_state(self, state):
    # Hook: called with the deserialized state dict.
    self.__dict__.update(state)

  def _store_state(self, cls, **kwargs):
    # Record kwargs under cls-namespaced keys in the state side dict.
    sdict = getattr(self, _STATE_KEY, None)
    if sdict is None:
      sdict = dict()
      setattr(self, _STATE_KEY, sdict)

    for name, value in kwargs.items():
      sdict[_kname(cls, name)] = value

  def _load_state(self, cls, state, name, defval=None):
    # Fetch a value previously saved with _store_state(), or defval.
    sdict = state.get(_STATE_KEY)
    if sdict is None:
      return defval

    return sdict.get(_kname(cls, name), defval)
34
+
35
+
36
def to_state(obj, path):
  """Pickle `obj`'s state (as filtered by its _get_state()) to `path` via gfs."""
  # Needs a copy here, as the _get_state() call chains will modify the state.
  state = obj._get_state(obj.__dict__.copy())
  with gfs.open(path, mode='wb') as sfd:
    pickle.dump(state, sfd)
41
+
42
+
43
def from_state(cls, path, **kwargs):
  """Rebuild a `cls` instance from state pickled at `path`.

  Extra keyword arguments override entries of the loaded state. Note that
  __init__ is bypassed: the instance is created with __new__ and populated
  via _set_state().
  """
  # NOTE(review): pickle.load() on the file contents — only use with state
  # files from trusted sources.
  with gfs.open(path, mode='rb') as sfd:
    state = pickle.load(sfd)

  state.update(kwargs)

  obj = cls.__new__(cls)
  obj._set_state(state)

  return obj
53
+
@@ -0,0 +1,56 @@
1
+ import functools
2
+ import importlib
3
+ import os
4
+ import sys
5
+
6
+ from . import core_utils as cu
7
+
8
+
9
+ def _module_origin(modname):
10
+ module = sys.modules.get(modname)
11
+ if module is None:
12
+ try:
13
+ module = importlib.import_module(modname)
14
+ except ModuleNotFoundError:
15
+ pass
16
+
17
+ if module is not None:
18
+ path = getattr(module, '__file__', None)
19
+ if path is None:
20
+ spec = getattr(module, '__spec__', None)
21
+ path = spec.origin if spec is not None else None
22
+
23
+ return path
24
+
25
+
26
def _module_libpath(modname):
  """Return the directory containing `modname`, or None for built-ins and
  modules whose file origin cannot be resolved."""
  origin = _module_origin(modname)
  if origin in (None, 'built-in'):
    return None

  return os.path.dirname(origin) or None
32
+
33
+
34
# Some of the standard modules. Should be enough to get coverage of the
# Python standard library path (there are more than one since some might
# turn "built-in" and not have a __file__ or __spec__).
_STDLIB_MODULES = (
    'abc',
    'copy',
    'io',
    'os',
    'pickle',
    'random',
    'string',
    'types',
)
# Directories holding the standard library, resolved once at import time.
_STDLIB_PATHS = set(filter(lambda x: x is not None,
                           (_module_libpath(m) for m in _STDLIB_MODULES)))
49
+
50
@functools.cache
def is_std_module(modname):
  """Return True if `modname` (reduced to its root package) lives in the
  Python standard library paths, or has no resolvable library path at all."""
  modname = cu.root_module(modname)
  lib_path = _module_libpath(modname)

  # Modules without a library path (built-ins, frozen modules) count as std.
  return lib_path is None or lib_path in _STDLIB_PATHS
56
+
@@ -0,0 +1,176 @@
1
+ import bisect
2
+ import collections
3
+
4
+ import numpy as np
5
+ import pandas as pd
6
+
7
+ from . import assert_checks as tas
8
+ from . import np_utils as npu
9
+ from . import tensor_stream as ts
10
+ from . import utils as ut
11
+
12
+
13
# Per-field write configuration (currently just the target numpy dtype).
WriteField = collections.namedtuple('WriteField', 'dtype')
14
+
15
+
16
class StreamDataWriter:
  """Writes fixed-schema columnar records to a tensor_stream.Writer.

  The schema is an ordered mapping of field name to numpy dtype, given
  either as a "name=dtype,name=dtype,..." string or as (name, dtype) pairs.
  """

  def __init__(self, fields, path):
    self._writer = ts.Writer(path)
    self._fields = collections.OrderedDict()
    if isinstance(fields, str):
      sfields = tuple(tuple(ut.resplit(x, '=')) for x in ut.comma_split(fields))
    else:
      sfields = fields

    for field, dtype in sfields:
      self._fields[field] = WriteField(dtype=np.dtype(dtype))

  # Note that the tensors handed over to the write() API will become owned by
  # the StreamDataWriter object, and cannot be written over after the write operation.
  def write(self, **kwargs):
    # One keyword argument per schema field; values are cast/converted to the
    # declared dtype, then handed to the underlying writer in schema order.
    args = []
    for field, wfield in self._fields.items():
      data = kwargs.get(field)
      tas.check_is_not_none(data, msg=f'Missing "{field}" data in write operation')

      if isinstance(data, np.ndarray):
        if data.dtype != wfield.dtype:
          data = data.astype(wfield.dtype)
      else:
        data = np.array(data, dtype=wfield.dtype)

      args.append(data)

    self._writer.write(*args)

  def write_dataframe(self, df):
    # Writes the schema columns of a pandas DataFrame, in field order.
    wargs = collections.OrderedDict()
    for field in self._fields.keys():
      wargs[field] = df[field].to_numpy()

    self.write(**wargs)

  def flush(self):
    # The field schema is persisted as stream state so readers can recover it.
    state = dict(fields=self._fields)

    self._writer.flush(state=state)
58
+
59
+
60
class StreamDataReader:
  """Reads columnar records written by StreamDataWriter.

  The field schema is recovered from the stream state saved at flush() time.
  """

  def __init__(self, path):
    self._reader = ts.Reader(path)
    self._fields = self._reader.state['fields']
    # Maps field name -> column index within the underlying stream.
    self._fields_id = {field: i for i, field in enumerate(self._fields.keys())}

  def __len__(self):
    return len(self._reader)

  def fields(self):
    # Field names, in schema order.
    return tuple(self._fields.keys())

  @property
  def dtype(self):
    # Field dtypes, in schema order.
    return tuple(wfield.dtype for wfield in self._fields.values())

  def get_slice(self, start, size=None):
    # Returns an OrderedDict of field -> column slice starting at `start`.
    data = collections.OrderedDict()
    for i, field in enumerate(self._fields.keys()):
      data[field] = self._reader.get_slice(i, start, size=size)

    return data

  def get_field_slice(self, field, start, size=None):
    # Slice of a single named field.
    fid = self._fields_id[field]

    return self._reader.get_slice(fid, start, size=size)

  def typed_fields(self):
    # Tuple of (name, dtype) pairs, in schema order.
    return tuple((field, wfield.dtype) for field, wfield in self._fields.items())

  def empty_array(self, size):
    # Pre-allocates per-field buffers of `size` entries: numpy arrays for
    # numeric dtypes, plain Python lists otherwise.
    rdata = collections.OrderedDict()
    for field, dtype in self.typed_fields():
      if npu.is_numeric(dtype):
        rdata[field] = np.empty(size, dtype=dtype)
      else:
        rdata[field] = [None] * size

    return rdata
101
+
102
+
103
+ def _compute_indices(reader, field, start=None, end=None, reverse=False):
104
+ fvalues = reader.get_field_slice(field, 0)
105
+ indices = np.argsort(fvalues)
106
+ if reverse:
107
+ indices = np.flip(indices)
108
+
109
+ if start is not None or end is not None:
110
+ fvalues = fvalues[indices]
111
+ start_index = bisect.bisect(fvalues, start) if start is not None else 0
112
+ end_index = bisect.bisect(fvalues, end) if end is not None else len(indices)
113
+ if start_index > end_index:
114
+ start_index, end_index = end_index, start_index
115
+
116
+ indices = indices[start_index: end_index]
117
+
118
+ return indices
119
+
120
+
121
class StreamSortedScan:
  """Iterates a StreamDataReader in `field`-sorted order, in fixed-size
  batches, caching raw data slices (LRU) to bound the cost of random reads."""

  def __init__(self, reader, field,
               start=None,
               end=None,
               slice_size=None,
               max_slices=None,
               reverse=False):
    # slice_size: batch/cache granularity in records; max_slices: LRU capacity.
    self._slice_size = slice_size or 100000
    self._max_slices = max_slices or 16
    self._reader = reader
    # OrderedDict used as an LRU: oldest entries are evicted from the front.
    self._slices = collections.OrderedDict()
    self._indices = _compute_indices(reader, field, start=start, end=end, reverse=reverse)

  def _get_slice(self, idx):
    # Fetch (or reuse from the LRU cache) the slice-aligned block containing
    # record `idx`; returns the block and the offset of `idx` within it.
    sidx = (idx // self._slice_size) * self._slice_size
    data = self._slices.get(sidx)
    if data is None:
      if len(self._slices) >= self._max_slices:
        self._slices.popitem(last=False)

      # NOTE(review): the clamp uses len(self._indices), but `sidx` indexes
      # the full (unfiltered) record space — when start/end narrowed the
      # indices this difference can go negative; verify the expected
      # behavior against the reader's get_slice() semantics.
      slice_size = min(self._slice_size, len(self._indices) - sidx)

      data = self._reader.get_slice(sidx, size=slice_size)
      self._slices[sidx] = data
    else:
      self._slices.move_to_end(sidx)

    return data, idx - sidx

  def _as_numpy(self, rdata):
    # Converts any list-typed field buffers into numpy arrays.
    return {field: np.array(data) for field, data in rdata.items()}

  def scan(self):
    """Yields (count, batch) tuples in sorted order, where `batch` maps each
    field to an array of up to slice_size entries."""
    # An empty array can contain fields which are Python lists, so _as_numpy() is
    # used when returning data to the caller.
    rdata = self._reader.empty_array(self._slice_size)
    widx = 0
    for idx in self._indices:
      if widx == self._slice_size:
        yield widx, self._as_numpy(rdata)
        widx = 0

      sdata, sidx = self._get_slice(idx)
      for field, data in rdata.items():
        data[widx] = sdata[field][sidx]

      widx += 1

    if widx:
      # Trim the final, partially filled batch before yielding it.
      frdata = collections.OrderedDict()
      for field, data in rdata.items():
        frdata[field] = data[: widx]

      yield widx, self._as_numpy(frdata)
176
+
@@ -0,0 +1,144 @@
1
+ import os
2
+ import tempfile
3
+ import threading
4
+
5
+ from . import alog
6
+ from . import assert_checks as tas
7
+ from . import fin_wrap as fw
8
+
9
+
10
class StreamedFile:
  """File-like, seekable, read-only wrapper over a streamed response.

  A background thread drains the iterable `resp` into an anonymous temporary
  file while readers block until the bytes they need (or the end of stream)
  have arrived. All coordination goes through one lock/condition pair.
  """

  def __init__(self, resp):
    self._resp = resp
    self._lock = threading.Lock()
    self._cond = threading.Condition(lock=self._lock)

    tmpfile = tempfile.TemporaryFile()
    # fin_wrap ties the tempfile lifetime to this object (finalizer closes it).
    fw.fin_wrap(self, '_tempfile', tmpfile, finfn=tmpfile.close)

    self._offset = 0       # current read position
    self._size = 0         # bytes streamed into the tempfile so far
    self._completed = False
    self._closed = False
    self._thread = threading.Thread(target=self._stream, daemon=True)
    self._thread.start()

  def _stream(self):
    # Background thread: append each response chunk to the tempfile, waking
    # readers after every write; stop early once close() was requested.
    for data in self._resp:
      with self._lock:
        self._tempfile.seek(self._size)
        self._tempfile.write(data)
        self._size += len(data)
        self._cond.notify_all()
        if self._closed:
          break

    with self._lock:
      self._completed = True
      self._cond.notify_all()

  def _wait_completed(self):
    # Block until the stream has fully drained or the file was closed.
    with self._lock:
      while not (self._completed or self._closed):
        self._cond.wait()

  def close(self):
    # Signal the streaming thread, wait for it to finish, then detach the
    # tempfile from the finalizer wrapper and close it.
    with self._lock:
      self._closed = True
      while not self._completed:
        self._cond.wait()

    self._thread.join()

    with self._lock:
      tempfile = self._tempfile
      if tempfile is not None:
        fw.fin_wrap(self, '_tempfile', None)

    if tempfile is not None:
      tempfile.close()

  @property
  def closed(self):
    return self._tempfile is None

  def seek(self, pos, whence=os.SEEK_SET):
    if whence == os.SEEK_SET:
      offset = pos
    elif whence == os.SEEK_CUR:
      offset = self._offset + pos
    elif whence == os.SEEK_END:
      # The final size is only known once streaming has completed.
      self._wait_completed()
      offset = self._size + pos
    else:
      alog.xraise(ValueError, f'Invalid seek mode: {whence}')

    if offset > 0:
      if whence != os.SEEK_END:
        self._wait_completed()
      tas.check_le(offset, self._size, msg=f'Offset out of range')

    tas.check_ge(offset, 0, msg=f'Offset out of range')

    self._offset = offset

    return offset

  def tell(self):
    return self._offset

  def _read(self, offset, size, adj_offset):
    # Must be called with self._lock held. Waits until `size` bytes are
    # available at `offset` (or streaming ends), then reads them from the
    # tempfile; `adj_offset` selects whether self._offset advances.
    while not (self._completed or self._closed or
               (size >= 0 and self._size >= offset + size)):
      self._cond.wait()

    available = self._size - offset
    to_read = min(size, available) if size >= 0 else available
    if not self._closed and to_read > 0:
      self._tempfile.seek(offset)
      data = self._tempfile.read(to_read)
      if adj_offset:
        self._offset += len(data)
    else:
      data = b''

    return data

  def read(self, size=-1):
    # size < 0 reads everything up to the end of the stream.
    with self._lock:
      return self._read(self._offset, size, True)

  def read1(self, size=-1):
    return self.read(size=size)

  def peek(self, size=0):
    # Returns up to `size` bytes without moving the read position.
    with self._lock:
      size = min(size, max(1, self._size - self._offset))

      return self._read(self._offset, size, False) if size > 0 else b''

  def pread(self, offset, size):
    # Positional read: does not use or move self._offset.
    with self._lock:
      return self._read(offset, size, False)

  def flush(self):
    # Read-only stream: nothing to flush.
    pass

  def readable(self):
    return not self.closed

  def seekable(self):
    return not self.closed

  def writable(self):
    return False

  def __enter__(self):
    return self

  def __exit__(self, *exc):
    self.close()

    return False
144
+
@@ -0,0 +1,79 @@
1
+ import os
2
+ import shutil
3
+ import tempfile
4
+
5
+ from . import cleanups
6
+ from . import global_namespace as gns
7
+ from . import rnd_utils as rngu
8
+
9
+
10
class _RootDir:
  """Process-wide temporary root directory, removed at cleanup time."""

  def __init__(self):
    self._path = tempfile.mkdtemp()
    # Register removal of the whole tree with the cleanups registry.
    self._cid = cleanups.register(shutil.rmtree, self._path, ignore_errors=True)

  def create(self):
    # New unique temporary directory under the root.
    return tempfile.mkdtemp(dir=self._path)

  def root(self):
    return self._path
21
+
22
+
23
# Process-global (fork-aware) holder for the lazily-created _RootDir singleton.
_ROOTDIR = gns.Var(f'{__name__}.ROOTDIR',
                   fork_init=True,
                   defval=lambda: _RootDir())

def _root_dir():
  # Fetches (creating on first use) the per-process _RootDir instance.
  return gns.get(_ROOTDIR)
29
+
30
+
31
def create():
  """Create a new unique temporary directory under the managed root."""
  return _root_dir().create()
33
+
34
+
35
def get_temp_root():
  """Return the path of the managed temporary root directory."""
  return _root_dir().root()
37
+
38
+
39
+ def _try_fastfs_dir(path):
40
+ if os.path.isdir(path):
41
+ fastfs_dir = os.path.join(path, 'fastfs')
42
+ try:
43
+ os.makedirs(fastfs_dir, exist_ok=True)
44
+
45
+ return fastfs_dir
46
+ except:
47
+ pass
48
+
49
+
50
def _find_fastfs_dir():
  """Pick the first usable fast-filesystem directory from a candidate list.

  Candidates, in priority order: $FASTFS_DIR, POSIX tmpfs/ramfs mounts,
  the system temporary directory, and finally the current directory.
  """
  candidates = []

  env_dir = os.getenv('FASTFS_DIR')
  if env_dir is not None:
    candidates.append(env_dir)

  if os.name == 'posix':
    # Try known tmpfs/ramfs places in case on Linux.
    candidates += [f'/run/user/{os.getuid()}', '/dev/shm']

  candidates += [tempfile.gettempdir(), os.getcwd()]

  for candidate in candidates:
    fastfs_dir = _try_fastfs_dir(candidate)
    if fastfs_dir is not None:
      return fastfs_dir
67
+
68
+
69
# Resolved once at import time; all fastfs_dir() paths live under this root.
_FASTFS_DIR = _find_fastfs_dir()
# Length of generated random directory names (FASTFS_NAMELEN env overrides).
_NAMELEN = int(os.getenv('FASTFS_NAMELEN', 12))

def fastfs_dir(name=None, namelen=_NAMELEN):
  """Create (or reuse) a named directory under the fast filesystem root.

  A random name of `namelen` characters is generated when `name` is omitted.
  """
  name = name or rngu.rand_string(namelen)

  path = os.path.join(_FASTFS_DIR, name)
  os.makedirs(path, exist_ok=True)

  return path
79
+
@@ -0,0 +1,51 @@
1
+ import re
2
+ import string
3
+
4
+ from . import alog
5
+
6
+
7
+ class _FnDict:
8
+
9
+ def __init__(self, lookup_fn):
10
+ self._lookup_fn = lookup_fn
11
+
12
+ def __getitem__(self, key):
13
+ m = re.match(r'([^:]+):(.*)', key)
14
+ if m:
15
+ lkey, defval = m.group(1), m.group(2)
16
+ else:
17
+ lkey, defval = key, None
18
+
19
+ return self._lookup_fn(lkey, defval=defval)
20
+
21
+
22
+ def _dict_lookup_fn(vals, delim, misses_ok):
23
+
24
+ def lookup_fn(key, defval=None):
25
+ value = vals.get(key, defval)
26
+ if value is None:
27
+ if not misses_ok:
28
+ alog.xraise(KeyError, f'String template replace missing value for key: {key}')
29
+ else:
30
+ value = f'{delim}{key}'
31
+
32
+ return value
33
+
34
+ return lookup_fn
35
+
36
+
37
+ def template_replace(st, vals=None, lookup_fn=None, delim=None, misses_ok=None):
38
+ delim = delim or '$'
39
+ misses_ok = False if misses_ok is None else misses_ok
40
+
41
+ class Template(string.Template):
42
+
43
+ # Allow for brace ID with the format ${ID:DEFAULT_VALUE}.
44
+ braceidpattern = r'((?a:[_a-z][_a-z0-9]*)(:[^}]*)?)'
45
+ delimiter = delim
46
+
47
+ if lookup_fn is None:
48
+ lookup_fn = _dict_lookup_fn(vals, delim, misses_ok)
49
+
50
+ return Template(st).safe_substitute(_FnDict(lookup_fn))
51
+