python-misc-utils 0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124) hide show
  1. python_misc_utils-0.2/LICENSE +13 -0
  2. python_misc_utils-0.2/PKG-INFO +36 -0
  3. python_misc_utils-0.2/README.md +5 -0
  4. python_misc_utils-0.2/py_misc_utils/__init__.py +0 -0
  5. python_misc_utils-0.2/py_misc_utils/abs_timeout.py +12 -0
  6. python_misc_utils-0.2/py_misc_utils/alog.py +311 -0
  7. python_misc_utils-0.2/py_misc_utils/app_main.py +179 -0
  8. python_misc_utils-0.2/py_misc_utils/archive_streamer.py +112 -0
  9. python_misc_utils-0.2/py_misc_utils/assert_checks.py +118 -0
  10. python_misc_utils-0.2/py_misc_utils/ast_utils.py +121 -0
  11. python_misc_utils-0.2/py_misc_utils/async_manager.py +189 -0
  12. python_misc_utils-0.2/py_misc_utils/break_control.py +63 -0
  13. python_misc_utils-0.2/py_misc_utils/buffered_iterator.py +35 -0
  14. python_misc_utils-0.2/py_misc_utils/cached_file.py +507 -0
  15. python_misc_utils-0.2/py_misc_utils/call_limiter.py +26 -0
  16. python_misc_utils-0.2/py_misc_utils/call_result_selector.py +13 -0
  17. python_misc_utils-0.2/py_misc_utils/cleanups.py +85 -0
  18. python_misc_utils-0.2/py_misc_utils/cmd.py +97 -0
  19. python_misc_utils-0.2/py_misc_utils/compression.py +116 -0
  20. python_misc_utils-0.2/py_misc_utils/cond_waiter.py +13 -0
  21. python_misc_utils-0.2/py_misc_utils/context_base.py +18 -0
  22. python_misc_utils-0.2/py_misc_utils/context_managers.py +67 -0
  23. python_misc_utils-0.2/py_misc_utils/core_utils.py +577 -0
  24. python_misc_utils-0.2/py_misc_utils/daemon_process.py +252 -0
  25. python_misc_utils-0.2/py_misc_utils/data_cache.py +46 -0
  26. python_misc_utils-0.2/py_misc_utils/date_utils.py +90 -0
  27. python_misc_utils-0.2/py_misc_utils/debug.py +24 -0
  28. python_misc_utils-0.2/py_misc_utils/dyn_modules.py +50 -0
  29. python_misc_utils-0.2/py_misc_utils/dynamod.py +103 -0
  30. python_misc_utils-0.2/py_misc_utils/env_config.py +35 -0
  31. python_misc_utils-0.2/py_misc_utils/executor.py +239 -0
  32. python_misc_utils-0.2/py_misc_utils/file_overwrite.py +29 -0
  33. python_misc_utils-0.2/py_misc_utils/fin_wrap.py +77 -0
  34. python_misc_utils-0.2/py_misc_utils/fp_utils.py +47 -0
  35. python_misc_utils-0.2/py_misc_utils/fs/__init__.py +0 -0
  36. python_misc_utils-0.2/py_misc_utils/fs/file_fs.py +127 -0
  37. python_misc_utils-0.2/py_misc_utils/fs/ftp_fs.py +242 -0
  38. python_misc_utils-0.2/py_misc_utils/fs/gcs_fs.py +196 -0
  39. python_misc_utils-0.2/py_misc_utils/fs/http_fs.py +241 -0
  40. python_misc_utils-0.2/py_misc_utils/fs/s3_fs.py +417 -0
  41. python_misc_utils-0.2/py_misc_utils/fs_base.py +133 -0
  42. python_misc_utils-0.2/py_misc_utils/fs_utils.py +207 -0
  43. python_misc_utils-0.2/py_misc_utils/gcs_fs.py +169 -0
  44. python_misc_utils-0.2/py_misc_utils/gen_indices.py +54 -0
  45. python_misc_utils-0.2/py_misc_utils/gfs.py +371 -0
  46. python_misc_utils-0.2/py_misc_utils/git_repo.py +77 -0
  47. python_misc_utils-0.2/py_misc_utils/global_namespace.py +110 -0
  48. python_misc_utils-0.2/py_misc_utils/http_async_fetcher.py +139 -0
  49. python_misc_utils-0.2/py_misc_utils/http_server.py +196 -0
  50. python_misc_utils-0.2/py_misc_utils/http_utils.py +143 -0
  51. python_misc_utils-0.2/py_misc_utils/img_utils.py +20 -0
  52. python_misc_utils-0.2/py_misc_utils/infix_op.py +20 -0
  53. python_misc_utils-0.2/py_misc_utils/inspect_utils.py +205 -0
  54. python_misc_utils-0.2/py_misc_utils/iostream.py +21 -0
  55. python_misc_utils-0.2/py_misc_utils/iter_file.py +117 -0
  56. python_misc_utils-0.2/py_misc_utils/key_wrap.py +46 -0
  57. python_misc_utils-0.2/py_misc_utils/lazy_import.py +25 -0
  58. python_misc_utils-0.2/py_misc_utils/lockfile.py +164 -0
  59. python_misc_utils-0.2/py_misc_utils/mem_size.py +64 -0
  60. python_misc_utils-0.2/py_misc_utils/mirror_from.py +72 -0
  61. python_misc_utils-0.2/py_misc_utils/mmap.py +16 -0
  62. python_misc_utils-0.2/py_misc_utils/module_utils.py +196 -0
  63. python_misc_utils-0.2/py_misc_utils/moving_average.py +19 -0
  64. python_misc_utils-0.2/py_misc_utils/msgpack_streamer.py +26 -0
  65. python_misc_utils-0.2/py_misc_utils/multi_wait.py +24 -0
  66. python_misc_utils-0.2/py_misc_utils/multiprocessing.py +102 -0
  67. python_misc_utils-0.2/py_misc_utils/named_array.py +224 -0
  68. python_misc_utils-0.2/py_misc_utils/no_break.py +46 -0
  69. python_misc_utils-0.2/py_misc_utils/no_except.py +32 -0
  70. python_misc_utils-0.2/py_misc_utils/np_ml_framework.py +184 -0
  71. python_misc_utils-0.2/py_misc_utils/np_utils.py +346 -0
  72. python_misc_utils-0.2/py_misc_utils/ntuple_utils.py +38 -0
  73. python_misc_utils-0.2/py_misc_utils/num_utils.py +54 -0
  74. python_misc_utils-0.2/py_misc_utils/obj.py +73 -0
  75. python_misc_utils-0.2/py_misc_utils/object_cache.py +100 -0
  76. python_misc_utils-0.2/py_misc_utils/object_tracker.py +88 -0
  77. python_misc_utils-0.2/py_misc_utils/ordered_set.py +71 -0
  78. python_misc_utils-0.2/py_misc_utils/osfd.py +27 -0
  79. python_misc_utils-0.2/py_misc_utils/packet.py +22 -0
  80. python_misc_utils-0.2/py_misc_utils/parquet_streamer.py +69 -0
  81. python_misc_utils-0.2/py_misc_utils/pd_utils.py +254 -0
  82. python_misc_utils-0.2/py_misc_utils/periodic_task.py +61 -0
  83. python_misc_utils-0.2/py_misc_utils/pickle_wrap.py +121 -0
  84. python_misc_utils-0.2/py_misc_utils/pipeline.py +98 -0
  85. python_misc_utils-0.2/py_misc_utils/remap_pickle.py +50 -0
  86. python_misc_utils-0.2/py_misc_utils/resource_manager.py +155 -0
  87. python_misc_utils-0.2/py_misc_utils/rnd_utils.py +56 -0
  88. python_misc_utils-0.2/py_misc_utils/run_once.py +19 -0
  89. python_misc_utils-0.2/py_misc_utils/scheduler.py +135 -0
  90. python_misc_utils-0.2/py_misc_utils/select_params.py +300 -0
  91. python_misc_utils-0.2/py_misc_utils/signal.py +141 -0
  92. python_misc_utils-0.2/py_misc_utils/skl_utils.py +270 -0
  93. python_misc_utils-0.2/py_misc_utils/split.py +147 -0
  94. python_misc_utils-0.2/py_misc_utils/state.py +53 -0
  95. python_misc_utils-0.2/py_misc_utils/std_module.py +56 -0
  96. python_misc_utils-0.2/py_misc_utils/stream_dataframe.py +176 -0
  97. python_misc_utils-0.2/py_misc_utils/streamed_file.py +144 -0
  98. python_misc_utils-0.2/py_misc_utils/tempdir.py +79 -0
  99. python_misc_utils-0.2/py_misc_utils/template_replace.py +51 -0
  100. python_misc_utils-0.2/py_misc_utils/tensor_stream.py +269 -0
  101. python_misc_utils-0.2/py_misc_utils/thread_context.py +33 -0
  102. python_misc_utils-0.2/py_misc_utils/throttle.py +30 -0
  103. python_misc_utils-0.2/py_misc_utils/time_trigger.py +18 -0
  104. python_misc_utils-0.2/py_misc_utils/timegen.py +11 -0
  105. python_misc_utils-0.2/py_misc_utils/traceback.py +49 -0
  106. python_misc_utils-0.2/py_misc_utils/tracking_executor.py +91 -0
  107. python_misc_utils-0.2/py_misc_utils/transform_array.py +42 -0
  108. python_misc_utils-0.2/py_misc_utils/uncompress.py +35 -0
  109. python_misc_utils-0.2/py_misc_utils/url_fetcher.py +157 -0
  110. python_misc_utils-0.2/py_misc_utils/utils.py +538 -0
  111. python_misc_utils-0.2/py_misc_utils/varint.py +50 -0
  112. python_misc_utils-0.2/py_misc_utils/virt_array.py +52 -0
  113. python_misc_utils-0.2/py_misc_utils/weak_call.py +33 -0
  114. python_misc_utils-0.2/py_misc_utils/work_results.py +100 -0
  115. python_misc_utils-0.2/py_misc_utils/writeback_file.py +43 -0
  116. python_misc_utils-0.2/pyproject.toml +27 -0
  117. python_misc_utils-0.2/python_misc_utils.egg-info/PKG-INFO +36 -0
  118. python_misc_utils-0.2/python_misc_utils.egg-info/SOURCES.txt +122 -0
  119. python_misc_utils-0.2/python_misc_utils.egg-info/dependency_links.txt +1 -0
  120. python_misc_utils-0.2/python_misc_utils.egg-info/requires.txt +11 -0
  121. python_misc_utils-0.2/python_misc_utils.egg-info/top_level.txt +1 -0
  122. python_misc_utils-0.2/setup.cfg +4 -0
  123. python_misc_utils-0.2/setup.py +33 -0
  124. python_misc_utils-0.2/test/test_runner.py +33 -0
@@ -0,0 +1,13 @@
1
+ Copyright 2023 Davide Libenzi
2
+
3
+ Licensed under the Apache License, Version 2.0 (the "License");
4
+ you may not use this file except in compliance with the License.
5
+ You may obtain a copy of the License at
6
+
7
+ http://www.apache.org/licenses/LICENSE-2.0
8
+
9
+ Unless required by applicable law or agreed to in writing, software
10
+ distributed under the License is distributed on an "AS IS" BASIS,
11
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ See the License for the specific language governing permissions and
13
+ limitations under the License.
@@ -0,0 +1,36 @@
1
+ Metadata-Version: 2.4
2
+ Name: python_misc_utils
3
+ Version: 0.2
4
+ Summary: A collection of Python utility APIs
5
+ Author: Davide Libenzi
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/davidel/py_misc_utils
8
+ Project-URL: Issues, https://github.com/davidel/py_misc_utils/issues
9
+ Project-URL: Repository, https://github.com/davidel/py_misc_utils.git
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Intended Audience :: Developers
13
+ Requires-Python: >=3.9
14
+ Description-Content-Type: text/markdown
15
+ License-File: LICENSE
16
+ Requires-Dist: numpy
17
+ Requires-Dist: pandas
18
+ Requires-Dist: psutil
19
+ Requires-Dist: pyyaml
20
+ Provides-Extra: fs
21
+ Requires-Dist: boto3; extra == "fs"
22
+ Requires-Dist: bs4; extra == "fs"
23
+ Requires-Dist: ftputil; extra == "fs"
24
+ Requires-Dist: google-cloud-storage; extra == "fs"
25
+ Requires-Dist: pyarrow; extra == "fs"
26
+ Dynamic: author
27
+ Dynamic: license-file
28
+ Dynamic: provides-extra
29
+ Dynamic: requires-dist
30
+ Dynamic: summary
31
+
32
+ ## Python Utility Code
33
+
34
+ I keep writing the same stuff in different projects, so I finally decided
35
+ to throw stuff into a common boilerplate and stop the cut&paste jobs.
36
+
@@ -0,0 +1,5 @@
1
+ ## Python Utility Code
2
+
3
+ I keep writing the same stuff in different projects, so I finally decided
4
+ to throw stuff into a common boilerplate and stop the cut&paste jobs.
5
+
File without changes
@@ -0,0 +1,12 @@
1
+ import time
2
+
3
+
4
class AbsTimeout:
  """Tracks an absolute deadline computed from a relative timeout.

  A `timeout` of None means "no deadline": get() then always returns None.
  """

  def __init__(self, timeout, timefn=None):
    # The clock is injectable (mainly for testing); defaults to time.time().
    self._timefn = timefn if timefn is not None else time.time
    if timeout is None:
      self._expires = None
    else:
      self._expires = self._timefn() + timeout

  def get(self):
    """Returns the remaining seconds (clamped at 0), or None if no deadline."""
    if self._expires is None:
      return None

    return max(0, self._expires - self._timefn())
12
+
@@ -0,0 +1,311 @@
1
+ import argparse
2
+ import logging
3
+ import math
4
+ import os
5
+ import sys
6
+ import time
7
+ import traceback
8
+ import types
9
+
10
+ from . import call_limiter as cl
11
+ from . import run_once as ro
12
+ from . import traceback as tb
13
+
14
+
15
+ DEBUG = logging.DEBUG
16
+ INFO = logging.INFO
17
+ WARNING = logging.WARNING
18
+ ERROR = logging.ERROR
19
+ CRITICAL = logging.CRITICAL
20
+
21
+ SPAM = DEBUG - 2
22
+ VERBOSE = DEBUG - 1
23
+ DEBUG0 = DEBUG + 1
24
+ DEBUG1 = DEBUG + 2
25
+ DEBUG2 = DEBUG + 3
26
+ DEBUG3 = DEBUG + 4
27
+
28
+ _SHORT_LEV = {
29
+ SPAM: 'SP',
30
+ VERBOSE: 'VB',
31
+ DEBUG0: '0D',
32
+ DEBUG1: '1D',
33
+ DEBUG2: '2D',
34
+ DEBUG3: '3D',
35
+ DEBUG: 'DD',
36
+ INFO: 'IN',
37
+ WARNING: 'WA',
38
+ ERROR: 'ER',
39
+ CRITICAL: 'CR',
40
+ }
41
+
42
+
43
class Formatter(logging.Formatter):
  """Log formatter emitting compact single-line headers.

  Each line of a multi-line message gets its own header, so grepping the log
  always shows the level/time/pid/module context.
  """

  def __init__(self, emit_extra=None):
    super().__init__()
    # Optional iterable of record attribute names appended to the header.
    self.emit_extra = emit_extra

  def format(self, r):
    hdr = self.make_header(r)
    msg = (r.msg % r.args) if r.args else r.msg

    return '\n'.join([f'{hdr}: {ln}' for ln in msg.split('\n')])

  def formatTime(self, r, datefmt=None):
    if datefmt:
      # BUG FIX: time.strftime() requires a struct_time, while r.created is a
      # float epoch timestamp; the original passed the float straight through,
      # raising TypeError whenever a datefmt was supplied.
      return time.strftime(datefmt, time.localtime(r.created))

    tstr = time.strftime('%Y%m%d %H:%M:%S', time.localtime(r.created))
    usecs = math.modf(r.created)[0] * 1e6

    return f'{tstr}.{usecs:06.0f}'

  def make_header(self, r):
    tstr = self.formatTime(r)
    # Two-letter level tag; fall back to the first two chars of the level name.
    lid = _SHORT_LEV.get(r.levelno, r.levelname[:2])
    hdr = f'{lid}{tstr};{os.getpid()};{r.module}'
    if self.emit_extra:
      extras = [str(getattr(r, name, None)) for name in self.emit_extra]
      hdr = f'{hdr};{";".join(extras)}'

    return hdr
73
+
74
+
75
+ _DEFAULT_ARGS = dict(
76
+ log_level=os.getenv('LOG_LEVEL', 'INFO'),
77
+ log_file=os.getenv('LOG_FILE', 'STDERR'),
78
+ log_mod_levels=[],
79
+ log_emit_extra=[],
80
+ )
81
+
82
+ def add_logging_options(parser):
83
+ parser.add_argument('--log_level', type=str, default=_DEFAULT_ARGS.get('log_level'),
84
+ choices={'SPAM', 'VERBOSE', 'DEBUG', 'DEBUG0', 'DEBUG1', 'DEBUG2',
85
+ 'DEBUG3', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'},
86
+ help='The logging level')
87
+ parser.add_argument('--log_file', type=str, default=_DEFAULT_ARGS.get('log_file'),
88
+ help='Comma separated list of target log files (STDOUT, STDERR ' \
89
+ f'are also recognized)')
90
+ parser.add_argument('--log_mod_levels', nargs='*',
91
+ help='Comma separated list of LOGGER_NAME,LEVEL to set the log level at')
92
+ parser.add_argument('--log_emit_extra', nargs='*',
93
+ help='Which other logging record fields should be emitted')
94
+
95
+
96
@ro.run_once
def _add_levels():
  """Registers the custom level names with the logging module (only once)."""
  for level, name in ((SPAM, 'SPAM'),
                      (VERBOSE, 'VERBOSE'),
                      (DEBUG0, 'DEBUG0'),
                      (DEBUG1, 'DEBUG1'),
                      (DEBUG2, 'DEBUG2'),
                      (DEBUG3, 'DEBUG3')):
    logging.addLevelName(level, name)
104
+
105
+
106
+ def _clear_logging_handlers():
107
+ # Python >= 3.8 has a force=True argument to logging.basicConfig() to force
108
+ # initialization, but since Colab is not there yet, we do it manually.
109
+ root_logger = logging.getLogger()
110
+ for handler in tuple(root_logger.handlers):
111
+ handler.flush()
112
+ root_logger.removeHandler(handler)
113
+
114
+
115
+ def _set_logmod_levels(mlevels):
116
+ mlevels = list(mlevels) if mlevels else []
117
+ env_mlevels = os.getenv('LOGMOD_LEVELS', None)
118
+ if env_mlevels is not None:
119
+ mlevels.extend(env_mlevels.split(':'))
120
+ for mlev in mlevels:
121
+ mod, level = mlev.split(',')
122
+ logging.getLogger(mod).setLevel(logging.getLevelName(level.upper()))
123
+
124
+
125
def setup_logging(args):
  """Configures the root logger from the parsed command line arguments.

  `args` must carry log_level, log_file, log_emit_extra and log_mod_levels
  (see add_logging_options()).
  """
  _add_levels()
  _clear_logging_handlers()

  numeric_level = logging.getLevelName(args.log_level.upper())

  handlers = []
  for fname in (args.log_file.split(',') if args.log_file else []):
    if fname == 'STDOUT':
      stream = sys.stdout
    elif fname == 'STDERR':
      stream = sys.stderr
    else:
      # NOTE(review): the stream is intentionally left open for the process
      # lifetime, since it backs a logging handler.
      stream = open(fname, mode='a')

    handler = logging.StreamHandler(stream)
    handler.setLevel(numeric_level)
    handler.setFormatter(Formatter(emit_extra=args.log_emit_extra))
    handlers.append(handler)

  logging.basicConfig(level=numeric_level, handlers=handlers, force=True)

  # The handlers above are already at the right level, so only refresh the
  # module-level cached level here.
  set_current_level(numeric_level, set_logger=False)

  _set_logmod_levels(args.log_mod_levels)
149
+
150
+
151
def basic_setup(**kwargs):
  """Configures logging from keyword overrides layered on the defaults."""
  cfg = dict(_DEFAULT_ARGS, **kwargs)
  setup_logging(types.SimpleNamespace(**cfg))


def get_main_config():
  """Returns the app_main hook object used to wire up logging."""
  return types.SimpleNamespace(add_arguments=add_logging_options,
                               config_module=setup_logging)
160
+
161
+
162
# Cached copy of the current level, readable without touching the logging
# module (used by level_active() and the signal-safe async_log()).
_LEVEL = DEBUG

def set_current_level(level, set_logger=True):
  """Updates the cached level and, optionally, the root logger and handlers."""
  global _LEVEL

  if set_logger:
    root = logging.getLogger()
    root.setLevel(level)
    for handler in root.handlers:
      handler.setLevel(level)

  _LEVEL = level
174
+
175
+
176
def level_active(level):
  """Tells whether messages at `level` would currently be emitted."""
  return level >= _LEVEL


def level_run(level, fn):
  """Calls fn() (returning its result) only if `level` is active, else None."""
  return fn() if level_active(level) else None
182
+
183
+
184
# Number of internal frames between the user call site and logging.log();
# Python 3.11 collapsed one of them.
_LOGGING_FRAMES = 1 if sys.version_info >= (3, 11) else 2

def logging_args(kwargs):
  """Prepares the kwargs forwarded to logging.log().

  Pops the rate-limiting 'limit' key and, when the limiter allows the call,
  bumps 'stacklevel' so the emitted module/line point at user code.
  """
  limit = kwargs.pop('limit', -1)
  # A negative limit disables rate limiting; otherwise ask the call limiter.
  if limit < 0 or cl.trigger(__file__, limit):
    kwargs['stacklevel'] = kwargs.get('stacklevel', 1) + _LOGGING_FRAMES

  return kwargs
193
+
194
+
195
+ def _nested_args(kwargs):
196
+ kwargs['stacklevel'] = kwargs.get('stacklevel', 1) + 1
197
+
198
+ return kwargs
199
+
200
+
201
+ def _dmsg(msg):
202
+ return msg() if callable(msg) else msg
203
+
204
+
205
def log(level, msg, *args, **kwargs):
  """Routes a message to logging.log() with adjusted kwargs."""
  kwargs = logging_args(kwargs)
  # logging_args() currently always returns the kwargs; the guard keeps call
  # sites working should it start returning None to drop messages.
  if kwargs is not None:
    logging.log(level, _dmsg(msg), *args, **kwargs)
209
+
210
+
211
# Per-level convenience wrappers. Each checks the cached level first (so a
# deferred callable message is never resolved when the level is off), then
# forwards to log() with the stacklevel bumped for the extra wrapper frame.

def spam(msg, *args, **kwargs):
  if _LEVEL > SPAM:
    return
  log(SPAM, _dmsg(msg), *args, **_nested_args(kwargs))


def verbose(msg, *args, **kwargs):
  if _LEVEL > VERBOSE:
    return
  log(VERBOSE, _dmsg(msg), *args, **_nested_args(kwargs))


def debug0(msg, *args, **kwargs):
  if _LEVEL > DEBUG0:
    return
  log(DEBUG0, _dmsg(msg), *args, **_nested_args(kwargs))


def debug1(msg, *args, **kwargs):
  if _LEVEL > DEBUG1:
    return
  log(DEBUG1, _dmsg(msg), *args, **_nested_args(kwargs))


def debug2(msg, *args, **kwargs):
  if _LEVEL > DEBUG2:
    return
  log(DEBUG2, _dmsg(msg), *args, **_nested_args(kwargs))


def debug3(msg, *args, **kwargs):
  if _LEVEL > DEBUG3:
    return
  log(DEBUG3, _dmsg(msg), *args, **_nested_args(kwargs))


def debug(msg, *args, **kwargs):
  if _LEVEL > DEBUG:
    return
  log(DEBUG, _dmsg(msg), *args, **_nested_args(kwargs))


def info(msg, *args, **kwargs):
  if _LEVEL > INFO:
    return
  log(INFO, _dmsg(msg), *args, **_nested_args(kwargs))


def warning(msg, *args, **kwargs):
  if _LEVEL > WARNING:
    return
  log(WARNING, _dmsg(msg), *args, **_nested_args(kwargs))


def error(msg, *args, **kwargs):
  if _LEVEL > ERROR:
    return
  log(ERROR, _dmsg(msg), *args, **_nested_args(kwargs))


def critical(msg, *args, **kwargs):
  if _LEVEL > CRITICAL:
    return
  log(CRITICAL, _dmsg(msg), *args, **_nested_args(kwargs))
264
+
265
+
266
def exception(e, *args, **kwargs):
  """Logs exception `e` together with the current traceback.

  The message prefix defaults to 'Exception' and can be overridden with the
  'exmsg' keyword argument.
  """
  kwargs = logging_args(kwargs)
  if kwargs is not None:
    msg = kwargs.pop('exmsg', 'Exception')
    # Use a local name that does not shadow the module-level `tb` import.
    trace = traceback.format_exc()
    error(f'{_dmsg(msg)}: {e}\n{trace}', *args, **_nested_args(kwargs))
272
+
273
+
274
def xraise(e, msg, *args, **kwargs):
  """Raises exception type `e` with message `msg`, optionally logging it first."""
  logit = kwargs.pop('logit', False)
  if logit:
    error(msg, *args, **_nested_args(kwargs))

  raise e(_dmsg(msg))
279
+
280
+
281
def async_log(level, msg, *args, **kwargs):
  """Signal-safe logging: formats and writes directly, bypassing logging.

  This one cannot use the logging module as it could be called from a signal
  handler asynchronously. The logging.getLevelName() call is safe since it is
  simply a (lockless) dictionary lookup; similarly, no other APIs taking
  locks can be called from this context.
  """
  if level < _LEVEL:
    return

  kwargs = logging_args(kwargs)
  if kwargs is None:
    return

  # Fake a logging record, filling up only the fields used by the Formatter.
  # Do not call logging APIs for that, for the same reasons cited above.
  frame = tb.get_frame(n=1)
  module = frame.f_globals.get('__name__', 'ASYNC').split('.')[-1]

  now = time.time()
  record = types.SimpleNamespace(
      msg=_dmsg(msg),
      args=args,
      created=now,
      msecs=math.modf(now)[0] * 1000,
      levelno=level,
      levelname=logging.getLevelName(level),
      module=module,
  )

  logfd = kwargs.pop('file', sys.stderr)
  logfd.write(Formatter().format(record))
  logfd.write('\n')
  logfd.flush()
311
+
@@ -0,0 +1,179 @@
1
+ import argparse
2
+ import functools
3
+ import inspect
4
+ import sys
5
+ import typing
6
+ import yaml
7
+
8
+ from . import alog
9
+ from . import core_utils as cu
10
+ from . import global_namespace as gns
11
+ from . import multiprocessing as mp
12
+
13
+
14
def _get_init_modules():
  # Here is the place to import (import here to avoid cycling dependencies) and
  # call the get_main_config() API of modules which require setting up a
  # command line and configuring themselves with the parsed arguments.
  # Note that alog is imported at the top since it is used in other places (and
  # also has minimal dependencies which do not create issues).
  # Objects returned by the get_main_config() API must have a add_arguments(parser)
  # API to allow them to add command line arguments, and a config_module(args) API
  # to configure themselves with the parsed arguments.
  # Example:
  #
  #   from . import foo
  #   modules.append(foo.get_main_config())
  #
  return (alog.get_main_config(),)
32
+
33
+
34
+ def _add_arguments(init_modules, parser):
35
+ for module in init_modules:
36
+ module.add_arguments(parser)
37
+
38
+
39
+ def _config_modules(init_modules, args):
40
+ for module in init_modules:
41
+ module.config_module(args)
42
+
43
+
44
+ def _child_setup_modules(args):
45
+ init_modules = _get_init_modules()
46
+ _config_modules(init_modules, args)
47
+
48
+ return args
49
+
50
+
51
# Parsed arguments are published in the global namespace so that child
# processes can re-run module configuration with them.
_ARGS = gns.Var(f'{__name__}.ARGS', child_fn=_child_setup_modules)

def _main(parser, mainfn, args, rem_args):
  """Parses the command line, configures the init modules, invokes mainfn."""
  if isinstance(mainfn, Main):
    mainfn.add_arguments(parser)

  init_modules = _get_init_modules()
  _add_arguments(init_modules, parser)

  if rem_args:
    # Everything after a literal '--' is left unparsed and stashed on the
    # parsed args under the attribute named by `rem_args`.
    cmdline = args or sys.argv[1:]
    ddpos = cu.lindex(cmdline, '--')
    if ddpos < 0:
      extra = []
    else:
      cmdline, extra = cmdline[: ddpos], cmdline[ddpos + 1:]

    parsed_args = parser.parse_args(args=cmdline)
    setattr(parsed_args, rem_args, tuple(extra))
  else:
    parsed_args = parser.parse_args(args=args)

  gns.set(_ARGS, parsed_args)
  _config_modules(init_modules, parsed_args)

  mainfn(parsed_args)
79
+
80
+
81
def main(parser, mainfn, args=None, rem_args=None):
  """Entry point wrapper, running _main() under the multiprocessing harness."""
  mp.procfn_wrap(_main, parser, mainfn, args, rem_args)


def basic_main(mainfn, description='Basic Main'):
  """Runs mainfn with a default-configured ArgumentParser."""
  parser = argparse.ArgumentParser(
      description=description,
      formatter_class=argparse.ArgumentDefaultsHelpFormatter,
  )

  main(parser, mainfn)
91
+
92
+
93
+ def _child_setup_functions(setup_functions):
94
+ for setupfn in setup_functions:
95
+ setupfn()
96
+
97
+ return setup_functions
98
+
99
+
100
# Registered setup functions, re-run inside child processes via the hook.
_SETUP_FUNCTIONS = gns.Var(f'{__name__}.SETUP_FUNCTIONS',
                           child_fn=_child_setup_functions,
                           defval=[])

def add_setupfn(setupfn, run=True):
  """Registers setupfn to be re-run in children; runs it now when run=True."""
  if run:
    setupfn()

  gns.get(_SETUP_FUNCTIONS).append(setupfn)
110
+
111
+
112
+ # This is similar to Fire but brings up the app_main infrastructure.
113
+ # Use as:
114
+ #
115
+ # @app_main.Main
116
+ # def my_main(arg, ..., kwarg=17, ...):
117
+ # ...
118
+ #
119
+ # if __name__ == '__main__':
120
+ # parser = argparse.ArgumentParser(...)
121
+ # ...
122
+ # app_main.main(parser, my_main, ...)
123
+ #
124
class Main:
  """Adapts a plain function into an app_main entry point.

  The wrapped function's signature is inspected to build argparse arguments
  (add_arguments), and calling the instance with the parsed namespace
  dispatches the attribute values back into the function.
  """

  def __init__(self, func):
    self._func = func
    self._sig = inspect.signature(func)
    functools.update_wrapper(self, func)

  def __call__(self, parsed_args):
    pos_args, kw_args = [], {}
    for name, param in self._sig.parameters.items():
      value = getattr(parsed_args, name, None)
      if param.kind == param.POSITIONAL_ONLY:
        pos_args.append(value)
      else:
        kw_args[name] = value

    return self._func(*pos_args, **kw_args)

  def _param_options(self, param):
    # Derives (type_cast, default, choices, ptype) for a single parameter:
    # annotation wins, then the default's type, else YAML-parsed strings.
    choices = None
    defval = param.default if param.default is not param.empty else None
    if param.annotation is not param.empty:
      ptype = param.annotation
      if typing.get_origin(ptype) == typing.Literal:
        # Literal[...] maps naturally onto argparse choices.
        choices = typing.get_args(ptype)
        ptype = type(choices[0])

      type_cast = functools.partial(cu.to_type, vtype=ptype)
    elif defval is not None:
      ptype = type(defval)
      type_cast = functools.partial(cu.to_type, vtype=ptype)
    else:
      ptype, type_cast = str, yaml.safe_load

    return type_cast, defval, choices, ptype

  def add_arguments(self, parser):
    """Registers one argparse argument per wrapped-function parameter."""
    fname = self._func.__name__

    for name, param in self._sig.parameters.items():
      type_cast, defval, choices, ptype = self._param_options(param)
      # Booleans get argparse's --flag/--no-flag treatment.
      action = argparse.BooleanOptionalAction if ptype is bool else None
      help_str = f'Argument "{name}" (type={ptype.__name__}) of function {fname}(...)'
      if param.default is param.empty or param.kind == param.POSITIONAL_ONLY:
        # Required (or positional-only) parameters become positionals.
        parser.add_argument(name,
                            metavar=name.upper(),
                            action=action,
                            type=type_cast,
                            default=defval,
                            choices=choices,
                            help=help_str)
      else:
        parser.add_argument(f'--{name}',
                            action=action,
                            type=type_cast,
                            default=defval,
                            choices=choices,
                            help=help_str)
179
+
@@ -0,0 +1,112 @@
1
+ import collections
2
+ import hashlib
3
+ import os
4
+ import tarfile
5
+ import zipfile
6
+
7
+ from . import alog
8
+ from . import assert_checks as tas
9
+ from . import gfs
10
+ from . import img_utils as imgu
11
+ from . import utils as ut
12
+
13
+
14
+ ArchiveSpecs = collections.namedtuple('ArchiveSpecs', 'kind, compression, base_path, purl')
15
+ ArchiveEntry = collections.namedtuple('ArchiveEntry', 'name, data')
16
+
17
+
18
+ _EXT_COMPRESSION = {
19
+ 'gz': 'gz',
20
+ 'xz': 'xz',
21
+ 'bz2': 'bz2',
22
+ 'bzip2': 'bz2',
23
+ }
24
+
25
+ def parse_specs(url):
26
+ usplit = gfs.splitext(url)
27
+
28
+ compression = _EXT_COMPRESSION.get(usplit.ext)
29
+ ubase = usplit.base if compression else usplit.purl.path
30
+
31
+ base_path, ext = os.path.splitext(ubase)
32
+
33
+ tas.check(ext, msg=f'Unable to infer archive type: {url}')
34
+
35
+ return ArchiveSpecs(kind=usplit.ext.lower(),
36
+ compression=compression,
37
+ base_path=base_path,
38
+ purl=usplit.purl)
39
+
40
+
41
class ArchiveStreamer:
  """Streams ArchiveEntry (name, data) items out of an archive at a gfs URL.

  Supported kinds: zip, tar (optionally compressed), parquet and msgpack
  (the latter two are exposed with Web Dataset style UID.ENTITY names).
  """

  def __init__(self, url, **kwargs):
    self._url = url
    self._kwargs = kwargs

  def _url_uid(self, url):
    # Short, stable id of the URL, used to build per-record entry names.
    return hashlib.sha1(url.encode()).hexdigest()[: 8]

  def _generate_zip(self, specs):
    # The ZIP format requires random access (specifically, the file list is at EOF)
    # so it is better to cache the file locally before opening.
    with gfs.open_local(self._url, mode='rb', **self._kwargs) as stream:
      # BUG FIX: the ZipFile was never closed; use a context manager.
      with zipfile.ZipFile(stream, mode='r') as zfile:
        for zinfo in zfile.infolist():
          if not zinfo.is_dir():
            yield ArchiveEntry(name=zinfo.filename, data=zfile.read(zinfo))

  def _generate_tar(self, specs):
    with gfs.open(self._url, mode='rb', **self._kwargs) as stream:
      # Streaming mode ('r|...') reads sequentially, so no random access needed.
      # BUG FIX: close the tarfile, and skip non-regular members (directories,
      # links), for which extractfile() returns None and the original crashed.
      with tarfile.open(mode=f'r|{specs.compression or ""}', fileobj=stream) as tfile:
        for tinfo in tfile:
          if tinfo.isreg():
            yield ArchiveEntry(name=tinfo.name, data=tfile.extractfile(tinfo).read())

  def _generate_records(self, streamer):
    # Simulate a streaming similar to what a Web Dataset would expect, with a
    # UID.ENTITY naming, where the UID is constant for all the entities of a record
    # (which are streamed sequentially).
    uid = self._url_uid(self._url)
    for i, recd in enumerate(streamer):
      ruid = f'{uid}_{i}'
      for name, data in recd.items():
        yield ArchiveEntry(name=f'{ruid}.{name}', data=data)

  def _generate_parquet(self, specs):
    # Keep the import dependency local, to make it required only if parquet is used.
    from . import parquet_streamer as pqs

    yield from self._generate_records(pqs.ParquetStreamer(self._url, **self._kwargs))

  def _generate_msgpack(self, specs):
    # Keep the import dependency local, to make it required only if msgpack is used.
    from . import msgpack_streamer as mps

    yield from self._generate_records(mps.MsgPackStreamer(self._url, **self._kwargs))

  def generate(self):
    """Yields the ArchiveEntry items for the archive at the configured URL.

    Raises:
      RuntimeError: if the archive kind cannot be handled.
    """
    specs = parse_specs(self._url)
    if specs.kind == 'zip':
      yield from self._generate_zip(specs)
    elif specs.kind == 'tar':
      yield from self._generate_tar(specs)
    elif specs.kind == 'parquet':
      yield from self._generate_parquet(specs)
    elif specs.kind == 'msgpack':
      yield from self._generate_msgpack(specs)
    else:
      alog.xraise(RuntimeError, f'Unknown archive type "{specs.kind}": {self._url}')

  def __iter__(self):
    return self.generate()
+