cdxcore 0.1.9__tar.gz → 0.1.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cdxcore might be problematic. Click here for more details.

Files changed (43) hide show
  1. {cdxcore-0.1.9 → cdxcore-0.1.11}/PKG-INFO +1 -1
  2. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/__init__.py +1 -1
  3. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/crman.py +4 -1
  4. cdxcore-0.1.11/cdxcore/jcpool.py +643 -0
  5. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/subdir.py +14 -6
  6. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/version.py +1 -1
  7. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore.egg-info/PKG-INFO +1 -1
  8. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore.egg-info/SOURCES.txt +2 -2
  9. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore.egg-info/top_level.txt +0 -1
  10. {cdxcore-0.1.9 → cdxcore-0.1.11}/pyproject.toml +1 -1
  11. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_config.py +1 -3
  12. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_crman.py +2 -4
  13. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_err.py +1 -1
  14. cdxcore-0.1.11/tests/test_jcpool.py +119 -0
  15. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_pretty.py +1 -1
  16. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_subdir.py +1 -1
  17. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_uniquehash.py +1 -1
  18. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_util.py +1 -1
  19. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_verbose.py +1 -1
  20. {cdxcore-0.1.9 → cdxcore-0.1.11}/tests/test_version.py +1 -1
  21. cdxcore-0.1.9/docs2/source/conf.py +0 -35
  22. cdxcore-0.1.9/tmp/jcpool.py +0 -411
  23. {cdxcore-0.1.9 → cdxcore-0.1.11}/LICENSE +0 -0
  24. {cdxcore-0.1.9 → cdxcore-0.1.11}/README.md +0 -0
  25. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/config.py +0 -0
  26. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/err.py +0 -0
  27. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/pretty.py +0 -0
  28. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/pretty.py_bak.py +0 -0
  29. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/uniquehash.py +0 -0
  30. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/util.py +0 -0
  31. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore/verbose.py +0 -0
  32. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore.egg-info/dependency_links.txt +0 -0
  33. {cdxcore-0.1.9 → cdxcore-0.1.11}/cdxcore.egg-info/requires.txt +0 -0
  34. {cdxcore-0.1.9 → cdxcore-0.1.11}/docs/source/conf.py +0 -0
  35. {cdxcore-0.1.9 → cdxcore-0.1.11}/setup.cfg +0 -0
  36. {cdxcore-0.1.9 → cdxcore-0.1.11}/tmp/deferred.py +0 -0
  37. {cdxcore-0.1.9 → cdxcore-0.1.11}/tmp/dynaplot.py +0 -0
  38. {cdxcore-0.1.9 → cdxcore-0.1.11}/tmp/filelock.py +0 -0
  39. {cdxcore-0.1.9 → cdxcore-0.1.11}/tmp/np.py +0 -0
  40. {cdxcore-0.1.9 → cdxcore-0.1.11}/tmp/npio.py +0 -0
  41. {cdxcore-0.1.9 → cdxcore-0.1.11}/tmp/sharedarray.py +0 -0
  42. {cdxcore-0.1.9 → cdxcore-0.1.11}/up/git_message.py +0 -0
  43. {cdxcore-0.1.9 → cdxcore-0.1.11}/up/pip_modify_setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cdxcore
3
- Version: 0.1.9
3
+ Version: 0.1.11
4
4
  Summary: Basic Python Tools; upgraded cdxbasics
5
5
  Author-email: Hans Buehler <github@buehler.london>
6
6
  License-Expression: MIT
@@ -4,4 +4,4 @@ Created on June 2022
4
4
  @author: hansb
5
5
  """
6
6
 
7
- __version__ = "0.1.9" # auto-updated by setup.py
7
+ __version__ = "0.1.11" # auto-updated by setup.py
@@ -147,7 +147,10 @@ class CRMan(object):
147
147
  """
148
148
  return self._current
149
149
 
150
- def write(self, text : str, end : str = '', flush : bool = True, channel : Callable = None ):
150
+ def write(self, text : str,
151
+ end : str = '',
152
+ flush : bool = True,
153
+ channel : Callable = None ):
151
154
  r"""
152
155
  Write to a ``channel``,
153
156
 
@@ -0,0 +1,643 @@
1
+ """
2
+ Overview
3
+ --------
4
+
5
+ Simple multi-processing conv wrapper around (already great)
6
+ `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__.
7
+
8
+ The minor additions are that parallel processing will be a tad more convenient for dictionaries,
9
+ and that it supports routing :class:`cdxcore.verbose.Context` messaging via a
10
+ :class:`multiprocessing.Queue` to a single thread.
11
+
12
+ Import
13
+ ------
14
+ .. code-block:: python
15
+
16
+ from cdxcore.jcpool import JCPool
17
+
18
+ Documentation
19
+ -------------
20
+ """
21
+
22
+ from joblib import Parallel as joblib_Parallel, delayed as _jl_delayed, cpu_count
23
+ from multiprocessing import Manager, Queue
24
+ from threading import Thread, get_ident as get_thread_id
25
+ import gc as gc
26
+ from collections import OrderedDict
27
+ from collections.abc import Mapping, Callable, Sequence, Iterable
28
+ import functools as functools
29
+ import uuid as uuid
30
+ import os as os
31
+ import datetime as datetime
32
+
33
+ from .verbose import Context, Timer
34
+ from .subdir import SubDir
35
+ from .uniquehash import unique_hash8
36
+
37
class ParallelContextChannel( Context ):
    """
    Pickle-able :class:`cdxcore.verbose.Context` ``channel``.

    Messages received by this channel are forwarded to a
    :class:`multiprocessing.Queue` towards the main thread — unless the
    caller already *is* the main thread, in which case the message is
    written straight to the underlying context.
    """
    def __init__(self, *, cid, maintid, queue, f_verbose):
        # Keep only pickle-able state: the context id, the main thread id,
        # the managed queue, and the destination context.
        self._queue     = queue
        self._cid       = cid
        self._maintid   = maintid
        self._f_verbose = f_verbose

    def __call__(self, msg : str, flush : bool ):
        """
        Deliver ``msg``: via the :class:`multiprocessing.Queue` when called
        from a worker thread/process; directly when called from the main thread.
        """
        if get_thread_id() != self._maintid:
            return self._queue.put( (msg, flush) )
        self._f_verbose.write(msg, end='', flush=flush)
57
+
58
class _ParallelContextOperator( object ):
    """
    Queue-based channel backbone for :class:`ParallelContextChannel`.

    This object cannot be pickled; use :attr:`mp_context` as the object to pass
    to other processes. It owns a :class:`multiprocessing.Manager` queue and a
    daemon reporting thread which prints every message received on the queue
    to ``f_verbose``.
    """
    def __init__(self, pool_verbose : Context, # context to print Pool progress to (in thread)
                       f_verbose : Context, # original function context (in thread)
                       verbose_interval : float = None # throttling for reporting
                       ):
        cid = id(f_verbose)   # context ID; used to identify this queue in log output
        tid = get_thread_id() # main thread ID; lets the channel bypass the queue when called in-thread
        with pool_verbose.write_t(f"Launching messaging queue '{cid}' using thread '{tid}'... ", end='') as tme:
            self._cid = cid
            self._tid = tid
            self._pool_verbose = pool_verbose
            # A Manager-backed queue is used so it can be shared with "loky" worker processes.
            self._mgr = Manager()
            self._queue = self._mgr.Queue()
            # Daemon thread: it must not keep the interpreter alive if terminate() is never called.
            self._thread = Thread(target=self.report, kwargs=dict(cid=cid, queue=self._queue, f_verbose=f_verbose, verbose_interval=verbose_interval), daemon=True)
            # The pickle-able Context handed to workers; its channel feeds self._queue.
            self._mp_context = Context( f_verbose,
                                        channel=ParallelContextChannel(
                                            cid=self._cid,
                                            queue=self._queue,
                                            maintid=self._tid,
                                            f_verbose=f_verbose
                                        ) )
            self._thread.start()
            pool_verbose.write(f"done; this took {tme}.", head=False)

    def __del__(self):
        """ clean up; should not be necessary """
        self.terminate()

    def terminate(self):
        """ stop all multi-thread/processing activity """
        if self._queue is None:
            return # already terminated; terminate() is idempotent
        tme = Timer()
        # 'None' is the sentinel telling report() to stop (see report()).
        self._queue.put( None )
        self._thread.join(timeout=2)
        if self._thread.is_alive():
            raise RuntimeError("Failed to terminate thread")
        self._thread = None
        self._queue = None
        self._mgr = None # dropping the Manager shuts down its server process
        gc.collect()
        self._pool_verbose.write(f"Terminated message queue '{self.cid}'. This took {tme}.")

    @property
    def cid(self) -> str:
        """ context ID. Useful for debugging """
        return self._cid

    @property
    def mp_context(self):
        """ Return the actual channel as a pickleable object """
        return self._mp_context

    @staticmethod
    def report( cid : str, queue : Queue, f_verbose : Context, verbose_interval : float ):
        """
        Thread program to keep reporting messages until None is received.

        Queue protocol: ``None`` terminates the loop; an :class:`Exception` is
        printed and re-raised (in this thread); anything else is a ``(msg, flush)``
        pair to be written to ``f_verbose``.
        """
        tme = f_verbose.timer()
        while True:
            r = queue.get()
            if r is None:
                break
            if isinstance(r, Exception):
                print(f"*** Messaging queue {cid} encountered an exception: {r}. Aborting.")
                raise r
            msg, flush = r
            # Throttle output: messages arriving before the interval elapses are
            # dropped, not buffered.
            if tme.interval_test(verbose_interval):
                f_verbose.write(msg, end='', flush=flush)

    def __enter__(self):
        return self.mp_context

    def __exit__(self, *kargs, **kwargs):
        # NOTE(review): terminate() is deliberately not called here — presumably
        # so the queue keeps serving until this operator is garbage collected;
        # confirm intended lifetime with the author.
        #self.terminate()
        return False # do not suppress exceptions raised inside the 'with' body
136
+
137
+ class _DIF(object):
138
+ """ _DictIterator 'F' """
139
+ def __init__(self, k : str, f : Callable, merge_tuple : bool ):
140
+ self._f = f
141
+ self._k = k
142
+ self._merge_tuple = merge_tuple
143
+ def __call__(self, *args, **kwargs):
144
+ r = self._f(*args, **kwargs)
145
+ if not self._merge_tuple or not isinstance(r, tuple):
146
+ return (self._k, r)
147
+ return ((self._k,) + r)
148
+
149
+ class _DictIterator(object):
150
+ """ Dictionary iterator """
151
+ def __init__(self, jobs : Mapping, merge_tuple : bool):
152
+ self._jobs = jobs
153
+ self._merge_tuple = merge_tuple
154
+ def __iter__(self):
155
+ for k, v in self._jobs.items():
156
+ f, args, kwargs = v
157
+ yield _DIF(k,f, self._merge_tuple), args, kwargs
158
+ def __len__(self):#don't really need that but good to have
159
+ return len(self._jobs)
160
+
161
+ def _parallel(pool, jobs : Iterable) -> Iterable:
162
+ """
163
+ Process 'jobs' in parallel using the current multiprocessing pool.
164
+ All (function) values of 'jobs' must be generated using self.delayed.
165
+ See help(JCPool) for usage patterns.
166
+
167
+ Parameters
168
+ ----------
169
+ jobs:
170
+ can be a sequence, a generator, or a dictionary.
171
+ Each function value must have been generated using JCPool.delayed()
172
+
173
+ Returns
174
+ -------
175
+ An iterator which yields results as soon as they are available.
176
+ If 'jobs' is a dictionary, then the resutling iterator will generate tuples with the first
177
+ element equal to the dictionary key of the respective function job.
178
+ """
179
+ if not isinstance(jobs, Mapping):
180
+ return pool( jobs )
181
+ return pool( _DictIterator(jobs,merge_tuple=True) )
182
+
183
def _parallel_to_dict(pool, jobs : Mapping) -> Mapping:
    """
    Process 'jobs' in parallel using the multiprocessing pool ``pool``.
    All values of the dictionary 'jobs' must be generated using JCPool.delayed.
    This function awaits the calculation of all elements of 'jobs' and
    returns a dictionary with the results.

    See help(JCPool) for usage patterns.

    Parameters
    ----------
    jobs:
        A dictionary where all (function) values must have been generated using JCPool.delayed.

    Returns
    -------
    A dictionary with results.
    If 'jobs' is an OrderedDict, then this function will return an OrderedDict
    with the same order as 'jobs'; otherwise results appear in completion order.
    """
    assert isinstance(jobs, Mapping), ("'jobs' must be a Mapping.", type(jobs))
    # Keys are returned as the first tuple element (merge_tuple=False), so the
    # completed stream converts directly into a dict.
    results = dict( pool( _DictIterator(jobs, merge_tuple=False) ) )
    if not isinstance(jobs, OrderedDict):
        return results
    # Preserve the caller's ordering for OrderedDict inputs.
    return OrderedDict( (key, results[key]) for key in jobs )
211
+
212
def _parallel_to_list(pool, jobs : Sequence ) -> Sequence:
    """
    Call parallel() and convert the resulting generator into a list.

    Parameters
    ----------
    jobs:
        Can be a sequence or a generator (not a dictionary).
        Each function value must have been generated using JCPool.delayed().

    Returns
    -------
    A list with the results in the order of the input.
    """
    assert not isinstance( jobs, Mapping ), ("'jobs' is a Mapping. Use parallel_to_dict() instead.", type(jobs))
    # Tag each job with its position so the (unordered) results can be
    # restored to input order.
    indexed = dict( enumerate(jobs) )
    results = _parallel_to_dict( pool, indexed )
    return [ results[i] for i in indexed ]
230
+
231
class JCPool( object ):
    r"""
    Parallel Job Context Pool.

    Simple wrapper around `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
    which allows worker processes to use :class:`cdxcore.verbose.Context` to report
    progress updates. For this purpose, :class:`cdxcore.verbose.Context`
    will send output messages via a :class:`multiprocessing.Queue`
    to the main process
    where a separate thread prints these messages out.

    Using a fixed central pool object in your code base
    avoids relaunching processes.

    Functions passed to :meth:`cdxcore.jcpool.JCPool.parallel` and related functions must
    be decorated with :meth:`cdxcore.jcpool.JCPool.delayed`.

    **List/Generator Usage**

    The following code is a standard prototype for using :func:`cdxcore.jcpool.JCPool.parallel`
    following closely the `joblib paradigm <https://joblib.readthedocs.io/en/latest/parallel.html>`__:

    .. code-block:: python

        from cdxcore.verbose import Context
        from cdxcore.jcpool import JCPool
        import time as time
        import numpy as np

        pool = JCPool( num_workers=4 ) # global pool. Reuse where possible

        def f( ticker, tdata, verbose : Context ):
            # some made up function
            q = np.quantile( tdata, 0.35, axis=0 )
            tx = q[0]
            ty = q[1]
            time.sleep(0.5)
            verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
            return tx, ty

        tickerdata =\
        { 'SPY': np.random.normal(size=(1000,2)),
          'GLD': np.random.normal(size=(1000,2)),
          'BTC': np.random.normal(size=(1000,2))
        }

        verbose = Context("all")
        with verbose.write_t("Launching analysis") as tme:
            with pool.context( verbose ) as verbose:
                for tx, ty in pool.parallel(
                        pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
                        for ticker, tdata in tickerdata.items() ):
                    verbose.report(1,f"Returned {tx:.2f}, {ty:.2f}")
            verbose.write(f"Analysis done; this took {tme}.")

    The output from this code is asynchronous:

    .. code-block:: python

        00: Launching analysis
        02: Result for SPY: -0.43, -0.39
        01: Returned -0.43, -0.39
        02: Result for BTC: -0.39, -0.45
        01: Returned -0.39, -0.45
        02: Result for GLD: -0.41, -0.43
        01: Returned -0.41, -0.43
        00: Analysis done; this took 0.73s.

    **Dict**

    Considering the asynchronous nature of the returned data it is often desirable
    to keep track of results by some identifier. In above example ``ticker``
    was not available in the main loop.
    This pattern is automated with the dictionary usage pattern:

    .. code-block:: python
        :emphasize-lines: 26,27,28,29

        from cdxcore.verbose import Context
        from cdxcore.jcpool import JCPool
        import time as time
        import numpy as np

        pool = JCPool( num_workers=4 ) # global pool. Reuse where possible

        def f( ticker, tdata, verbose : Context ):
            # some made up function
            q = np.quantile( tdata, 0.35, axis=0 )
            tx = q[0]
            ty = q[1]
            time.sleep(0.5)
            verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
            return tx, ty

        tickerdata =\
        { 'SPY': np.random.normal(size=(1000,2)),
          'GLD': np.random.normal(size=(1000,2)),
          'BTC': np.random.normal(size=(1000,2))
        }

        verbose = Context("all")
        with verbose.write_t("Launching analysis") as tme:
            with pool.context( verbose ) as verbose:
                for ticker, tx, ty in pool.parallel(
                        { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
                          for ticker, tdata in tickerdata.items() } ):
                    verbose.report(1,f"Returned {ticker} {tx:.2f}, {ty:.2f}")
            verbose.write(f"Analysis done; this took {tme}.")

    This generates the following output::

        00: Launching analysis
        02: Result for SPY: -0.34, -0.41
        01: Returned SPY -0.34, -0.41
        02: Result for GLD: -0.38, -0.41
        01: Returned GLD -0.38, -0.41
        02: Result for BTC: -0.34, -0.32
        01: Returned BTC -0.34, -0.32
        00: Analysis done; this took 5s.

    Note that :func:`cdxcore.jcpool.JCPool.parallel` when applied to a dictionary does not return a dictionary,
    but a sequence of tuples.
    As in the example this also works if the function being called returns tuples itself; in this case the returned data
    is extended by the key of the dictionary provided.

    In order to retrieve a dictionary use :func:`cdxcore.jcpool.JCPool.parallel_to_dict`::

        verbose = Context("all")
        with pool.context( verbose ) as verbose:
            r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose )
                                         for ticker, tdata in self.data.items() } )

    Note that in this case the function returns only after all jobs have been processed.

    Parameters
    ----------
    num_workers : int, optional

        The number of workers. If ``num_workers`` is ``1`` then no parallel process or thread is started.
        Just as for `joblib <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__ you can
        use a negative ``num_workers`` to set the number of workers to the ``number of CPUs + num_workers + 1``.
        For example, a ``num_workers`` of ``-2`` will use as many jobs as CPUs are present less one.
        If ``num_workers`` is negative, the effective number of workers will be at least ``1``.

        Default is ``1``.

    threading : bool, optional

        If ``False``, the default, then the pool will act as a ``"loky"`` multi-process pool with the associated overhead
        of managing data across processes.

        If ``True``, then the pool is a ``"threading"`` pool. This helps for functions whose code releases
        Python's `global interpreter lock <https://wiki.python.org/moin/GlobalInterpreterLock>`__, for example
        when engaged in heavy I/O or compiled code such as :mod:`numpy`, :mod:`pandas`,
        or generated with `numba <https://numba.pydata.org/>`__.

    tmp_root_dir : str | SubDir, optional

        Temporary directory for memory mapping large arrays. This is a root directory; the function
        will create a temporary sub-directory with a name generated from the current state of the system.
        This sub-directory will be deleted upon destruction of ``JCPool`` or when :meth:`cdxcore.jcpool.JCPool.terminate`
        is called.

        This parameter can also be ``None`` in which case the `default behaviour <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
        of :class:`joblib.Parallel` is used.

        Default is ``"!/.cdxmp"``.

    verbose : Context, optional

        A :class:`cdxcore.verbose.Context` object used to print out multi-processing/threading information.
        This is *not* the ``Context`` provided to child processes/threads.

        Default is ``quiet``.

    parallel_kwargs : dict, optional

        Additional keywords for :class:`joblib.Parallel`. Default is ``None`` (no additional keywords).
    """
    def __init__(self, num_workers : int = 1,
                       threading : bool = False,
                       tmp_root_dir : str|SubDir = "!/.cdxmp", *,
                       verbose : Context = Context.quiet,
                       parallel_kwargs : dict = None ):
        """
        Initialize a multi-processing pool. Thin wrapper around joblib.Parallel for cdxcore.verbose.Context() output.
        """
        # 'parallel_kwargs' defaults to None rather than a mutable {} so no dict
        # instance is shared between calls; copy defensively.
        parallel_kwargs = dict(parallel_kwargs) if parallel_kwargs is not None else {}
        # Unique temp sub-directory name derived from host, process, thread, and time.
        tmp_dir_ext  = unique_hash8( uuid.getnode(), os.getpid(), get_thread_id(), datetime.datetime.now() )
        num_workers  = int(num_workers)
        tmp_root_dir = SubDir(tmp_root_dir) if tmp_root_dir is not None else None
        self._tmp_dir   = tmp_root_dir(tmp_dir_ext, ext='') if tmp_root_dir is not None else None
        self._verbose   = verbose if verbose is not None else Context("quiet")
        self._threading = threading
        self._pool      = None # pre-set so terminate()/__del__ are safe if launching below fails

        if num_workers < 0:
            # joblib convention: negative values mean cpu_count() + num_workers + 1,
            # floored at one worker.
            num_workers = max( self.cpu_count() + num_workers + 1, 1 )

        path_info = f" with temporary directory '{self.tmp_path}'" if self.tmp_path is not None else ''
        with self._verbose.write_t(f"Launching {num_workers} processes{path_info}... ", end='') as tme:
            self._pool = joblib_Parallel( n_jobs=num_workers,
                                          backend="loky" if not threading else "threading",
                                          return_as="generator_unordered",
                                          temp_folder=self.tmp_path,
                                          **parallel_kwargs)
            self._verbose.write(f"done; this took {tme}.", head=False)

    def __del__(self):
        # __del__ may run even if __init__ failed part-way; only call terminate()
        # if construction got far enough for it to have something to do.
        if getattr(self, "_pool", None) is not None or getattr(self, "_tmp_dir", None) is not None:
            self.terminate()

    @property
    def tmp_path(self) -> str|None:
        """ Path to the temporary directory for this object, or ``None`` if unmanaged. """
        return self._tmp_dir.path if self._tmp_dir is not None else None

    @property
    def is_threading(self) -> bool:
        """ Whether we are threading or multi-processing. """
        return self._threading

    @staticmethod
    def cpu_count( only_physical_cores : bool = False ) -> int:
        """
        Return the number of physical CPUs.

        Parameters
        ----------
        only_physical_cores : boolean, optional

            If ``True``, does not take hyperthreading / SMT logical cores into account.
            Default is ``False``.

        Returns
        -------
        cpus : int
            Count
        """
        return cpu_count(only_physical_cores=only_physical_cores)

    def terminate(self):
        """
        Stop the current parallel pool, and delete any temporary files (if managed by ``JCPool``).

        This method is idempotent; calling it twice is a no-op the second time.
        """
        if self._pool is not None:
            tme = Timer()
            del self._pool
            self._pool = None
            self._verbose.write(f"Shut down parallel pool. This took {tme}.")
            gc.collect() # prompt release of worker resources held by the pool
        if self._tmp_dir is not None:
            dir_name = self._tmp_dir.path
            self._tmp_dir.delete_everything(keep_directory=False)
            self._tmp_dir = None # mark deleted so a second terminate() does not re-delete
            self._verbose.write(f"Deleted temporary directory {dir_name}.")

    def context( self, verbose : Context, verbose_interval : float = None ):
        """
        Parallel processing ``Context`` object.

        This function returns a :class:`cdxcore.verbose.Context` object whose ``channel`` is a queue towards a utility thread
        which will output all messages to ``verbose``.
        As a result a worker process is able to use ``verbose`` as if it were in-process.

        A standard usage pattern is:

        .. code-block:: python
            :emphasize-lines: 13, 14

            from cdxcore.verbose import Context
            from cdxcore.jcpool import JCPool
            import time as time
            import numpy as np

            pool = JCPool( num_workers=4 ) # global pool. Reuse where possible

            def f( x, verbose : Context ):
                verbose.write(f"Found {x}") # <- text "Found 1" etc will be sent
                return x                    # to main thread via Queue

            verbose = Context("all")
            with pool.context( verbose ) as verbose:
                for x in pool.parallel( pool.delayed(f)( x=x, verbose=verbose(1) ) for x in [1,2,3,4] ):
                    verbose.write(f"Returned {x}")

        See :class:`cdxcore.jcpool.JCPool` for more usage patterns.
        """
        if self._threading:
            # Threads share memory with the main thread; no queue is needed.
            return verbose
        return _ParallelContextOperator( pool_verbose=self._verbose,
                                         f_verbose=verbose,
                                         verbose_interval=verbose_interval )

    @staticmethod
    def _validate( F : Callable, args : list, kwargs : Mapping ):
        """ Check that ``args`` and ``kwargs`` do not contain ``Context`` objects without a parallel channel. """
        for i, v in enumerate(args):
            if isinstance(v, Context) and not isinstance(v.channel, ParallelContextChannel):
                raise RuntimeError(f"Argument #{i} for {F.__qualname__} is a Context object, but its channel is not set to 'ParallelContextChannel'. Use JCPool.context().")
        for k, v in kwargs.items():
            if isinstance(v, Context) and not isinstance(v.channel, ParallelContextChannel):
                raise RuntimeError(f"Keyword argument '{k}' for {F.__qualname__} is a Context object, but its channel is not set to 'ParallelContextChannel'. Use JCPool.context().")

    def delayed(self, F : Callable):
        """
        Decorate a function for parallel execution.

        This decorator adds minor syntactic sugar on top of :func:`joblib.delayed`
        (which in turn is discussed `here <https://joblib.readthedocs.io/en/latest/parallel.html#parallel>`__).

        When called, this decorator checks that no :class:`cdxcore.verbose.Context`
        arguments are passed to the pooled function which have no ``ParallelContextChannel`` present. In other words,
        the function detects if the user forgot to use :meth:`cdxcore.jcpool.JCPool.context`.

        Parameters
        ----------
        F : Callable
            Function.

        Returns
        -------
        wrapped F : Callable
            Decorated function.
        """
        if self._threading:
            # Threading pools need no Context plumbing; defer to joblib directly.
            return _jl_delayed(F)
        def delayed_function( *args, **kwargs ):
            JCPool._validate( F, args, kwargs )
            return F, args, kwargs # mimic joblib.delayed()
        try:
            delayed_function = functools.wraps(F)(delayed_function)
        except AttributeError:
            # functools.wraps fails on some callable objects (e.g. objects
            # without __name__); the undecorated wrapper works fine then.
            pass
        return delayed_function

    def parallel(self, jobs : Sequence|Mapping) -> Iterable:
        """
        Process a number of jobs in parallel using the current multiprocessing pool.

        All functions used in ``jobs`` must have been decorated using :meth:`cdxcore.jcpool.JCPool.delayed`.

        This function returns an iterator which yields results as soon as they
        are computed.

        If ``jobs`` is a ``Sequence`` you can also use
        :meth:`cdxcore.jcpool.JCPool.parallel_to_list` to retrieve
        a :class:`list` of all results upon completion of the last job. Similarly, if ``jobs``
        is a ``Mapping``, use :meth:`cdxcore.jcpool.JCPool.parallel_to_dict` to retrieve
        a :class:`dict` of results upon completion of the last job.

        Parameters
        ----------
        jobs : Sequence | Mapping
            Can be a :class:`Sequence` containing ``Callable`` functions,
            or a :class:`Mapping` whose values are ``Callable`` functions.

            Each ``Callable`` used as part of either must
            have been decorated with :meth:`cdxcore.jcpool.JCPool.delayed`.

        Returns
        -------
        parallel : Iterator
            An iterator which yields results as soon as they are available.
            If ``jobs`` is a :class:`Mapping`, then the resulting iterator will generate tuples with the first
            element equal to the mapping key of the respective function job. This function will *not*
            return a dictionary.
        """
        return _parallel( self._pool, jobs )

    def parallel_to_dict(self, jobs : Mapping) -> dict:
        """
        Process a number of jobs in parallel using the current multiprocessing pool,
        and return all results in a dictionary upon completion.

        This function awaits the calculation of all elements of ``jobs`` and
        returns a :class:`dict` with the results.

        Parameters
        ----------
        jobs : Mapping
            A dictionary where all (function) values must have been decorated
            with :meth:`cdxcore.jcpool.JCPool.delayed`.

        Returns
        -------
        Results : dict
            A dictionary with results.

            If ``jobs`` is an :class:`OrderedDict`, then this function will return an :class:`OrderedDict`
            with the same order as ``jobs``. Otherwise the elements of the ``dict`` returned
            by this function are in completion order.
        """
        return _parallel_to_dict( self._pool, jobs )

    def parallel_to_list(self, jobs : Sequence ) -> Sequence:
        """
        Process a number of jobs in parallel using the current multiprocessing pool,
        and return all results in a list upon completion.

        This function awaits the calculation of all elements of ``jobs`` and
        returns a :class:`list` with the results.

        Parameters
        ----------
        jobs : Sequence
            A sequence of ``Callable`` functions, each of which
            must have been decorated
            with :meth:`cdxcore.jcpool.JCPool.delayed`.

        Returns
        -------
        Results : list
            A list with results, in the order of ``jobs``.
        """
        return _parallel_to_list( self._pool, jobs )
643
+