cdxcore 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of cdxcore might be problematic. Click here for more details.
- cdxcore/__init__.py +1 -1
- cdxcore/crman.py +4 -1
- cdxcore/jcpool.py +337 -105
- cdxcore/subdir.py +1 -1
- {cdxcore-0.1.10.dist-info → cdxcore-0.1.11.dist-info}/METADATA +1 -1
- {cdxcore-0.1.10.dist-info → cdxcore-0.1.11.dist-info}/RECORD +11 -10
- tests/test_crman.py +1 -3
- tests/test_jcpool.py +119 -0
- {cdxcore-0.1.10.dist-info → cdxcore-0.1.11.dist-info}/WHEEL +0 -0
- {cdxcore-0.1.10.dist-info → cdxcore-0.1.11.dist-info}/licenses/LICENSE +0 -0
- {cdxcore-0.1.10.dist-info → cdxcore-0.1.11.dist-info}/top_level.txt +0 -0
cdxcore/__init__.py
CHANGED
cdxcore/crman.py
CHANGED
|
@@ -147,7 +147,10 @@ class CRMan(object):
|
|
|
147
147
|
"""
|
|
148
148
|
return self._current
|
|
149
149
|
|
|
150
|
-
def write(self, text : str,
|
|
150
|
+
def write(self, text : str,
|
|
151
|
+
end : str = '',
|
|
152
|
+
flush : bool = True,
|
|
153
|
+
channel : Callable = None ):
|
|
151
154
|
r"""
|
|
152
155
|
Write to a ``channel``,
|
|
153
156
|
|
cdxcore/jcpool.py
CHANGED
|
@@ -1,34 +1,57 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
1
|
"""
|
|
3
|
-
|
|
2
|
+
Overview
|
|
3
|
+
--------
|
|
4
|
+
|
|
5
|
+
Simple multi-processing convenience wrapper around (already great)
|
|
6
|
+
`joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__.
|
|
7
|
+
|
|
4
8
|
The minor additions are that parallel processing will be a tad more convenient for dictionaries,
|
|
5
|
-
and that it supports routing
|
|
9
|
+
and that it supports routing :class:`cdxcore.verbose.Context` messaging via a
|
|
10
|
+
:class:`multiprocessing.Queue` to a single thread.
|
|
11
|
+
|
|
12
|
+
Import
|
|
13
|
+
------
|
|
14
|
+
.. code-block:: python
|
|
15
|
+
|
|
16
|
+
from cdxcore.jcpool import JCPool
|
|
17
|
+
|
|
18
|
+
Documentation
|
|
19
|
+
-------------
|
|
6
20
|
"""
|
|
7
21
|
|
|
8
|
-
from joblib import Parallel as joblib_Parallel, delayed as
|
|
22
|
+
from joblib import Parallel as joblib_Parallel, delayed as _jl_delayed, cpu_count
|
|
9
23
|
from multiprocessing import Manager, Queue
|
|
10
24
|
from threading import Thread, get_ident as get_thread_id
|
|
11
25
|
import gc as gc
|
|
12
26
|
from collections import OrderedDict
|
|
13
27
|
from collections.abc import Mapping, Callable, Sequence, Iterable
|
|
14
28
|
import functools as functools
|
|
29
|
+
import uuid as uuid
|
|
30
|
+
import os as os
|
|
31
|
+
import datetime as datetime
|
|
15
32
|
|
|
16
33
|
from .verbose import Context, Timer
|
|
17
34
|
from .subdir import SubDir
|
|
35
|
+
from .uniquehash import unique_hash8
|
|
18
36
|
|
|
19
37
|
class ParallelContextChannel( Context ):
|
|
20
38
|
"""
|
|
21
|
-
Lightweight
|
|
22
|
-
|
|
39
|
+
Lightweight :class:`cdxcore.verbose.Context` ``channel`` which is pickle'able.
|
|
40
|
+
|
|
41
|
+
This channel sends messages it receives to a :class:`multiprocessing.Queue`.
|
|
23
42
|
"""
|
|
24
|
-
def __init__(self, *, cid, maintid, queue):
|
|
43
|
+
def __init__(self, *, cid, maintid, queue, f_verbose):
|
|
25
44
|
self._queue = queue
|
|
26
45
|
self._cid = cid
|
|
27
46
|
self._maintid = maintid
|
|
47
|
+
self._f_verbose = f_verbose
|
|
28
48
|
def __call__(self, msg : str, flush : bool ):
|
|
29
|
-
"""
|
|
49
|
+
"""
|
|
50
|
+
Sends ``msg`` via a :class:`multiprocessing.Queue` to the main thread for
|
|
51
|
+
printing.
|
|
52
|
+
"""
|
|
30
53
|
if get_thread_id() == self._maintid:
|
|
31
|
-
|
|
54
|
+
self._f_verbose.write(msg, end='', flush=flush)
|
|
32
55
|
else:
|
|
33
56
|
return self._queue.put( (msg, flush) )
|
|
34
57
|
|
|
@@ -51,7 +74,12 @@ class _ParallelContextOperator( object ):
|
|
|
51
74
|
self._queue = self._mgr.Queue()
|
|
52
75
|
self._thread = Thread(target=self.report, kwargs=dict(cid=cid, queue=self._queue, f_verbose=f_verbose, verbose_interval=verbose_interval), daemon=True)
|
|
53
76
|
self._mp_context = Context( f_verbose,
|
|
54
|
-
channel=ParallelContextChannel(
|
|
77
|
+
channel=ParallelContextChannel(
|
|
78
|
+
cid=self._cid,
|
|
79
|
+
queue=self._queue,
|
|
80
|
+
maintid=self._tid,
|
|
81
|
+
f_verbose=f_verbose
|
|
82
|
+
) )
|
|
55
83
|
self._thread.start()
|
|
56
84
|
pool_verbose.write(f"done; this took {tme}.", head=False)
|
|
57
85
|
|
|
@@ -97,7 +125,7 @@ class _ParallelContextOperator( object ):
|
|
|
97
125
|
raise r
|
|
98
126
|
msg, flush = r
|
|
99
127
|
if tme.interval_test(verbose_interval):
|
|
100
|
-
|
|
128
|
+
f_verbose.write(msg, end='', flush=flush)
|
|
101
129
|
|
|
102
130
|
def __enter__(self):
|
|
103
131
|
return self.mp_context
|
|
@@ -196,104 +224,251 @@ def _parallel_to_list(pool, jobs : Sequence ) -> Sequence:
|
|
|
196
224
|
A list with the results in order of the input.
|
|
197
225
|
"""
|
|
198
226
|
assert not isinstance( jobs, Mapping ), ("'jobs' is a Mapping. Use parallel_to_dict() instead.", type(jobs))
|
|
199
|
-
|
|
200
|
-
|
|
227
|
+
lst = { i: j for i, j in enumerate(jobs) }
|
|
228
|
+
r = _parallel_to_dict( pool, lst )
|
|
229
|
+
return list( r[i] for i in lst )
|
|
201
230
|
|
|
202
231
|
class JCPool( object ):
|
|
203
|
-
"""
|
|
204
|
-
Parallel Job Context Pool
|
|
232
|
+
r"""
|
|
233
|
+
Parallel Job Context Pool.
|
|
205
234
|
|
|
206
|
-
Simple wrapper around joblib.Parallel
|
|
207
|
-
|
|
235
|
+
Simple wrapper around `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
|
|
236
|
+
which allows worker processes to use :class:`cdxcore.verbose.Context` to report
|
|
237
|
+
progress updates. For this purpose, :class:`cdxcore.verbose.Context`
|
|
238
|
+
will send output messages via a :class:`multiprocessing.Queue`
|
|
239
|
+
to the main process
|
|
208
240
|
where a separate thread prints these messages out.
|
|
209
|
-
|
|
241
|
+
|
|
242
|
+
Using a fixed central pool object in your code base
|
|
243
|
+
avoids relaunching processes.
|
|
210
244
|
|
|
211
|
-
|
|
212
|
-
|
|
245
|
+
Functions passed to :meth:`cdxcore.jcpool.JCPool.parallel` and related functions must
|
|
246
|
+
be decorated with :dec:`cdxcore.jcpool.JCPool.delayed`.
|
|
213
247
|
|
|
214
|
-
Usage
|
|
215
|
-
-----
|
|
216
|
-
Assume we have a function such as:
|
|
248
|
+
**List/Generator Usage**
|
|
217
249
|
|
|
250
|
+
The following code is a standard prototype for using :func:`cdxcore.jcpool.JCPool.parallel`
|
|
251
|
+
following closely the `joblib paradigm <https://joblib.readthedocs.io/en/latest/parallel.html>`__:
|
|
252
|
+
|
|
253
|
+
.. code-block:: python
|
|
254
|
+
|
|
255
|
+
from cdxcore.verbose import Context
|
|
256
|
+
from cdxcore.jcpool import JCPool
|
|
257
|
+
import time as time
|
|
258
|
+
import numpy as np
|
|
259
|
+
|
|
260
|
+
pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
|
|
261
|
+
|
|
218
262
|
def f( ticker, tdata, verbose : Context ):
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
263
|
+
# some made up function
|
|
264
|
+
q = np.quantile( tdata, 0.35, axis=0 )
|
|
265
|
+
tx = q[0]
|
|
266
|
+
ty = q[1]
|
|
267
|
+
time.sleep(0.5)
|
|
268
|
+
verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
|
|
269
|
+
return tx, ty
|
|
270
|
+
|
|
271
|
+
tickerdata =\
|
|
272
|
+
{ 'SPY': np.random.normal(size=(1000,2)),
|
|
273
|
+
'GLD': np.random.normal(size=(1000,2)),
|
|
274
|
+
'BTC': np.random.normal(size=(1000,2))
|
|
275
|
+
}
|
|
276
|
+
|
|
231
277
|
verbose = Context("all")
|
|
232
|
-
with
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
278
|
+
with verbose.write_t("Launching analysis") as tme:
|
|
279
|
+
with pool.context( verbose ) as verbose:
|
|
280
|
+
for tx, ty in pool.parallel(
|
|
281
|
+
pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
|
|
282
|
+
for ticker, tdata in tickerdata.items() ):
|
|
283
|
+
verbose.report(1,f"Returned {tx:.2f}, {ty:.2f}")
|
|
284
|
+
verbose.write(f"Analysis done; this took {tme}.")
|
|
236
285
|
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
286
|
+
The output from this code is asynchronous:
|
|
287
|
+
|
|
288
|
+
.. code-block:: python
|
|
289
|
+
|
|
290
|
+
00: Launching analysis
|
|
291
|
+
02: Result for SPY: -0.43, -0.39
|
|
292
|
+
01: Returned -0.43, -0.39
|
|
293
|
+
02: Result for BTC: -0.39, -0.45
|
|
294
|
+
01: Returned -0.39, -0.45
|
|
295
|
+
02: Result for GLD: -0.41, -0.43
|
|
296
|
+
01: Returned -0.41, -0.43
|
|
297
|
+
00: Analysis done; this took 0.73s.
|
|
298
|
+
|
|
299
|
+
**Dict**
|
|
300
|
+
|
|
301
|
+
Considering the asynchronous nature of the returned data it is often desirable
|
|
302
|
+
to keep track of results by some identifier. In above example ``ticker``
|
|
303
|
+
was not available in the main loop.
|
|
304
|
+
This pattern is automated with the dictionary usage pattern:
|
|
241
305
|
|
|
242
|
-
|
|
306
|
+
.. code-block:: python
|
|
307
|
+
:emphasize-lines: 26,27,28,29
|
|
308
|
+
|
|
309
|
+
from cdxcore.verbose import Context
|
|
310
|
+
from cdxcore.jcpool import JCPool
|
|
311
|
+
import time as time
|
|
312
|
+
import numpy as np
|
|
313
|
+
|
|
314
|
+
pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
|
|
315
|
+
|
|
316
|
+
def f( ticker, tdata, verbose : Context ):
|
|
317
|
+
# some made up function
|
|
318
|
+
q = np.quantile( tdata, 0.35, axis=0 )
|
|
319
|
+
tx = q[0]
|
|
320
|
+
ty = q[1]
|
|
321
|
+
time.sleep(0.5)
|
|
322
|
+
verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
|
|
323
|
+
return tx, ty
|
|
324
|
+
|
|
325
|
+
tickerdata =\
|
|
326
|
+
{ 'SPY': np.random.normal(size=(1000,2)),
|
|
327
|
+
'GLD': np.random.normal(size=(1000,2)),
|
|
328
|
+
'BTC': np.random.normal(size=(1000,2))
|
|
329
|
+
}
|
|
330
|
+
|
|
243
331
|
verbose = Context("all")
|
|
244
|
-
with
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
332
|
+
with verbose.write_t("Launching analysis") as tme:
|
|
333
|
+
with pool.context( verbose ) as verbose:
|
|
334
|
+
for ticker, tx, ty in pool.parallel(
|
|
335
|
+
{ ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
|
|
336
|
+
for ticker, tdata in tickerdata.items() } ):
|
|
337
|
+
verbose.report(1,f"Returned {ticker} {tx:.2f}, {ty:.2f}")
|
|
338
|
+
verbose.write(f"Analysis done; this took {tme}.")
|
|
339
|
+
|
|
340
|
+
This generates the following output::
|
|
341
|
+
|
|
342
|
+
00: Launching analysis
|
|
343
|
+
02: Result for SPY: -0.34, -0.41
|
|
344
|
+
01: Returned SPY -0.34, -0.41
|
|
345
|
+
02: Result for GLD: -0.38, -0.41
|
|
346
|
+
01: Returned GLD -0.38, -0.41
|
|
347
|
+
02: Result for BTC: -0.34, -0.32
|
|
348
|
+
01: Returned BTC -0.34, -0.32
|
|
349
|
+
00: Analysis done; this took 5s.
|
|
350
|
+
|
|
351
|
+
Note that :func:`cdxcore.jcpool.JCPool.parallel` when applied to a dictionary does not return a dictionary,
|
|
352
|
+
but a sequence of tuples.
|
|
250
353
|
As in the example this also works if the function being called returns tuples itself; in this case the returned data
|
|
251
354
|
is extended by the key of the dictionary provided.
|
|
252
355
|
|
|
253
|
-
In order to retrieve a dictionary use
|
|
356
|
+
In order to retrieve a dictionary use :func:`cdxcore.jcpool.JCPool.parallel_to_dict`::
|
|
254
357
|
|
|
255
|
-
pool = JPool( num_workers=4 )
|
|
256
358
|
verbose = Context("all")
|
|
257
359
|
with pool.context( verbose ) as verbose:
|
|
258
|
-
r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose )
|
|
259
|
-
|
|
360
|
+
r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose )
|
|
361
|
+
for ticker, tdata in self.data.items() } )
|
|
260
362
|
|
|
261
|
-
Note that in this case the function returns after all
|
|
363
|
+
Note that in this case the function returns only after all jobs have been processed.
|
|
364
|
+
|
|
365
|
+
Parameters
|
|
366
|
+
----------
|
|
367
|
+
num_workers : int, optional
|
|
368
|
+
|
|
369
|
+
The number of workers. If ``num_workers`` is ``1`` then no parallel process or thread is started.
|
|
370
|
+
Just as for `joblib <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__ you can
|
|
371
|
+
use a negative ``num_workers`` to set the number of workers to the ``number of CPUs + num_workers + 1``.
|
|
372
|
+
For example, a ``num_workers`` of ``-2`` will use as many jobs as CPUs are present less one.
|
|
373
|
+
If ``num_workers`` is negative, the effective number of workers will be at least ``1``.
|
|
374
|
+
|
|
375
|
+
Default is ``1``.
|
|
376
|
+
|
|
377
|
+
threading : bool, optional
|
|
378
|
+
|
|
379
|
+
If ``False``, the default, then the pool will act as a ``"loky"`` multi-process pool with the associated overhead
|
|
380
|
+
of managing data across processes.
|
|
381
|
+
|
|
382
|
+
If ``True``, then the pool is a ``"threading"`` pool. This helps for functions whose code releases
|
|
383
|
+
Python's `global interpreter lock <https://wiki.python.org/moin/GlobalInterpreterLock>`__, for example
|
|
384
|
+
when engaged in heavy I/O or compiled code such as :mod:`numpy`, :mod:`pandas`,
|
|
385
|
+
or generated with `numba <https://numba.pydata.org/>`__.
|
|
386
|
+
|
|
387
|
+
tmp_root_dir : str | SubDir, optional
|
|
388
|
+
|
|
389
|
+
Temporary directory for memory mapping large arrays. This is a root directory; the function
|
|
390
|
+
will create a temporary sub-directory with a name generated from the current state of the system.
|
|
391
|
+
This sub-directory will be deleted upon destruction of ``JCPool`` or when :meth:`cdxcore.jcpool.JCPool.terminate`
|
|
392
|
+
is called.
|
|
393
|
+
|
|
394
|
+
This parameter can also be ``None`` in which case the `default behaviour <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
|
|
395
|
+
of :class:`joblib.Parallel` is used.
|
|
396
|
+
|
|
397
|
+
Default is ``"!/.cdxmp"``.
|
|
398
|
+
|
|
399
|
+
verbose : Context, optional
|
|
400
|
+
|
|
401
|
+
A :class:`cdxcore.verbose.Context` object used to print out multi-processing/threading information.
|
|
402
|
+
This is *not* the ``Context`` provided to child processes/threads.
|
|
403
|
+
|
|
404
|
+
Default is ``quiet``.
|
|
405
|
+
|
|
406
|
+
parallel_kwargs : dict, optional
|
|
407
|
+
|
|
408
|
+
Additional keywords for :class:`joblib.Parallel`.
|
|
409
|
+
|
|
262
410
|
"""
|
|
263
411
|
def __init__(self, num_workers : int = 1,
|
|
264
412
|
threading : bool = False,
|
|
265
|
-
|
|
413
|
+
tmp_root_dir : str|SubDir= "!/.cdxmp", *,
|
|
266
414
|
verbose : Context = Context.quiet,
|
|
267
415
|
parallel_kwargs : dict = {} ):
|
|
268
416
|
"""
|
|
269
|
-
Initialize a multi-processing pool. Thin wrapper aroud joblib.parallel for
|
|
417
|
+
Initialize a multi-processing pool. Thin wrapper around joblib.parallel for cdxcore.verbose.Context() output
|
|
270
418
|
"""
|
|
419
|
+
tmp_dir_ext = unique_hash8( uuid.getnode(), os.getpid(), get_thread_id(), datetime.datetime.now() )
|
|
271
420
|
num_workers = int(num_workers)
|
|
272
|
-
|
|
421
|
+
tmp_root_dir = SubDir(tmp_root_dir) if not tmp_root_dir is None else None
|
|
422
|
+
self._tmp_dir = tmp_root_dir(tmp_dir_ext, ext='') if not tmp_root_dir is None else None
|
|
273
423
|
self._verbose = verbose if not verbose is None else Context("quiet")
|
|
274
424
|
self._threading = threading
|
|
275
|
-
|
|
425
|
+
|
|
426
|
+
if num_workers < 0:
|
|
427
|
+
num_workers = max( self.cpu_count() + num_workers + 1, 1 )
|
|
276
428
|
|
|
277
|
-
|
|
429
|
+
path_info = f" with temporary directory '{self.tmp_path}'" if not self.tmp_path is None else ''
|
|
430
|
+
with self._verbose.write_t(f"Launching {num_workers} processes{path_info}... ", end='') as tme:
|
|
278
431
|
self._pool = joblib_Parallel( n_jobs=num_workers,
|
|
279
432
|
backend="loky" if not threading else "threading",
|
|
280
433
|
return_as="generator_unordered",
|
|
281
|
-
temp_folder=self.tmp_path,
|
|
434
|
+
temp_folder=self.tmp_path,
|
|
435
|
+
**parallel_kwargs)
|
|
282
436
|
self._verbose.write(f"done; this took {tme}.", head=False)
|
|
283
437
|
|
|
284
438
|
def __del__(self):
|
|
285
439
|
self.terminate()
|
|
286
440
|
|
|
287
441
|
@property
|
|
288
|
-
def tmp_path(self) -> str:
|
|
289
|
-
|
|
442
|
+
def tmp_path(self) -> str|None:
|
|
443
|
+
""" Path to the temporary directory for this object. """
|
|
444
|
+
return self._tmp_dir.path if not self._tmp_dir is None else None
|
|
290
445
|
@property
|
|
291
446
|
def is_threading(self) -> bool:
|
|
447
|
+
""" Whether we are threading or multi-processing. """
|
|
292
448
|
return self._threading
|
|
449
|
+
|
|
450
|
+
@staticmethod
|
|
451
|
+
def cpu_count( only_physical_cores : bool = False ) -> int:
|
|
452
|
+
"""
|
|
453
|
+
Return the number of CPUs (logical cores unless ``only_physical_cores`` is ``True``).
|
|
454
|
+
|
|
455
|
+
Parameters
|
|
456
|
+
----------
|
|
457
|
+
only_physical_cores : boolean, optional
|
|
458
|
+
|
|
459
|
+
If ``True``, does not take hyperthreading / SMT logical cores into account.
|
|
460
|
+
Default is ``False``.
|
|
461
|
+
|
|
462
|
+
Returns
|
|
463
|
+
-------
|
|
464
|
+
cpus : int
|
|
465
|
+
Count
|
|
466
|
+
"""
|
|
467
|
+
return cpu_count(only_physical_cores=only_physical_cores)
|
|
293
468
|
|
|
294
469
|
def terminate(self):
|
|
295
470
|
"""
|
|
296
|
-
Stop the current parallel pool, and delete any temporary files.
|
|
471
|
+
Stop the current parallel pool, and delete any temporary files (if managed by ``JCPool``).
|
|
297
472
|
"""
|
|
298
473
|
if not self._pool is None:
|
|
299
474
|
tme = Timer()
|
|
@@ -301,14 +476,41 @@ class JCPool( object ):
|
|
|
301
476
|
self._pool = None
|
|
302
477
|
self._verbose.write(f"Shut down parallel pool. This took {tme}.")
|
|
303
478
|
gc.collect()
|
|
304
|
-
self._tmp_dir
|
|
479
|
+
if not self._tmp_dir is None:
|
|
480
|
+
dir_name = self._tmp_dir.path
|
|
481
|
+
self._tmp_dir.delete_everything(keep_directory=False)
|
|
482
|
+
self._verbose.write(f"Deleted temporary directoru {dir_name}.")
|
|
305
483
|
|
|
306
484
|
def context( self, verbose : Context, verbose_interval : float = None ):
|
|
307
485
|
"""
|
|
308
|
-
|
|
309
|
-
|
|
486
|
+
Parallel processing ``Context`` object.
|
|
487
|
+
|
|
488
|
+
This function returns a :class:`cdxcore.verbose.Context` object whose ``channel`` is a queue towards a utility thread
|
|
489
|
+
which will output all messages to ``verbose``.
|
|
490
|
+
As a result a worker process is able to use ``verbose`` as if it were in-process.
|
|
491
|
+
|
|
492
|
+
A standard usage pattern is:
|
|
493
|
+
|
|
494
|
+
.. code-block:: python
|
|
495
|
+
:emphasize-lines: 13, 14
|
|
496
|
+
|
|
497
|
+
from cdxcore.verbose import Context
|
|
498
|
+
from cdxcore.jcpool import JCPool
|
|
499
|
+
import time as time
|
|
500
|
+
import numpy as np
|
|
501
|
+
|
|
502
|
+
pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
|
|
503
|
+
|
|
504
|
+
def f( x, verbose : Context ):
|
|
505
|
+
verbose.write(f"Found {x}") # <- text "Found 1" etc will be sent
|
|
506
|
+
return x # to main thread via Queue
|
|
507
|
+
|
|
508
|
+
verbose = Context("all")
|
|
509
|
+
with pool.context( verbose ) as verbose:
|
|
510
|
+
for x in pool.parallel( pool.delayed(f)( x=x, verbose=verbose(1) ) for x in [1,2,3,4] ):
|
|
511
|
+
verbose.write(f"Returned {x}")
|
|
310
512
|
|
|
311
|
-
See
|
|
513
|
+
See :class:`cdxcore.jcpool.JCPool` for more usage patterns.
|
|
312
514
|
"""
|
|
313
515
|
if self._threading:
|
|
314
516
|
return verbose
|
|
@@ -317,8 +519,8 @@ class JCPool( object ):
|
|
|
317
519
|
verbose_interval=verbose_interval )
|
|
318
520
|
|
|
319
521
|
@staticmethod
|
|
320
|
-
def
|
|
321
|
-
""" Check that
|
|
522
|
+
def _validate( F : Callable, args : list, kwargs : Mapping ):
|
|
523
|
+
""" Check that ``args`` and ``kwargs`` do not contain ``Context`` objects without channel """
|
|
322
524
|
for k, v in enumerate(args):
|
|
323
525
|
if isinstance(v, Context) and not isinstance(v.channel, ParallelContextChannel):
|
|
324
526
|
raise RuntimeError(f"Argument #{k} for {F.__qualname__} is a Context object, but its channel is not set to 'ParallelContextChannel'. Use JPool.context().")
|
|
@@ -328,22 +530,29 @@ class JCPool( object ):
|
|
|
328
530
|
|
|
329
531
|
def delayed(self, F : Callable):
|
|
330
532
|
"""
|
|
331
|
-
Decorate a function
|
|
332
|
-
|
|
333
|
-
|
|
533
|
+
Decorate a function for parallel execution.
|
|
534
|
+
|
|
535
|
+
This decorator adds minor syntactic sugar on top of :func:`joblib.delayed`
|
|
536
|
+
(which in turn is discussed `here <https://joblib.readthedocs.io/en/latest/parallel.html#parallel>`__).
|
|
537
|
+
|
|
538
|
+
When called, this decorator checks that no :class:`cdxcore.verbose.Context`
|
|
539
|
+
arguments are passed to the pooled function which have no ``ParallelContextChannel`` present. In other words,
|
|
540
|
+
the function detects if the user forgot to use :meth:`cdxcore.jcpool.JCPool.context`.
|
|
334
541
|
|
|
335
542
|
Parameters
|
|
336
543
|
----------
|
|
337
|
-
F :
|
|
544
|
+
F : Callable
|
|
545
|
+
Function.
|
|
338
546
|
|
|
339
547
|
Returns
|
|
340
548
|
-------
|
|
341
|
-
|
|
549
|
+
wrapped F : Callable
|
|
550
|
+
Decorated function.
|
|
342
551
|
"""
|
|
343
552
|
if self._threading:
|
|
344
|
-
return
|
|
553
|
+
return _jl_delayed(F)
|
|
345
554
|
def delayed_function( *args, **kwargs ):
|
|
346
|
-
JCPool.
|
|
555
|
+
JCPool._validate( F, args, kwargs )
|
|
347
556
|
return F, args, kwargs # mimic joblin.delayed()
|
|
348
557
|
try:
|
|
349
558
|
delayed_function = functools.wraps(F)(delayed_function)
|
|
@@ -351,61 +560,84 @@ class JCPool( object ):
|
|
|
351
560
|
" functools.wraps fails on some callable objects "
|
|
352
561
|
return delayed_function
|
|
353
562
|
|
|
354
|
-
def parallel(self, jobs :
|
|
563
|
+
def parallel(self, jobs : Sequence|Mapping) -> Iterable:
|
|
355
564
|
"""
|
|
356
|
-
Process
|
|
357
|
-
|
|
358
|
-
|
|
565
|
+
Process a number of jobs in parallel using the current multiprocessing pool.
|
|
566
|
+
|
|
567
|
+
All functions used in ``jobs`` must have been decorated using :dec:`cdxcore.jcpool.JCPool.delayed`.
|
|
568
|
+
|
|
569
|
+
This function returns an iterator which yields results as soon as they
|
|
570
|
+
are computed.
|
|
571
|
+
|
|
572
|
+
If ``jobs`` is a ``Sequence`` you can also use
|
|
573
|
+
:meth:`cdxcore.jcpool.JCPool.parallel_to_list` to retrieve
|
|
574
|
+
a :class:`list` of all results upon completion of the last job. Similarly, if ``jobs``
|
|
575
|
+
is a ``Mapping``, use :meth:`cdxcore.jcpool.JCPool.parallel_to_dict` to retrieve
|
|
576
|
+
a :class:`dict` of results upon completion of the last job.
|
|
359
577
|
|
|
360
578
|
Parameters
|
|
361
579
|
----------
|
|
362
|
-
jobs:
|
|
363
|
-
|
|
364
|
-
|
|
580
|
+
jobs : Sequence | Mapping
|
|
581
|
+
Can be a :class:`Sequence` containing ``Callable`` functions,
|
|
582
|
+
or a :class:`Mapping` whose values are ``Callable`` functions.
|
|
583
|
+
|
|
584
|
+
Each ``Callable`` used as part of either must
|
|
585
|
+
have been decorated with :dec:`cdxcore.jcpool.JCPool.delayed`.
|
|
365
586
|
|
|
366
587
|
Returns
|
|
367
588
|
-------
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
589
|
+
parallel : Iterator
|
|
590
|
+
An iterator which yields results as soon as they are available.
|
|
591
|
+
If ``jobs`` is a :class:`Mapping`, then the resulting iterator will generate tuples with the first
|
|
592
|
+
element equal to the mapping key of the respective function job. This function will *not*
|
|
593
|
+
return a dictionary.
|
|
371
594
|
"""
|
|
372
595
|
return _parallel( self._pool, jobs )
|
|
373
596
|
|
|
374
|
-
def parallel_to_dict(self, jobs : Mapping) ->
|
|
597
|
+
def parallel_to_dict(self, jobs : Mapping) -> dict:
|
|
375
598
|
"""
|
|
376
|
-
Process
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
599
|
+
Process a number of jobs in parallel using the current multiprocessing pool,
|
|
600
|
+
and return all results in a dictionary upon completion.
|
|
601
|
+
|
|
602
|
+
This function awaits the calculation of all elements of ``jobs`` and
|
|
603
|
+
returns a :class:`dict` with the results.
|
|
380
604
|
|
|
381
|
-
See help(JCPool) for usage patterns.
|
|
382
|
-
|
|
383
605
|
Parameters
|
|
384
606
|
----------
|
|
385
|
-
jobs:
|
|
386
|
-
A dictionary where all (function) values must have been
|
|
607
|
+
jobs : Mapping
|
|
608
|
+
A dictionary where all (function) values must have been decorated
|
|
609
|
+
with :dec:`cdxcore.jcpool.JCPool.delayed`.
|
|
387
610
|
|
|
388
611
|
Returns
|
|
389
612
|
-------
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
613
|
+
Results : dict
|
|
614
|
+
A dictionary with results.
|
|
615
|
+
|
|
616
|
+
If ``jobs`` is an :class:`OrderedDict`, then this function will return an :class:`OrderedDict`
|
|
617
|
+
with the same order as ``jobs``. Otherwise the elements of the ``dict`` returned
|
|
618
|
+
by this function are in completion order.
|
|
393
619
|
"""
|
|
394
620
|
return _parallel_to_dict( self._pool, jobs )
|
|
395
621
|
|
|
396
622
|
def parallel_to_list(self, jobs : Sequence ) -> Sequence:
|
|
397
623
|
"""
|
|
398
|
-
|
|
399
|
-
|
|
624
|
+
Process a number of jobs in parallel using the current multiprocessing pool,
|
|
625
|
+
and return all results in a list upon completion.
|
|
626
|
+
|
|
627
|
+
This function awaits the calculation of all elements of ``jobs`` and
|
|
628
|
+
returns a :class:`list` with the results.
|
|
629
|
+
|
|
400
630
|
Parameters
|
|
401
631
|
----------
|
|
402
|
-
jobs:
|
|
403
|
-
|
|
404
|
-
|
|
632
|
+
jobs : Sequence
|
|
633
|
+
A sequence of ``Callable`` functions, each of which
|
|
634
|
+
must have been decorated
|
|
635
|
+
with :dec:`cdxcore.jcpool.JCPool.delayed`.
|
|
405
636
|
|
|
406
637
|
Returns
|
|
407
638
|
-------
|
|
408
|
-
|
|
639
|
+
Results : list
|
|
640
|
+
A list with results, in the order of ``jobs``.
|
|
409
641
|
"""
|
|
410
642
|
return _parallel_to_list( self._pool, jobs )
|
|
411
643
|
|
cdxcore/subdir.py
CHANGED
|
@@ -1786,7 +1786,7 @@ class SubDir(object):
|
|
|
1786
1786
|
# write to temp file, then rename into target file
|
|
1787
1787
|
# this reduces collision when i/o operations are slow
|
|
1788
1788
|
full_file_name = self.full_file_name(file,ext=ext)
|
|
1789
|
-
tmp_file = unique_hash48(
|
|
1789
|
+
tmp_file = unique_hash48( file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() )
|
|
1790
1790
|
tmp_i = 0
|
|
1791
1791
|
fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
|
|
1792
1792
|
while os.path.exists(fullTmpFile):
|
|
@@ -1,20 +1,21 @@
|
|
|
1
|
-
cdxcore/__init__.py,sha256=
|
|
1
|
+
cdxcore/__init__.py,sha256=QkwdykAwr_r64uUVCGkEd6qZ7xPa-uKA44rrlJGMtKI,127
|
|
2
2
|
cdxcore/config.py,sha256=YnIEJVFtMZ5EHlwzaB2JsSkCPhWPvEw9QSpe6mof_V4,98472
|
|
3
|
-
cdxcore/crman.py,sha256=
|
|
3
|
+
cdxcore/crman.py,sha256=83oKpuzNy98ObpmUeJg2McGRxaLk2AP5lN1yd1F9ueQ,5759
|
|
4
4
|
cdxcore/err.py,sha256=SIJMBXKXYI_hiysv5iSPRy0w_BbxyTDPbNEs56Y94Rk,14541
|
|
5
|
-
cdxcore/jcpool.py,sha256=
|
|
5
|
+
cdxcore/jcpool.py,sha256=uQyOBmOz7EPBsXU96J6Zb2Y-YmA2GwfsRYj6NzFEPKc,27335
|
|
6
6
|
cdxcore/pretty.py,sha256=iUpgUCwmI8yb5O-WZFJEk3QvNYcj_GIFHUgZ5lK8F2I,17082
|
|
7
7
|
cdxcore/pretty.py_bak.py,sha256=JgWr5044HzCNGG0wKSAWlWiPRs7-bNzkwiKH0T3n0to,28658
|
|
8
|
-
cdxcore/subdir.py,sha256=
|
|
8
|
+
cdxcore/subdir.py,sha256=NScdtAG-Wrt0D7_FcpO62qzII4bS9ABXFerkILPg4uE,173864
|
|
9
9
|
cdxcore/uniquehash.py,sha256=g-D8pqPIppSdRq5QfdE5aP3paZ-NkXWHfnn-uNB7fmg,50648
|
|
10
10
|
cdxcore/util.py,sha256=0fp0EzeZvnje1Q7SUcgB_JtKpsYgGTfvlHVfq0mE_ug,31930
|
|
11
11
|
cdxcore/verbose.py,sha256=nKNoZQwl3eF1zBf-JZwPC-lL9d_o5mJsDsSUMixTMLw,29882
|
|
12
12
|
cdxcore/version.py,sha256=m30oI2Ortg44dKSim-sIoeh9PioD1FWsSfVEP5rubhk,27173
|
|
13
|
-
cdxcore-0.1.
|
|
13
|
+
cdxcore-0.1.11.dist-info/licenses/LICENSE,sha256=M-cisgK9kb1bqVRJ7vrCxHcMQQfDxdY3c2YFJJWfNQg,1090
|
|
14
14
|
docs/source/conf.py,sha256=Owctibh5XcSpSNcrpOr3ROIDjoklmFVrMhu8cOSe50o,4180
|
|
15
15
|
tests/test_config.py,sha256=0U9vFIKDex0Il-7Vc_C4saAuXoHIsdQ8YhhS8AO7FQI,15950
|
|
16
|
-
tests/test_crman.py,sha256=
|
|
16
|
+
tests/test_crman.py,sha256=hek6a-51-i6o5XhDa1vqFjUKYJggJ3pnb0Am0wunhwY,1692
|
|
17
17
|
tests/test_err.py,sha256=VbVmbaB6o49G-n3t7yuJ4M0d9pyUQyJuVDqK-xRrLo8,3458
|
|
18
|
+
tests/test_jcpool.py,sha256=vgKt5wTz1BXt8ruubtE2qINIlqH507IoTC1lpP7nejQ,4733
|
|
18
19
|
tests/test_pretty.py,sha256=5TmF7c1TRDSN-YR5yo04SiLJiW3bZaxpXHJ-4ZEO8hg,11952
|
|
19
20
|
tests/test_subdir.py,sha256=tO-zoOIKQtZEMpQM-tsrisyLRmMH8txCSOzh6jPRhYY,11721
|
|
20
21
|
tests/test_uniquehash.py,sha256=ldoQLT77R7odMAok4Yo3jmiUIH3VPHKoSiSLKbbM_mo,24907
|
|
@@ -29,7 +30,7 @@ tmp/npio.py,sha256=4Kwp5H4MgKHkOEhu4UJ5CcwpM7Pm8UFkaoL5FvOEFRI,10310
|
|
|
29
30
|
tmp/sharedarray.py,sha256=JuHuSlxA0evD0a-bEZgTFrfdlVPMgzfQNgfSjr1212w,11484
|
|
30
31
|
up/git_message.py,sha256=EfSH7Pit3ZoCiRqSMwRCUN_QyuwreU4LTIyGSutBlm4,123
|
|
31
32
|
up/pip_modify_setup.py,sha256=Esaml4yA9tFsqxLhk5bWSwvKCURONjQqfyChgFV2TSY,1584
|
|
32
|
-
cdxcore-0.1.
|
|
33
|
-
cdxcore-0.1.
|
|
34
|
-
cdxcore-0.1.
|
|
35
|
-
cdxcore-0.1.
|
|
33
|
+
cdxcore-0.1.11.dist-info/METADATA,sha256=q8JLhUt0fOG-sxNyd_8GJKg-DBhFwjBOVaEpo00bVbE,754
|
|
34
|
+
cdxcore-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
35
|
+
cdxcore-0.1.11.dist-info/top_level.txt,sha256=phNSwCyJFe7UP2YMoi8o6ykhotatlIbJHjTp9EHM51k,26
|
|
36
|
+
cdxcore-0.1.11.dist-info/RECORD,,
|
tests/test_crman.py
CHANGED
tests/test_jcpool.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
|
|
2
|
+
# -*- coding: utf-8 -*-
|
|
3
|
+
"""
|
|
4
|
+
Created on Tue Apr 14 21:24:52 2020
|
|
5
|
+
@author: hansb
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import unittest as unittest
|
|
9
|
+
|
|
10
|
+
def import_local():
|
|
11
|
+
"""
|
|
12
|
+
In order to be able to run our tests manually from the 'tests' directory
|
|
13
|
+
we force import from the local package.
|
|
14
|
+
We also force reloading all modules to make sure we are not running old code.
|
|
15
|
+
"""
|
|
16
|
+
me = "cdxcore"
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
cwd = os.getcwd()
|
|
20
|
+
if cwd[-len(me):] == me:
|
|
21
|
+
return
|
|
22
|
+
assert cwd[-5:] == "tests",("Expected current working directory to be in a 'tests' directory", cwd[-5:], "from", cwd)
|
|
23
|
+
assert cwd[-6] in ['/', '\\'],("Expected current working directory 'tests' to be lead by a '\\' or '/'", cwd[-6:], "from", cwd)
|
|
24
|
+
sys.path.insert( 0, cwd[:-6] )
|
|
25
|
+
|
|
26
|
+
# reload modules
|
|
27
|
+
import importlib as imp
|
|
28
|
+
modules = sys.modules.copy()
|
|
29
|
+
for name, mdata in modules.items():
|
|
30
|
+
if name[:len(me)] == me:
|
|
31
|
+
imp.reload(mdata)
|
|
32
|
+
print("Reloaded", name)
|
|
33
|
+
#import_local()
|
|
34
|
+
|
|
35
|
+
from cdxcore.jcpool import JCPool, Context
|
|
36
|
+
import numpy as np
|
|
37
|
+
|
|
38
|
+
class Test(unittest.TestCase):
|
|
39
|
+
|
|
40
|
+
def test_pool(self):
|
|
41
|
+
|
|
42
|
+
self.maxDiff = None
|
|
43
|
+
|
|
44
|
+
pool = JCPool(2)
|
|
45
|
+
|
|
46
|
+
class Channel(object):
|
|
47
|
+
""" utility to collect all traced messages """
|
|
48
|
+
def __init__(self):
|
|
49
|
+
self.messages = []
|
|
50
|
+
def __call__(self, msg, flush):
|
|
51
|
+
self.messages.append( msg )
|
|
52
|
+
|
|
53
|
+
def f( ticker, tdata, verbose : Context ):
|
|
54
|
+
# some made up results
|
|
55
|
+
q = np.quantile( tdata, 0.35, axis=0 )
|
|
56
|
+
tx = q[0]
|
|
57
|
+
ty = q[1]
|
|
58
|
+
# not in a unittest --> time.sleep( np.exp(tdata[0,0]) )
|
|
59
|
+
verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
|
|
60
|
+
return tx, ty
|
|
61
|
+
|
|
62
|
+
np.random.seed(1231)
|
|
63
|
+
tickerdata =\
|
|
64
|
+
{ 'SPY': np.random.normal(size=(1000,2)),
|
|
65
|
+
'GLD': np.random.normal(size=(1000,2)),
|
|
66
|
+
'BTC': np.random.normal(size=(1000,2))
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
# iterator mode
|
|
70
|
+
channel = Channel()
|
|
71
|
+
verbose_main = Context("all", channel=channel)
|
|
72
|
+
|
|
73
|
+
verbose_main.write("Launching analysis")
|
|
74
|
+
with pool.context( verbose_main ) as verbose:
|
|
75
|
+
for ticker, tx, ty in pool.parallel(
|
|
76
|
+
{ ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
|
|
77
|
+
for ticker, tdata in tickerdata.items() } ):
|
|
78
|
+
verbose.report(1,f"Returned {ticker} {tx:.2f}, {ty:.2f}")
|
|
79
|
+
verbose_main.write("Analysis done")
|
|
80
|
+
|
|
81
|
+
l = sorted( channel.messages )
|
|
82
|
+
self.assertEqual( str(l), r"['00: 01: Returned BTC -0.38, -0.42\n', '00: 01: Returned GLD -0.47, -0.42\n', '00: 01: Returned SPY -0.42, -0.41\n', '00: 02: Result for BTC: -0.38, -0.42\n', '00: 02: Result for GLD: -0.47, -0.42\n', '00: 02: Result for SPY: -0.42, -0.41\n', '00: Analysis done\n', '00: Launching analysis\n']")
|
|
83
|
+
|
|
84
|
+
# dict mode
|
|
85
|
+
channel = Channel()
|
|
86
|
+
verbose_main = Context("all", channel=channel)
|
|
87
|
+
|
|
88
|
+
verbose_main.write("Launching analysis")
|
|
89
|
+
with pool.context( verbose_main ) as verbose:
|
|
90
|
+
l = pool.parallel_to_dict(
|
|
91
|
+
{ ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
|
|
92
|
+
for ticker, tdata in tickerdata.items() } )
|
|
93
|
+
verbose_main.write("Analysis done")
|
|
94
|
+
self.assertEqual( type(l), dict )
|
|
95
|
+
|
|
96
|
+
l = sorted( channel.messages )
|
|
97
|
+
self.assertEqual( str(l), r"['00: 02: Result for BTC: -0.38, -0.42\n', '00: 02: Result for GLD: -0.47, -0.42\n', '00: 02: Result for SPY: -0.42, -0.41\n', '00: Analysis done\n', '00: Launching analysis\n']")
|
|
98
|
+
|
|
99
|
+
# list mode
|
|
100
|
+
channel = Channel()
|
|
101
|
+
verbose_main = Context("all", channel=channel)
|
|
102
|
+
|
|
103
|
+
verbose_main.write("Launching analysis")
|
|
104
|
+
with pool.context( verbose_main ) as verbose:
|
|
105
|
+
l = pool.parallel_to_list(
|
|
106
|
+
pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
|
|
107
|
+
for ticker, tdata in tickerdata.items() )
|
|
108
|
+
verbose_main.write("Analysis done")
|
|
109
|
+
self.assertEqual( type(l), list )
|
|
110
|
+
|
|
111
|
+
l = sorted( channel.messages )
|
|
112
|
+
self.assertEqual( str(l), r"['00: 02: Result for BTC: -0.38, -0.42\n', '00: 02: Result for GLD: -0.47, -0.42\n', '00: 02: Result for SPY: -0.42, -0.41\n', '00: Analysis done\n', '00: Launching analysis\n']")
|
|
113
|
+
|
|
114
|
+
if __name__ == '__main__':
|
|
115
|
+
unittest.main()
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
File without changes
|
|
File without changes
|
|
File without changes
|