cdxcore 0.1.10__py3-none-any.whl → 0.1.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cdxcore might be problematic. Click here for more details.

cdxcore/__init__.py CHANGED
@@ -4,4 +4,4 @@ Created on June 2022
4
4
  @author: hansb
5
5
  """
6
6
 
7
- __version__ = "0.1.10" # auto-updated by setup.py
7
+ __version__ = "0.1.11" # auto-updated by setup.py
cdxcore/crman.py CHANGED
@@ -147,7 +147,10 @@ class CRMan(object):
147
147
  """
148
148
  return self._current
149
149
 
150
- def write(self, text : str, end : str = '', flush : bool = True, channel : Callable = None ):
150
+ def write(self, text : str,
151
+ end : str = '',
152
+ flush : bool = True,
153
+ channel : Callable = None ):
151
154
  r"""
152
155
  Write to a ``channel``,
153
156
 
cdxcore/jcpool.py CHANGED
@@ -1,34 +1,57 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
- Simple multi-processing wrapper around the already great joblib.paralllel.
2
+ Overview
3
+ --------
4
+
5
+ Simple multi-processing convenience wrapper around (already great)
6
+ `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__.
7
+
4
8
  The minor additions are that parallel processing will be a tad more convenient for dictionaries,
5
- and that it supports routing cdxbasics.cdxbasics.Context messaging via a Queue to a single thread.
9
+ and that it supports routing :class:`cdxcore.verbose.Context` messaging via a
10
+ :class:`multiprocessing.Queue` to a single thread.
11
+
12
+ Import
13
+ ------
14
+ .. code-block:: python
15
+
16
+ from cdxcore.jcpool import JCPool
17
+
18
+ Documentation
19
+ -------------
6
20
  """
7
21
 
8
- from joblib import Parallel as joblib_Parallel, delayed as jl_delayed
22
+ from joblib import Parallel as joblib_Parallel, delayed as _jl_delayed, cpu_count
9
23
  from multiprocessing import Manager, Queue
10
24
  from threading import Thread, get_ident as get_thread_id
11
25
  import gc as gc
12
26
  from collections import OrderedDict
13
27
  from collections.abc import Mapping, Callable, Sequence, Iterable
14
28
  import functools as functools
29
+ import uuid as uuid
30
+ import os as os
31
+ import datetime as datetime
15
32
 
16
33
  from .verbose import Context, Timer
17
34
  from .subdir import SubDir
35
+ from .uniquehash import unique_hash8
18
36
 
19
37
  class ParallelContextChannel( Context ):
20
38
  """
21
- Lightweight channel for cdxbasics.verbose.Context which is pickle'able
22
- Implements trivial Context channel() protocol.
39
+ Lightweight :class:`cdxcore.verbose.Context` ``channel`` which is pickle'able.
40
+
41
+ This channel sends messages it receives to a :class:`multiprocessing.Queue`.
23
42
  """
24
- def __init__(self, *, cid, maintid, queue):
43
+ def __init__(self, *, cid, maintid, queue, f_verbose):
25
44
  self._queue = queue
26
45
  self._cid = cid
27
46
  self._maintid = maintid
47
+ self._f_verbose = f_verbose
28
48
  def __call__(self, msg : str, flush : bool ):
29
- """ Context channel call (outside process) to send messages to 'report' """
49
+ """
50
+ Sends ``msg`` via a :class:`multiprocessing.Queue` to the main thread for
51
+ printing.
52
+ """
30
53
  if get_thread_id() == self._maintid:
31
- print(msg, end='', flush=True)
54
+ self._f_verbose.write(msg, end='', flush=flush)
32
55
  else:
33
56
  return self._queue.put( (msg, flush) )
34
57
 
@@ -51,7 +74,12 @@ class _ParallelContextOperator( object ):
51
74
  self._queue = self._mgr.Queue()
52
75
  self._thread = Thread(target=self.report, kwargs=dict(cid=cid, queue=self._queue, f_verbose=f_verbose, verbose_interval=verbose_interval), daemon=True)
53
76
  self._mp_context = Context( f_verbose,
54
- channel=ParallelContextChannel( cid=self._cid, queue=self._queue, maintid=self._tid ) )
77
+ channel=ParallelContextChannel(
78
+ cid=self._cid,
79
+ queue=self._queue,
80
+ maintid=self._tid,
81
+ f_verbose=f_verbose
82
+ ) )
55
83
  self._thread.start()
56
84
  pool_verbose.write(f"done; this took {tme}.", head=False)
57
85
 
@@ -97,7 +125,7 @@ class _ParallelContextOperator( object ):
97
125
  raise r
98
126
  msg, flush = r
99
127
  if tme.interval_test(verbose_interval):
100
- print(msg, end='', flush=flush)
128
+ f_verbose.write(msg, end='', flush=flush)
101
129
 
102
130
  def __enter__(self):
103
131
  return self.mp_context
@@ -196,104 +224,251 @@ def _parallel_to_list(pool, jobs : Sequence ) -> Sequence:
196
224
  A list with the results in order of the input.
197
225
  """
198
226
  assert not isinstance( jobs, Mapping ), ("'jobs' is a Mapping. Use parallel_to_dict() instead.", type(jobs))
199
- r = _parallel_to_dict( pool, { i: j for i, j in enumerate(jobs) } )
200
- return list( r[i] for i in range(len(jobs)) )
227
+ lst = { i: j for i, j in enumerate(jobs) }
228
+ r = _parallel_to_dict( pool, lst )
229
+ return list( r[i] for i in lst )
201
230
 
202
231
  class JCPool( object ):
203
- """
204
- Parallel Job Context Pool
232
+ r"""
233
+ Parallel Job Context Pool.
205
234
 
206
- Simple wrapper around joblib.Parallel which allows using cdxbasics.verbose.Context objects seemlessly:
207
- use of any contexts from a different process will send messages via a Queue to the main process
235
+ Simple wrapper around `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
236
+ which allows worker processes to use :class:`cdxcore.verbose.Context` to report
237
+ progress updates. For this purpose, :class:`cdxcore.verbose.Context`
238
+ will send output messages via a :class:`multiprocessing.Queue`
239
+ to the main process
208
240
  where a separate thread prints these messages out.
209
- Using a fixed pool object also avoids relaunching processes.
241
+
242
+ Using a fixed central pool object in your code base
243
+ avoids relaunching processes.
210
244
 
211
- Finally, the parallel pool allows working directly with dictionaries which is useful for asynchronous
212
- processing (which is the default).
245
+ Functions passed to :meth:`cdxcore.jcpool.JCPool.parallel` and related functions must
246
+ be decorated with :dec:`cdxcore.jcpool.JCPool.delayed`.
213
247
 
214
- Usage
215
- -----
216
- Assume we have a function such as:
248
+ **List/Generator Usage**
217
249
 
250
+ The following code is a standard prototype for using :func:`cdxcore.jcpool.JCPool.parallel`
251
+ following closely the `joblib paradigm <https://joblib.readthedocs.io/en/latest/parallel.html>`__:
252
+
253
+ .. code-block:: python
254
+
255
+ from cdxcore.verbose import Context
256
+ from cdxcore.jcpool import JCPool
257
+ import time as time
258
+ import numpy as np
259
+
260
+ pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
261
+
218
262
  def f( ticker, tdata, verbose : Context ):
219
- #...
220
- tx = 0.
221
- ty = 1.
222
- verbose.write(f"Result for {ticker}: {tx}, {ty}")
223
- return tx, ty # tuple result for illustration
224
-
225
- List/Generator
226
- --------------
227
- Use the pool.context() context handler to convert a Context 'verbose' object into a multi-processing channel.
228
- Then pass a generator to pool.parallel
229
-
230
- pool = JPool( num_workers=4 )
263
+ # some made up function
264
+ q = np.quantile( tdata, 0.35, axis=0 )
265
+ tx = q[0]
266
+ ty = q[1]
267
+ time.sleep(0.5)
268
+ verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
269
+ return tx, ty
270
+
271
+ tickerdata =\
272
+ { 'SPY': np.random.normal(size=(1000,2)),
273
+ 'GLD': np.random.normal(size=(1000,2)),
274
+ 'BTC': np.random.normal(size=(1000,2))
275
+ }
276
+
231
277
  verbose = Context("all")
232
- with pool.context( verbose ) as verbose:
233
- for tx, ty in pool.parallel( pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose ) for ticker, tdata in self.data.items() ):
234
- print(f"Returned {tx}, {ty}")
235
- print("Done")
278
+ with verbose.write_t("Launching analysis") as tme:
279
+ with pool.context( verbose ) as verbose:
280
+ for tx, ty in pool.parallel(
281
+ pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
282
+ for ticker, tdata in tickerdata.items() ):
283
+ verbose.report(1,f"Returned {tx:.2f}, {ty:.2f}")
284
+ verbose.write(f"Analysis done; this took {tme}.")
236
285
 
237
- Dict
238
- ----
239
- Similar construct, but with a dictionary. Considering the asynchronous nature of the returned data it is often desirable
240
- to keep track of a result identifier. This is automated with the dictionary usage pattern:
286
+ The output from this code is asynchronous:
287
+
288
+ .. code-block:: python
289
+
290
+ 00: Launching analysis
291
+ 02: Result for SPY: -0.43, -0.39
292
+ 01: Returned -0.43, -0.39
293
+ 02: Result for BTC: -0.39, -0.45
294
+ 01: Returned -0.39, -0.45
295
+ 02: Result for GLD: -0.41, -0.43
296
+ 01: Returned -0.41, -0.43
297
+ 00: Analysis done; this took 0.73s.
298
+
299
+ **Dict**
300
+
301
+ Considering the asynchronous nature of the returned data it is often desirable
302
+ to keep track of results by some identifier. In above example ``ticker``
303
+ was not available in the main loop.
304
+ This pattern is automated with the dictionary usage pattern:
241
305
 
242
- pool = JPool( num_workers=4 )
306
+ .. code-block:: python
307
+ :emphasize-lines: 26,27,28,29
308
+
309
+ from cdxcore.verbose import Context
310
+ from cdxcore.jcpool import JCPool
311
+ import time as time
312
+ import numpy as np
313
+
314
+ pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
315
+
316
+ def f( ticker, tdata, verbose : Context ):
317
+ # some made up function
318
+ q = np.quantile( tdata, 0.35, axis=0 )
319
+ tx = q[0]
320
+ ty = q[1]
321
+ time.sleep(0.5)
322
+ verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
323
+ return tx, ty
324
+
325
+ tickerdata =\
326
+ { 'SPY': np.random.normal(size=(1000,2)),
327
+ 'GLD': np.random.normal(size=(1000,2)),
328
+ 'BTC': np.random.normal(size=(1000,2))
329
+ }
330
+
243
331
  verbose = Context("all")
244
- with pool.context( verbose ) as verbose:
245
- for ticker, tx, ty in pool.parallel( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose ) for ticker, tdata in self.data.items() } ):
246
- print(f"Returned {tx}, {ty} for {ticker}")
247
- print("Done")
248
-
249
- Note that pool.parallel when applied to a dictionary does not return a dictionary, but a sequence of tuples.
332
+ with verbose.write_t("Launching analysis") as tme:
333
+ with pool.context( verbose ) as verbose:
334
+ for ticker, tx, ty in pool.parallel(
335
+ { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
336
+ for ticker, tdata in tickerdata.items() } ):
337
+ verbose.report(1,f"Returned {ticker} {tx:.2f}, {ty:.2f}")
338
+ verbose.write(f"Analysis done; this took {tme}.")
339
+
340
+ This generates the following output::
341
+
342
+ 00: Launching analysis
343
+ 02: Result for SPY: -0.34, -0.41
344
+ 01: Returned SPY -0.34, -0.41
345
+ 02: Result for GLD: -0.38, -0.41
346
+ 01: Returned GLD -0.38, -0.41
347
+ 02: Result for BTC: -0.34, -0.32
348
+ 01: Returned BTC -0.34, -0.32
349
+ 00: Analysis done; this took 5s.
350
+
351
+ Note that :func:`cdxcore.jcpool.JCPool.parallel` when applied to a dictionary does not return a dictionary,
352
+ but a sequence of tuples.
250
353
  As in the example this also works if the function being called returns tuples itself; in this case the returned data
251
354
  is extended by the key of the dictionary provided.
252
355
 
253
- In order to retrieve a dictionary use
356
+ In order to retrieve a dictionary use :func:`cdxcore.jcpool.JCPool.parallel_to_dict`::
254
357
 
255
- pool = JPool( num_workers=4 )
256
358
  verbose = Context("all")
257
359
  with pool.context( verbose ) as verbose:
258
- r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose ) for ticker, tdata in self.data.items() } )
259
- print("Done")
360
+ r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose )
361
+ for ticker, tdata in self.data.items() } )
260
362
 
261
- Note that in this case the function returns after all items have been processed.
363
+ Note that in this case the function returns only after all jobs have been processed.
364
+
365
+ Parameters
366
+ ----------
367
+ num_workers : int, optional
368
+
369
+ The number of workers. If ``num_workers`` is ``1`` then no parallel process or thread is started.
370
+ Just as for `joblib <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__ you can
371
+ use a negative ``num_workers`` to set the number of workers to the ``number of CPUs + num_workers + 1``.
372
+ For example, a ``num_workers`` of ``-2`` will use as many jobs as CPUs are present less one.
373
+ If ``num_workers`` is negative, the effective number of workers will be at least ``1``.
374
+
375
+ Default is ``1``.
376
+
377
+ threading : bool, optional
378
+
379
+ If ``False``, the default, then the pool will act as a ``"loky"`` multi-process pool with the associated overhead
380
+ of managing data across processes.
381
+
382
+ If ``True``, then the pool is a ``"threading"`` pool. This helps for functions whose code releases
383
+ Python's `global interpreter lock <https://wiki.python.org/moin/GlobalInterpreterLock>`__, for example
384
+ when engaged in heavy I/O or compiled code such as :mod:`numpy`, :mod:`pandas`,
385
+ or generated with `numba <https://numba.pydata.org/>`__.
386
+
387
+ tmp_root_dir : str | SubDir, optional
388
+
389
+ Temporary directory for memory mapping large arrays. This is a root directory; the function
390
+ will create a temporary sub-directory with a name generated from the current state of the system.
391
+ This sub-directory will be deleted upon destruction of ``JCPool`` or when :meth:`cdxcore.jcpool.JCPool.terminate`
392
+ is called.
393
+
394
+ This parameter can also be ``None`` in which case the `default behaviour <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
395
+ of :class:`joblib.Parallel` is used.
396
+
397
+ Default is ``"!/.cdxmp"``.
398
+
399
+ verbose : Context, optional
400
+
401
+ A :class:`cdxcore.verbose.Context` object used to print out multi-processing/threading information.
402
+ This is *not* the ``Context`` provided to child processes/threads.
403
+
404
+ Default is ``quiet``.
405
+
406
+ parallel_kwargs : dict, optional
407
+
408
+ Additional keywords for :class:`joblib.Parallel`.
409
+
262
410
  """
263
411
  def __init__(self, num_workers : int = 1,
264
412
  threading : bool = False,
265
- tmp_dir : str = "!/.cdxmp", *,
413
+ tmp_root_dir : str|SubDir= "!/.cdxmp", *,
266
414
  verbose : Context = Context.quiet,
267
415
  parallel_kwargs : dict = {} ):
268
416
  """
269
- Initialize a multi-processing pool. Thin wrapper aroud joblib.parallel for cdxbasics.verbose.Context() output
417
+ Initialize a multi-processing pool. Thin wrapper around joblib.Parallel for cdxcore.verbose.Context() output
270
418
  """
419
+ tmp_dir_ext = unique_hash8( uuid.getnode(), os.getpid(), get_thread_id(), datetime.datetime.now() )
271
420
  num_workers = int(num_workers)
272
- self._tmp_dir = SubDir(tmp_dir, ext='')
421
+ tmp_root_dir = SubDir(tmp_root_dir) if not tmp_root_dir is None else None
422
+ self._tmp_dir = tmp_root_dir(tmp_dir_ext, ext='') if not tmp_root_dir is None else None
273
423
  self._verbose = verbose if not verbose is None else Context("quiet")
274
424
  self._threading = threading
275
- assert num_workers > 0, ("'num_workers' must be positive", num_workers)
425
+
426
+ if num_workers < 0:
427
+ num_workers = max( self.cpu_count() + num_workers + 1, 1 )
276
428
 
277
- with self._verbose.write_t(f"Launching {num_workers} processes with temporary path '{self.tmp_path}'... ", end='') as tme:
429
+ path_info = f" with temporary directory '{self.tmp_path}'" if not self.tmp_path is None else ''
430
+ with self._verbose.write_t(f"Launching {num_workers} processes{path_info}... ", end='') as tme:
278
431
  self._pool = joblib_Parallel( n_jobs=num_workers,
279
432
  backend="loky" if not threading else "threading",
280
433
  return_as="generator_unordered",
281
- temp_folder=self.tmp_path, **parallel_kwargs)
434
+ temp_folder=self.tmp_path,
435
+ **parallel_kwargs)
282
436
  self._verbose.write(f"done; this took {tme}.", head=False)
283
437
 
284
438
  def __del__(self):
285
439
  self.terminate()
286
440
 
287
441
  @property
288
- def tmp_path(self) -> str:
289
- return self._tmp_dir.path
442
+ def tmp_path(self) -> str|None:
443
+ """ Path to the temporary directory for this object. """
444
+ return self._tmp_dir.path if not self._tmp_dir is None else None
290
445
  @property
291
446
  def is_threading(self) -> bool:
447
+ """ Whether we are threading or multi-processing. """
292
448
  return self._threading
449
+
450
+ @staticmethod
451
+ def cpu_count( only_physical_cores : bool = False ) -> int:
452
+ """
453
+ Return the number of CPUs (logical cores unless ``only_physical_cores`` is ``True``).
454
+
455
+ Parameters
456
+ ----------
457
+ only_physical_cores : boolean, optional
458
+
459
+ If ``True``, does not take hyperthreading / SMT logical cores into account.
460
+ Default is ``False``.
461
+
462
+ Returns
463
+ -------
464
+ cpus : int
465
+ Count
466
+ """
467
+ return cpu_count(only_physical_cores=only_physical_cores)
293
468
 
294
469
  def terminate(self):
295
470
  """
296
- Stop the current parallel pool, and delete any temporary files.
471
+ Stop the current parallel pool, and delete any temporary files (if managed by ``JCPool``).
297
472
  """
298
473
  if not self._pool is None:
299
474
  tme = Timer()
@@ -301,14 +476,41 @@ class JCPool( object ):
301
476
  self._pool = None
302
477
  self._verbose.write(f"Shut down parallel pool. This took {tme}.")
303
478
  gc.collect()
304
- self._tmp_dir.eraseEverything(keepDirectory=True)
479
+ if not self._tmp_dir is None:
480
+ dir_name = self._tmp_dir.path
481
+ self._tmp_dir.delete_everything(keep_directory=False)
482
+ self._verbose.write(f"Deleted temporary directoru {dir_name}.")
305
483
 
306
484
  def context( self, verbose : Context, verbose_interval : float = None ):
307
485
  """
308
- Return a cdxbasics.verbose.Context object whose 'channel' is a queue towards a parallel thread.
309
- As a result the worker process is able to use 'verbose' as if it were in-process
486
+ Parallel processing ``Context`` object.
487
+
488
+ This function returns a :class:`cdxcore.verbose.Context` object whose ``channel`` is a queue towards a utility thread
489
+ which will output all messages to ``verbose``.
490
+ As a result a worker process is able to use ``verbose`` as if it were in-process
491
+
492
+ A standard usage pattern is:
493
+
494
+ .. code-block:: python
495
+ :emphasize-lines: 13, 14
496
+
497
+ from cdxcore.verbose import Context
498
+ from cdxcore.jcpool import JCPool
499
+ import time as time
500
+ import numpy as np
501
+
502
+ pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
503
+
504
+ def f( x, verbose : Context ):
505
+ verbose.write(f"Found {x}") # <- text "Found 1" etc will be sent
506
+ return x # to main thread via Queue
507
+
508
+ verbose = Context("all")
509
+ with pool.context( verbose ) as verbose:
510
+ for x in pool.parallel( pool.delayed(f)( x=x, verbose=verbose(1) ) for x in [1,2,3,4] ):
511
+ verbose.write(f"Returned {x}")
310
512
 
311
- See help(JCPool) for usage patterns.
513
+ See :class:`cdxcore.jcpool.JCPool` for more usage patterns.
312
514
  """
313
515
  if self._threading:
314
516
  return verbose
@@ -317,8 +519,8 @@ class JCPool( object ):
317
519
  verbose_interval=verbose_interval )
318
520
 
319
521
  @staticmethod
320
- def validate( F : Callable, args : list, kwargs : Mapping ):
321
- """ Check that 'args' and 'kwargs' do not contain Context objects without channel """
522
+ def _validate( F : Callable, args : list, kwargs : Mapping ):
523
+ """ Check that ``args`` and ``kwargs`` do not contain ``Context`` objects without channel """
322
524
  for k, v in enumerate(args):
323
525
  if isinstance(v, Context) and not isinstance(v.channel, ParallelContextChannel):
324
526
  raise RuntimeError(f"Argument #{k} for {F.__qualname__} is a Context object, but its channel is not set to 'ParallelContextChannel'. Use JPool.context().")
@@ -328,22 +530,29 @@ class JCPool( object ):
328
530
 
329
531
  def delayed(self, F : Callable):
330
532
  """
331
- Decorate a function F for parallel execution.
332
- Synthatical sugar aroud joblib.delayed().
333
- Checks that there are no Context arguments without ParallelContextChannel present.
533
+ Decorate a function for parallel execution.
534
+
535
+ This decorator adds minor syntactic sugar on top of :func:`joblib.delayed`
536
+ (which in turn is discussed `here <https://joblib.readthedocs.io/en/latest/parallel.html#parallel>`__).
537
+
538
+ When called, this decorator checks that no :class:`cdxcore.verbose.Context`
539
+ arguments are passed to the pooled function which have no ``ParallelContextChannel`` present. In other words,
540
+ the function detects if the user forgot to use :meth:`cdxcore.jcpool.JCPool.context`.
334
541
 
335
542
  Parameters
336
543
  ----------
337
- F : function.
544
+ F : Callable
545
+ Function.
338
546
 
339
547
  Returns
340
548
  -------
341
- Decorated function.
549
+ wrapped F : Callable
550
+ Decorated function.
342
551
  """
343
552
  if self._threading:
344
- return jl_delayed(F)
553
+ return _jl_delayed(F)
345
554
  def delayed_function( *args, **kwargs ):
346
- JCPool.validate( F, args, kwargs )
555
+ JCPool._validate( F, args, kwargs )
347
556
  return F, args, kwargs # mimic joblin.delayed()
348
557
  try:
349
558
  delayed_function = functools.wraps(F)(delayed_function)
@@ -351,61 +560,84 @@ class JCPool( object ):
351
560
  " functools.wraps fails on some callable objects "
352
561
  return delayed_function
353
562
 
354
- def parallel(self, jobs : Iterable) -> Iterable:
563
+ def parallel(self, jobs : Sequence|Mapping) -> Iterable:
355
564
  """
356
- Process 'jobs' in parallel using the current multiprocessing pool.
357
- All (function) values of 'jobs' must be generated using self.delayed.
358
- See help(JCPool) for usage patterns.
565
+ Process a number of jobs in parallel using the current multiprocessing pool.
566
+
567
+ All functions used in ``jobs`` must have been decorated using :dec:`cdxcore.jcpool.JCPool.delayed`.
568
+
569
+ This function returns an iterator which yields results as soon as they
570
+ are computed.
571
+
572
+ If ``jobs`` is a ``Sequence`` you can also use
573
+ :meth:`cdxcore.jcpool.JCPool.parallel_to_list` to retrieve
574
+ a :class:`list` of all results upon completion of the last job. Similarly, if ``jobs``
575
+ is a ``Mapping``, use :meth:`cdxcore.jcpool.JCPool.parallel_to_dict` to retrieve
576
+ a :class:`dict` of results upon completion of the last job.
359
577
 
360
578
  Parameters
361
579
  ----------
362
- jobs:
363
- can be a sequence, a generator, or a dictionary.
364
- Each function value must have been generated using JCPool.delayed()
580
+ jobs : Sequence | Mapping
581
+ Can be a :class:`Sequence` containing ``Callable`` functions,
582
+ or a :class:`Mapping` whose values are ``Callable`` functions.
583
+
584
+ Each ``Callable`` used as part of either must
585
+ have been decorated with :dec:`cdxcore.jcpool.JCPool.delayed`.
365
586
 
366
587
  Returns
367
588
  -------
368
- An iterator which yields results as soon as they are available.
369
- If 'jobs' is a dictionary, then the resutling iterator will generate tuples with the first
370
- element equal to the dictionary key of the respective function job.
589
+ parallel : Iterator
590
+ An iterator which yields results as soon as they are available.
591
+ If ``jobs`` is a :class:`Mapping`, then the resulting iterator will generate tuples with the first
592
+ element equal to the mapping key of the respective function job. This function will *not*
593
+ return a dictionary.
371
594
  """
372
595
  return _parallel( self._pool, jobs )
373
596
 
374
- def parallel_to_dict(self, jobs : Mapping) -> Mapping:
597
+ def parallel_to_dict(self, jobs : Mapping) -> dict:
375
598
  """
376
- Process 'jobs' in parallel using the current multiprocessing pool.
377
- All values of the dictionary 'jobs' must be generated using self.delayed.
378
- This function awaits the calculation of all elements of 'jobs' and
379
- returns a dictionary with the results.
599
+ Process a number of jobs in parallel using the current multiprocessing pool,
600
+ and return all results in a dictionary upon completion.
601
+
602
+ This function awaits the calculation of all elements of ``jobs`` and
603
+ returns a :class:`dict` with the results.
380
604
 
381
- See help(JCPool) for usage patterns.
382
-
383
605
  Parameters
384
606
  ----------
385
- jobs:
386
- A dictionary where all (function) values must have been generated using JCPool.delayed.
607
+ jobs : Mapping
608
+ A dictionary where all (function) values must have been decorated
609
+ with :dec:`cdxcore.jcpool.JCPool.delayed`.
387
610
 
388
611
  Returns
389
612
  -------
390
- A dictionary with results.
391
- If 'jobs' is an OrderedDict, then this function will return an OrderedDict
392
- with the same order as 'jobs'.
613
+ Results : dict
614
+ A dictionary with results.
615
+
616
+ If ``jobs`` is an :class:`OrderedDict`, then this function will return an :class:`OrderedDict`
617
+ with the same order as ``jobs``. Otherwise the elements of the ``dict`` returned
618
+ by this function are in completion order.
393
619
  """
394
620
  return _parallel_to_dict( self._pool, jobs )
395
621
 
396
622
  def parallel_to_list(self, jobs : Sequence ) -> Sequence:
397
623
  """
398
- Call parallel() and convert the resulting generator into a list.
399
-
624
+ Process a number of jobs in parallel using the current multiprocessing pool,
625
+ and return all results in a list upon completion.
626
+
627
+ This function awaits the calculation of all elements of ``jobs`` and
628
+ returns a :class:`list` with the results.
629
+
400
630
  Parameters
401
631
  ----------
402
- jobs:
403
- can be a sequence, a generator, or a dictionary.
404
- Each function value must have been generated using JCPool.delayed()
632
+ jobs : Sequence
633
+ A sequence of ``Callable`` functions, each of which
634
+ must have been decorated
635
+ with :dec:`cdxcore.jcpool.JCPool.delayed`.
405
636
 
406
637
  Returns
407
638
  -------
408
- An list with the results in order of the input.
639
+ Results : list
640
+ A list with results, in the order of ``jobs``.
409
641
  """
410
642
  return _parallel_to_list( self._pool, jobs )
411
643
 
cdxcore/subdir.py CHANGED
@@ -1786,7 +1786,7 @@ class SubDir(object):
1786
1786
  # write to temp file, then rename into target file
1787
1787
  # this reduces collision when i/o operations are slow
1788
1788
  full_file_name = self.full_file_name(file,ext=ext)
1789
- tmp_file = unique_hash48( [ file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1789
+ tmp_file = unique_hash48( file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() )
1790
1790
  tmp_i = 0
1791
1791
  fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1792
1792
  while os.path.exists(fullTmpFile):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cdxcore
3
- Version: 0.1.10
3
+ Version: 0.1.11
4
4
  Summary: Basic Python Tools; upgraded cdxbasics
5
5
  Author-email: Hans Buehler <github@buehler.london>
6
6
  License-Expression: MIT
@@ -1,20 +1,21 @@
1
- cdxcore/__init__.py,sha256=DzMRN-6SAKCjFPYC72fGXBql03834TWP2QnjtPSb_vc,127
1
+ cdxcore/__init__.py,sha256=QkwdykAwr_r64uUVCGkEd6qZ7xPa-uKA44rrlJGMtKI,127
2
2
  cdxcore/config.py,sha256=YnIEJVFtMZ5EHlwzaB2JsSkCPhWPvEw9QSpe6mof_V4,98472
3
- cdxcore/crman.py,sha256=jOw40Bh6PkmEiabA7OS9NsqF-9viam3CBiKzPwxVkVw,5693
3
+ cdxcore/crman.py,sha256=83oKpuzNy98ObpmUeJg2McGRxaLk2AP5lN1yd1F9ueQ,5759
4
4
  cdxcore/err.py,sha256=SIJMBXKXYI_hiysv5iSPRy0w_BbxyTDPbNEs56Y94Rk,14541
5
- cdxcore/jcpool.py,sha256=OzoXWBJKWaDgJm1OeUj9ERz9skvwslGOwNq0wbKtcFM,17222
5
+ cdxcore/jcpool.py,sha256=uQyOBmOz7EPBsXU96J6Zb2Y-YmA2GwfsRYj6NzFEPKc,27335
6
6
  cdxcore/pretty.py,sha256=iUpgUCwmI8yb5O-WZFJEk3QvNYcj_GIFHUgZ5lK8F2I,17082
7
7
  cdxcore/pretty.py_bak.py,sha256=JgWr5044HzCNGG0wKSAWlWiPRs7-bNzkwiKH0T3n0to,28658
8
- cdxcore/subdir.py,sha256=ceL-Od5NAlJX3f4mYgwki5EMzbs-LpG5M2cikC8eFIE,173868
8
+ cdxcore/subdir.py,sha256=NScdtAG-Wrt0D7_FcpO62qzII4bS9ABXFerkILPg4uE,173864
9
9
  cdxcore/uniquehash.py,sha256=g-D8pqPIppSdRq5QfdE5aP3paZ-NkXWHfnn-uNB7fmg,50648
10
10
  cdxcore/util.py,sha256=0fp0EzeZvnje1Q7SUcgB_JtKpsYgGTfvlHVfq0mE_ug,31930
11
11
  cdxcore/verbose.py,sha256=nKNoZQwl3eF1zBf-JZwPC-lL9d_o5mJsDsSUMixTMLw,29882
12
12
  cdxcore/version.py,sha256=m30oI2Ortg44dKSim-sIoeh9PioD1FWsSfVEP5rubhk,27173
13
- cdxcore-0.1.10.dist-info/licenses/LICENSE,sha256=M-cisgK9kb1bqVRJ7vrCxHcMQQfDxdY3c2YFJJWfNQg,1090
13
+ cdxcore-0.1.11.dist-info/licenses/LICENSE,sha256=M-cisgK9kb1bqVRJ7vrCxHcMQQfDxdY3c2YFJJWfNQg,1090
14
14
  docs/source/conf.py,sha256=Owctibh5XcSpSNcrpOr3ROIDjoklmFVrMhu8cOSe50o,4180
15
15
  tests/test_config.py,sha256=0U9vFIKDex0Il-7Vc_C4saAuXoHIsdQ8YhhS8AO7FQI,15950
16
- tests/test_crman.py,sha256=jYDxqF__iq3fEjaZQoq66CNChWRoR79Ntyng5mr3sIA,1698
16
+ tests/test_crman.py,sha256=hek6a-51-i6o5XhDa1vqFjUKYJggJ3pnb0Am0wunhwY,1692
17
17
  tests/test_err.py,sha256=VbVmbaB6o49G-n3t7yuJ4M0d9pyUQyJuVDqK-xRrLo8,3458
18
+ tests/test_jcpool.py,sha256=vgKt5wTz1BXt8ruubtE2qINIlqH507IoTC1lpP7nejQ,4733
18
19
  tests/test_pretty.py,sha256=5TmF7c1TRDSN-YR5yo04SiLJiW3bZaxpXHJ-4ZEO8hg,11952
19
20
  tests/test_subdir.py,sha256=tO-zoOIKQtZEMpQM-tsrisyLRmMH8txCSOzh6jPRhYY,11721
20
21
  tests/test_uniquehash.py,sha256=ldoQLT77R7odMAok4Yo3jmiUIH3VPHKoSiSLKbbM_mo,24907
@@ -29,7 +30,7 @@ tmp/npio.py,sha256=4Kwp5H4MgKHkOEhu4UJ5CcwpM7Pm8UFkaoL5FvOEFRI,10310
29
30
  tmp/sharedarray.py,sha256=JuHuSlxA0evD0a-bEZgTFrfdlVPMgzfQNgfSjr1212w,11484
30
31
  up/git_message.py,sha256=EfSH7Pit3ZoCiRqSMwRCUN_QyuwreU4LTIyGSutBlm4,123
31
32
  up/pip_modify_setup.py,sha256=Esaml4yA9tFsqxLhk5bWSwvKCURONjQqfyChgFV2TSY,1584
32
- cdxcore-0.1.10.dist-info/METADATA,sha256=CR4ZGaCL36hx08u3KnBdB5TWKM8Sknk2W_KsCptAv4g,754
33
- cdxcore-0.1.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
- cdxcore-0.1.10.dist-info/top_level.txt,sha256=phNSwCyJFe7UP2YMoi8o6ykhotatlIbJHjTp9EHM51k,26
35
- cdxcore-0.1.10.dist-info/RECORD,,
33
+ cdxcore-0.1.11.dist-info/METADATA,sha256=q8JLhUt0fOG-sxNyd_8GJKg-DBhFwjBOVaEpo00bVbE,754
34
+ cdxcore-0.1.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
35
+ cdxcore-0.1.11.dist-info/top_level.txt,sha256=phNSwCyJFe7UP2YMoi8o6ykhotatlIbJHjTp9EHM51k,26
36
+ cdxcore-0.1.11.dist-info/RECORD,,
tests/test_crman.py CHANGED
@@ -49,6 +49,4 @@ class Test(unittest.TestCase):
49
49
  self.assertEqual( crman.current, "" )
50
50
 
51
51
  if __name__ == '__main__':
52
- unittest.main()
53
-
54
-
52
+ unittest.main()
tests/test_jcpool.py ADDED
@@ -0,0 +1,119 @@
1
+
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Apr 14 21:24:52 2020
5
+ @author: hansb
6
+ """
7
+
8
+ import unittest as unittest
9
+
10
+ def import_local():
11
+ """
12
+ In order to be able to run our tests manually from the 'tests' directory
13
+ we force import from the local package.
14
+ We also force reloading all modules to make sure we are not running old code.
15
+ """
16
+ me = "cdxcore"
17
+ import os
18
+ import sys
19
+ cwd = os.getcwd()
20
+ if cwd[-len(me):] == me:
21
+ return
22
+ assert cwd[-5:] == "tests",("Expected current working directory to be in a 'tests' directory", cwd[-5:], "from", cwd)
23
+ assert cwd[-6] in ['/', '\\'],("Expected current working directory 'tests' to be lead by a '\\' or '/'", cwd[-6:], "from", cwd)
24
+ sys.path.insert( 0, cwd[:-6] )
25
+
26
+ # reload modules
27
+ import importlib as imp
28
+ modules = sys.modules.copy()
29
+ for name, mdata in modules.items():
30
+ if name[:len(me)] == me:
31
+ imp.reload(mdata)
32
+ print("Reloaded", name)
33
+ #import_local()
34
+
35
+ from cdxcore.jcpool import JCPool, Context
36
+ import numpy as np
37
+
38
+ class Test(unittest.TestCase):
39
+
40
+ def test_pool(self):
41
+
42
+ self.maxDiff = None
43
+
44
+ pool = JCPool(2)
45
+
46
+ class Channel(object):
47
+ """ utility to collect all traced messages """
48
+ def __init__(self):
49
+ self.messages = []
50
+ def __call__(self, msg, flush):
51
+ self.messages.append( msg )
52
+
53
+ def f( ticker, tdata, verbose : Context ):
54
+ # some made up results
55
+ q = np.quantile( tdata, 0.35, axis=0 )
56
+ tx = q[0]
57
+ ty = q[1]
58
+ # not in a unittest --> time.sleep( np.exp(tdata[0,0]) )
59
+ verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
60
+ return tx, ty
61
+
62
+ np.random.seed(1231)
63
+ tickerdata =\
64
+ { 'SPY': np.random.normal(size=(1000,2)),
65
+ 'GLD': np.random.normal(size=(1000,2)),
66
+ 'BTC': np.random.normal(size=(1000,2))
67
+ }
68
+
69
+ # iterator mode
70
+ channel = Channel()
71
+ verbose_main = Context("all", channel=channel)
72
+
73
+ verbose_main.write("Launching analysis")
74
+ with pool.context( verbose_main ) as verbose:
75
+ for ticker, tx, ty in pool.parallel(
76
+ { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
77
+ for ticker, tdata in tickerdata.items() } ):
78
+ verbose.report(1,f"Returned {ticker} {tx:.2f}, {ty:.2f}")
79
+ verbose_main.write("Analysis done")
80
+
81
+ l = sorted( channel.messages )
82
+ self.assertEqual( str(l), r"['00: 01: Returned BTC -0.38, -0.42\n', '00: 01: Returned GLD -0.47, -0.42\n', '00: 01: Returned SPY -0.42, -0.41\n', '00: 02: Result for BTC: -0.38, -0.42\n', '00: 02: Result for GLD: -0.47, -0.42\n', '00: 02: Result for SPY: -0.42, -0.41\n', '00: Analysis done\n', '00: Launching analysis\n']")
83
+
84
+ # dict mode
85
+ channel = Channel()
86
+ verbose_main = Context("all", channel=channel)
87
+
88
+ verbose_main.write("Launching analysis")
89
+ with pool.context( verbose_main ) as verbose:
90
+ l = pool.parallel_to_dict(
91
+ { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
92
+ for ticker, tdata in tickerdata.items() } )
93
+ verbose_main.write("Analysis done")
94
+ self.assertEqual( type(l), dict )
95
+
96
+ l = sorted( channel.messages )
97
+ self.assertEqual( str(l), r"['00: 02: Result for BTC: -0.38, -0.42\n', '00: 02: Result for GLD: -0.47, -0.42\n', '00: 02: Result for SPY: -0.42, -0.41\n', '00: Analysis done\n', '00: Launching analysis\n']")
98
+
99
+ # list mode
100
+ channel = Channel()
101
+ verbose_main = Context("all", channel=channel)
102
+
103
+ verbose_main.write("Launching analysis")
104
+ with pool.context( verbose_main ) as verbose:
105
+ l = pool.parallel_to_list(
106
+ pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
107
+ for ticker, tdata in tickerdata.items() )
108
+ verbose_main.write("Analysis done")
109
+ self.assertEqual( type(l), list )
110
+
111
+ l = sorted( channel.messages )
112
+ self.assertEqual( str(l), r"['00: 02: Result for BTC: -0.38, -0.42\n', '00: 02: Result for GLD: -0.47, -0.42\n', '00: 02: Result for SPY: -0.42, -0.41\n', '00: Analysis done\n', '00: Launching analysis\n']")
113
+
114
+ if __name__ == '__main__':
115
+ unittest.main()
116
+
117
+
118
+
119
+