cdxcore 0.1.10__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of cdxcore might be problematic. Click here for more details.

cdxcore/jcpool.py CHANGED
@@ -1,34 +1,57 @@
1
- # -*- coding: utf-8 -*-
2
1
  """
3
- Simple multi-processing wrapper around the already great joblib.paralllel.
2
+ Overview
3
+ --------
4
+
5
+ Simple multi-processing convenience wrapper around (already great)
6
+ `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__.
7
+
4
8
  The minor additions are that parallel processing will be a tad more convenient for dictionaries,
5
- and that it supports routing cdxbasics.cdxbasics.Context messaging via a Queue to a single thread.
9
+ and that it supports routing :class:`cdxcore.verbose.Context` messaging via a
10
+ :class:`multiprocessing.Queue` to a single thread.
11
+
12
+ Import
13
+ ------
14
+ .. code-block:: python
15
+
16
+ from cdxcore.jcpool import JCPool
17
+
18
+ Documentation
19
+ -------------
6
20
  """
7
21
 
8
- from joblib import Parallel as joblib_Parallel, delayed as jl_delayed
22
+ from joblib import Parallel as joblib_Parallel, delayed as _jl_delayed, cpu_count
9
23
  from multiprocessing import Manager, Queue
10
24
  from threading import Thread, get_ident as get_thread_id
11
25
  import gc as gc
12
26
  from collections import OrderedDict
13
27
  from collections.abc import Mapping, Callable, Sequence, Iterable
14
28
  import functools as functools
29
+ import uuid as uuid
30
+ import os as os
31
+ import datetime as datetime
15
32
 
16
33
  from .verbose import Context, Timer
17
34
  from .subdir import SubDir
35
+ from .uniquehash import unique_hash8
18
36
 
19
37
  class ParallelContextChannel( Context ):
20
38
  """
21
- Lightweight channel for cdxbasics.verbose.Context which is pickle'able
22
- Implements trivial Context channel() protocol.
39
+ Lightweight :class:`cdxcore.verbose.Context` ``channel`` which is pickle'able.
40
+
41
+ This channel sends messages it receives to a :class:`multiprocessing.Queue`.
23
42
  """
24
- def __init__(self, *, cid, maintid, queue):
43
+ def __init__(self, *, cid, maintid, queue, f_verbose):
25
44
  self._queue = queue
26
45
  self._cid = cid
27
46
  self._maintid = maintid
47
+ self._f_verbose = f_verbose
28
48
  def __call__(self, msg : str, flush : bool ):
29
- """ Context channel call (outside process) to send messages to 'report' """
49
+ """
50
+ Sends ``msg`` via a :class:`multiprocessing.Queue` to the main thread for
51
+ printing.
52
+ """
30
53
  if get_thread_id() == self._maintid:
31
- print(msg, end='', flush=True)
54
+ self._f_verbose._raw(msg,end='',flush=flush)
32
55
  else:
33
56
  return self._queue.put( (msg, flush) )
34
57
 
@@ -51,7 +74,12 @@ class _ParallelContextOperator( object ):
51
74
  self._queue = self._mgr.Queue()
52
75
  self._thread = Thread(target=self.report, kwargs=dict(cid=cid, queue=self._queue, f_verbose=f_verbose, verbose_interval=verbose_interval), daemon=True)
53
76
  self._mp_context = Context( f_verbose,
54
- channel=ParallelContextChannel( cid=self._cid, queue=self._queue, maintid=self._tid ) )
77
+ channel=ParallelContextChannel(
78
+ cid=self._cid,
79
+ queue=self._queue,
80
+ maintid=self._tid,
81
+ f_verbose=f_verbose
82
+ ) )
55
83
  self._thread.start()
56
84
  pool_verbose.write(f"done; this took {tme}.", head=False)
57
85
 
@@ -97,13 +125,12 @@ class _ParallelContextOperator( object ):
97
125
  raise r
98
126
  msg, flush = r
99
127
  if tme.interval_test(verbose_interval):
100
- print(msg, end='', flush=flush)
128
+ f_verbose._raw(msg,end='',flush=flush)
101
129
 
102
130
  def __enter__(self):
103
131
  return self.mp_context
104
132
 
105
133
  def __exit__(self, *kargs, **kwargs):
106
- #self.terminate()
107
134
  return False#raise exceptions
108
135
 
109
136
  class _DIF(object):
@@ -196,104 +223,251 @@ def _parallel_to_list(pool, jobs : Sequence ) -> Sequence:
196
223
  A list with the results in order of the input.
197
224
  """
198
225
  assert not isinstance( jobs, Mapping ), ("'jobs' is a Mapping. Use parallel_to_dict() instead.", type(jobs))
199
- r = _parallel_to_dict( pool, { i: j for i, j in enumerate(jobs) } )
200
- return list( r[i] for i in range(len(jobs)) )
226
+ lst = { i: j for i, j in enumerate(jobs) }
227
+ r = _parallel_to_dict( pool, lst )
228
+ return list( r[i] for i in lst )
201
229
 
202
230
  class JCPool( object ):
203
- """
204
- Parallel Job Context Pool
231
+ r"""
232
+ Parallel Job Context Pool.
205
233
 
206
- Simple wrapper around joblib.Parallel which allows using cdxbasics.verbose.Context objects seemlessly:
207
- use of any contexts from a different process will send messages via a Queue to the main process
234
+ Simple wrapper around `joblib.Parallel() <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
235
+ which allows worker processes to use :class:`cdxcore.verbose.Context` to report
236
+ progress updates. For this purpose, :class:`cdxcore.verbose.Context`
237
+ will send output messages via a :class:`multiprocessing.Queue`
238
+ to the main process
208
239
  where a separate thread prints these messages out.
209
- Using a fixed pool object also avoids relaunching processes.
240
+
241
+ Using a fixed central pool object in your code base
242
+ avoids relaunching processes.
210
243
 
211
- Finally, the parallel pool allows working directly with dictionaries which is useful for asynchronous
212
- processing (which is the default).
244
+ Functions passed to :meth:`cdxcore.jcpool.JCPool.parallel` and related functions must
245
+ be decorated with :dec:`cdxcore.jcpool.JCPool.delayed`.
213
246
 
214
- Usage
215
- -----
216
- Assume we have a function such as:
247
+ **List/Generator Usage**
217
248
 
249
+ The following code is a standard prototype for using :func:`cdxcore.jcpool.JCPool.parallel`
250
+ following closely the `joblib paradigm <https://joblib.readthedocs.io/en/latest/parallel.html>`__:
251
+
252
+ .. code-block:: python
253
+
254
+ from cdxcore.verbose import Context
255
+ from cdxcore.jcpool import JCPool
256
+ import time as time
257
+ import numpy as np
258
+
259
+ pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
260
+
218
261
  def f( ticker, tdata, verbose : Context ):
219
- #...
220
- tx = 0.
221
- ty = 1.
222
- verbose.write(f"Result for {ticker}: {tx}, {ty}")
223
- return tx, ty # tuple result for illustration
224
-
225
- List/Generator
226
- --------------
227
- Use the pool.context() context handler to convert a Context 'verbose' object into a multi-processing channel.
228
- Then pass a generator to pool.parallel
229
-
230
- pool = JPool( num_workers=4 )
262
+ # some made up function
263
+ q = np.quantile( tdata, 0.35, axis=0 )
264
+ tx = q[0]
265
+ ty = q[1]
266
+ time.sleep(0.5)
267
+ verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
268
+ return tx, ty
269
+
270
+ tickerdata =\
271
+ { 'SPY': np.random.normal(size=(1000,2)),
272
+ 'GLD': np.random.normal(size=(1000,2)),
273
+ 'BTC': np.random.normal(size=(1000,2))
274
+ }
275
+
231
276
  verbose = Context("all")
232
- with pool.context( verbose ) as verbose:
233
- for tx, ty in pool.parallel( pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose ) for ticker, tdata in self.data.items() ):
234
- print(f"Returned {tx}, {ty}")
235
- print("Done")
277
+ with verbose.write_t("Launching analysis") as tme:
278
+ with pool.context( verbose ) as verbose:
279
+ for tx, ty in pool.parallel(
280
+ pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
281
+ for ticker, tdata in tickerdata.items() ):
282
+ verbose.report(1,f"Returned {tx:.2f}, {ty:.2f}")
283
+ verbose.write(f"Analysis done; this took {tme}.")
236
284
 
237
- Dict
238
- ----
239
- Similar construct, but with a dictionary. Considering the asynchronous nature of the returned data it is often desirable
240
- to keep track of a result identifier. This is automated with the dictionary usage pattern:
285
+ The output from this code is asynchronous:
286
+
287
+ .. code-block:: python
288
+
289
+ 00: Launching analysis
290
+ 02: Result for SPY: -0.43, -0.39
291
+ 01: Returned -0.43, -0.39
292
+ 02: Result for BTC: -0.39, -0.45
293
+ 01: Returned -0.39, -0.45
294
+ 02: Result for GLD: -0.41, -0.43
295
+ 01: Returned -0.41, -0.43
296
+ 00: Analysis done; this took 0.73s.
297
+
298
+ **Dict**
299
+
300
+ Considering the asynchronous nature of the returned data it is often desirable
301
+ to keep track of results by some identifier. In above example ``ticker``
302
+ was not available in the main loop.
303
+ This pattern is automated with the dictionary usage pattern:
241
304
 
242
- pool = JPool( num_workers=4 )
305
+ .. code-block:: python
306
+ :emphasize-lines: 26,27,28,29
307
+
308
+ from cdxcore.verbose import Context
309
+ from cdxcore.jcpool import JCPool
310
+ import time as time
311
+ import numpy as np
312
+
313
+ pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
314
+
315
+ def f( ticker, tdata, verbose : Context ):
316
+ # some made up function
317
+ q = np.quantile( tdata, 0.35, axis=0 )
318
+ tx = q[0]
319
+ ty = q[1]
320
+ time.sleep(0.5)
321
+ verbose.write(f"Result for {ticker}: {tx:.2f}, {ty:.2f}")
322
+ return tx, ty
323
+
324
+ tickerdata =\
325
+ { 'SPY': np.random.normal(size=(1000,2)),
326
+ 'GLD': np.random.normal(size=(1000,2)),
327
+ 'BTC': np.random.normal(size=(1000,2))
328
+ }
329
+
243
330
  verbose = Context("all")
244
- with pool.context( verbose ) as verbose:
245
- for ticker, tx, ty in pool.parallel( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose ) for ticker, tdata in self.data.items() } ):
246
- print(f"Returned {tx}, {ty} for {ticker}")
247
- print("Done")
248
-
249
- Note that pool.parallel when applied to a dictionary does not return a dictionary, but a sequence of tuples.
331
+ with verbose.write_t("Launching analysis") as tme:
332
+ with pool.context( verbose ) as verbose:
333
+ for ticker, tx, ty in pool.parallel(
334
+ { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose(2) )
335
+ for ticker, tdata in tickerdata.items() } ):
336
+ verbose.report(1,f"Returned {ticker} {tx:.2f}, {ty:.2f}")
337
+ verbose.write(f"Analysis done; this took {tme}.")
338
+
339
+ This generates the following output::
340
+
341
+ 00: Launching analysis
342
+ 02: Result for SPY: -0.34, -0.41
343
+ 01: Returned SPY -0.34, -0.41
344
+ 02: Result for GLD: -0.38, -0.41
345
+ 01: Returned GLD -0.38, -0.41
346
+ 02: Result for BTC: -0.34, -0.32
347
+ 01: Returned BTC -0.34, -0.32
348
+ 00: Analysis done; this took 5s.
349
+
350
+ Note that :func:`cdxcore.jcpool.JCPool.parallel` when applied to a dictionary does not return a dictionary,
351
+ but a sequence of tuples.
250
352
  As in the example this also works if the function being called returns tuples itself; in this case the returned data
251
353
  is extended by the key of the dictionary provided.
252
354
 
253
- In order to retrieve a dictionary use
355
+ In order to retrieve a dictionary use :func:`cdxcore.jcpool.JCPool.parallel_to_dict`::
254
356
 
255
- pool = JPool( num_workers=4 )
256
357
  verbose = Context("all")
257
358
  with pool.context( verbose ) as verbose:
258
- r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose ) for ticker, tdata in self.data.items() } )
259
- print("Done")
359
+ r = pool.parallel_to_dict( { ticker: pool.delayed(f)( ticker=ticker, tdata=tdata, verbose=verbose )
360
+ for ticker, tdata in self.data.items() } )
260
361
 
261
- Note that in this case the function returns after all items have been processed.
362
+ Note that in this case the function returns only after all jobs have been processed.
363
+
364
+ Parameters
365
+ ----------
366
+ num_workers : int, optional
367
+
368
+ The number of workers. If ``num_workers`` is ``1`` then no parallel process or thread is started.
369
+ Just as for `joblib <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__ you can
370
+ use a negative ``num_workers`` to set the number of workers to the ``number of CPUs + num_workers + 1``.
371
+ For example, a ``num_workers`` of ``-2`` will use as many jobs as CPUs are present less one.
372
+ If ``num_workers`` is negative, the effective number of workers will be at least ``1``.
373
+
374
+ Default is ``1``.
375
+
376
+ threading : bool, optional
377
+
378
+ If ``False``, the default, then the pool will act as a ``"loky"`` multi-process pool with the associated overhead
379
+ of managing data across processes.
380
+
381
+ If ``True``, then the pool is a ``"threading"`` pool. This helps for functions whose code releases
382
+ Python's `global interpreter lock <https://wiki.python.org/moin/GlobalInterpreterLock>`__, for example
383
+ when engaged in heavy I/O or compiled code such as :mod:`numpy`, :mod:`pandas`,
384
+ or generated with `numba <https://numba.pydata.org/>`__.
385
+
386
+ tmp_root_dir : str | SubDir, optional
387
+
388
+ Temporary directory for memory mapping large arrays. This is a root directory; the function
389
+ will create a temporary sub-directory with a name generated from the current state of the system.
390
+ This sub-directory will be deleted upon destruction of ``JCPool`` or when :meth:`cdxcore.jcpool.JCPool.terminate`
391
+ is called.
392
+
393
+ This parameter can also be ``None`` in which case the `default behaviour <https://joblib.readthedocs.io/en/latest/generated/joblib.Parallel.html>`__
394
+ of :class:`joblib.Parallel` is used.
395
+
396
+ Default is ``"!/.cdxmp"``.
397
+
398
+ verbose : Context, optional
399
+
400
+ A :class:`cdxcore.verbose.Context` object used to print out multi-processing/threading information.
401
+ This is *not* the ``Context`` provided to child processes/threads.
402
+
403
+ Default is ``quiet``.
404
+
405
+ parallel_kwargs : dict, optional
406
+
407
+ Additional keywords for :class:`joblib.Parallel`.
408
+
262
409
  """
263
410
  def __init__(self, num_workers : int = 1,
264
411
  threading : bool = False,
265
- tmp_dir : str = "!/.cdxmp", *,
412
+ tmp_root_dir : str|SubDir= "!/.cdxmp", *,
266
413
  verbose : Context = Context.quiet,
267
414
  parallel_kwargs : dict = {} ):
268
415
  """
269
- Initialize a multi-processing pool. Thin wrapper aroud joblib.parallel for cdxbasics.verbose.Context() output
416
+ Initialize a multi-processing pool. Thin wrapper around joblib.Parallel for cdxcore.verbose.Context() output
270
417
  """
418
+ tmp_dir_ext = unique_hash8( uuid.getnode(), os.getpid(), get_thread_id(), datetime.datetime.now() )
271
419
  num_workers = int(num_workers)
272
- self._tmp_dir = SubDir(tmp_dir, ext='')
420
+ tmp_root_dir = SubDir(tmp_root_dir) if not tmp_root_dir is None else None
421
+ self._tmp_dir = tmp_root_dir(tmp_dir_ext, ext='') if not tmp_root_dir is None else None
273
422
  self._verbose = verbose if not verbose is None else Context("quiet")
274
423
  self._threading = threading
275
- assert num_workers > 0, ("'num_workers' must be positive", num_workers)
424
+
425
+ if num_workers < 0:
426
+ num_workers = max( self.cpu_count() + num_workers + 1, 1 )
276
427
 
277
- with self._verbose.write_t(f"Launching {num_workers} processes with temporary path '{self.tmp_path}'... ", end='') as tme:
428
+ path_info = f" with temporary directory '{self.tmp_path}'" if not self.tmp_path is None else ''
429
+ with self._verbose.write_t(f"Launching {num_workers} processes{path_info}... ", end='') as tme:
278
430
  self._pool = joblib_Parallel( n_jobs=num_workers,
279
431
  backend="loky" if not threading else "threading",
280
432
  return_as="generator_unordered",
281
- temp_folder=self.tmp_path, **parallel_kwargs)
433
+ temp_folder=self.tmp_path,
434
+ **parallel_kwargs)
282
435
  self._verbose.write(f"done; this took {tme}.", head=False)
283
436
 
284
437
  def __del__(self):
285
438
  self.terminate()
286
439
 
287
440
  @property
288
- def tmp_path(self) -> str:
289
- return self._tmp_dir.path
441
+ def tmp_path(self) -> str|None:
442
+ """ Path to the temporary directory for this object. """
443
+ return self._tmp_dir.path if not self._tmp_dir is None else None
290
444
  @property
291
445
  def is_threading(self) -> bool:
446
+ """ Whether we are threading or multi-processing. """
292
447
  return self._threading
448
+
449
+ @staticmethod
450
+ def cpu_count( only_physical_cores : bool = False ) -> int:
451
+ """
452
+ Return the number of CPUs.
453
+
454
+ Parameters
455
+ ----------
456
+ only_physical_cores : boolean, optional
457
+
458
+ If ``True``, does not take hyperthreading / SMT logical cores into account.
459
+ Default is ``False``.
460
+
461
+ Returns
462
+ -------
463
+ cpus : int
464
+ Count
465
+ """
466
+ return cpu_count(only_physical_cores=only_physical_cores)
293
467
 
294
468
  def terminate(self):
295
469
  """
296
- Stop the current parallel pool, and delete any temporary files.
470
+ Stop the current parallel pool, and delete any temporary files (if managed by ``JCPool``).
297
471
  """
298
472
  if not self._pool is None:
299
473
  tme = Timer()
@@ -301,14 +475,41 @@ class JCPool( object ):
301
475
  self._pool = None
302
476
  self._verbose.write(f"Shut down parallel pool. This took {tme}.")
303
477
  gc.collect()
304
- self._tmp_dir.eraseEverything(keepDirectory=True)
478
+ if not self._tmp_dir is None:
479
+ dir_name = self._tmp_dir.path
480
+ self._tmp_dir.delete_everything(keep_directory=False)
481
+ self._verbose.write(f"Deleted temporary directoru {dir_name}.")
305
482
 
306
483
  def context( self, verbose : Context, verbose_interval : float = None ):
307
484
  """
308
- Return a cdxbasics.verbose.Context object whose 'channel' is a queue towards a parallel thread.
309
- As a result the worker process is able to use 'verbose' as if it were in-process
485
+ Parallel processing ``Context`` object.
486
+
487
+ This function returns a :class:`cdxcore.verbose.Context` object whose ``channel`` is a queue towards a utility thread
488
+ which will output all messages to ``verbose``.
489
+ As a result a worker process is able to use ``verbose`` as if it were in-process
490
+
491
+ A standard usage pattern is:
492
+
493
+ .. code-block:: python
494
+ :emphasize-lines: 13, 14
495
+
496
+ from cdxcore.verbose import Context
497
+ from cdxcore.jcpool import JCPool
498
+ import time as time
499
+ import numpy as np
500
+
501
+ pool = JCPool( num_workers=4 ) # global pool. Reuse where possible
502
+
503
+ def f( x, verbose : Context ):
504
+ verbose.write(f"Found {x}") # <- text "Found 1" etc will be sent
505
+ return x # to main thread via Queue
506
+
507
+ verbose = Context("all")
508
+ with pool.context( verbose ) as verbose:
509
+ for x in pool.parallel( pool.delayed(f)( x=x, verbose=verbose(1) ) for x in [1,2,3,4] ):
510
+ verbose.write(f"Returned {x}")
310
511
 
311
- See help(JCPool) for usage patterns.
512
+ See :class:`cdxcore.jcpool.JCPool` for more usage patterns.
312
513
  """
313
514
  if self._threading:
314
515
  return verbose
@@ -317,8 +518,8 @@ class JCPool( object ):
317
518
  verbose_interval=verbose_interval )
318
519
 
319
520
  @staticmethod
320
- def validate( F : Callable, args : list, kwargs : Mapping ):
321
- """ Check that 'args' and 'kwargs' do not contain Context objects without channel """
521
+ def _validate( F : Callable, args : list, kwargs : Mapping ):
522
+ """ Check that ``args`` and ``kwargs`` do not contain ``Context`` objects without channel """
322
523
  for k, v in enumerate(args):
323
524
  if isinstance(v, Context) and not isinstance(v.channel, ParallelContextChannel):
324
525
  raise RuntimeError(f"Argument #{k} for {F.__qualname__} is a Context object, but its channel is not set to 'ParallelContextChannel'. Use JPool.context().")
@@ -328,22 +529,29 @@ class JCPool( object ):
328
529
 
329
530
  def delayed(self, F : Callable):
330
531
  """
331
- Decorate a function F for parallel execution.
332
- Synthatical sugar aroud joblib.delayed().
333
- Checks that there are no Context arguments without ParallelContextChannel present.
532
+ Decorate a function for parallel execution.
533
+
534
+ This decorator adds minor syntactic sugar on top of :func:`joblib.delayed`
535
+ (which in turn is discussed `here <https://joblib.readthedocs.io/en/latest/parallel.html#parallel>`__).
536
+
537
+ When called, this decorator checks that no :class:`cdxcore.verbose.Context`
538
+ arguments are passed to the pooled function which have no ``ParallelContextChannel`` present. In other words,
539
+ the function detects if the user forgot to use :meth:`cdxcore.jcpool.JCPool.context`.
334
540
 
335
541
  Parameters
336
542
  ----------
337
- F : function.
543
+ F : Callable
544
+ Function.
338
545
 
339
546
  Returns
340
547
  -------
341
- Decorated function.
548
+ wrapped F : Callable
549
+ Decorated function.
342
550
  """
343
551
  if self._threading:
344
- return jl_delayed(F)
552
+ return _jl_delayed(F)
345
553
  def delayed_function( *args, **kwargs ):
346
- JCPool.validate( F, args, kwargs )
554
+ JCPool._validate( F, args, kwargs )
347
555
  return F, args, kwargs # mimic joblin.delayed()
348
556
  try:
349
557
  delayed_function = functools.wraps(F)(delayed_function)
@@ -351,61 +559,84 @@ class JCPool( object ):
351
559
  " functools.wraps fails on some callable objects "
352
560
  return delayed_function
353
561
 
354
- def parallel(self, jobs : Iterable) -> Iterable:
562
+ def parallel(self, jobs : Sequence|Mapping) -> Iterable:
355
563
  """
356
- Process 'jobs' in parallel using the current multiprocessing pool.
357
- All (function) values of 'jobs' must be generated using self.delayed.
358
- See help(JCPool) for usage patterns.
564
+ Process a number of jobs in parallel using the current multiprocessing pool.
565
+
566
+ All functions used in ``jobs`` must have been decorated using :dec:`cdxcore.jcpool.JCPool.delayed`.
567
+
568
+ This function returns an iterator which yields results as soon as they
569
+ are computed.
570
+
571
+ If ``jobs`` is a ``Sequence`` you can also use
572
+ :meth:`cdxcore.jcpool.JCPool.parallel_to_list` to retrieve
573
+ a :class:`list` of all results upon completion of the last job. Similarly, if ``jobs``
574
+ is a ``Mapping``, use :meth:`cdxcore.jcpool.JCPool.parallel_to_dict` to retrieve
575
+ a :class:`dict` of results upon completion of the last job.
359
576
 
360
577
  Parameters
361
578
  ----------
362
- jobs:
363
- can be a sequence, a generator, or a dictionary.
364
- Each function value must have been generated using JCPool.delayed()
579
+ jobs : Sequence | Mapping
580
+ Can be a :class:`Sequence` containing ``Callable`` functions,
581
+ or a :class:`Mapping` whose values are ``Callable`` functions.
582
+
583
+ Each ``Callable`` used as part of either must
584
+ have been decorated with :dec:`cdxcore.jcpool.JCPool.delayed`.
365
585
 
366
586
  Returns
367
587
  -------
368
- An iterator which yields results as soon as they are available.
369
- If 'jobs' is a dictionary, then the resutling iterator will generate tuples with the first
370
- element equal to the dictionary key of the respective function job.
588
+ parallel : Iterator
589
+ An iterator which yields results as soon as they are available.
590
+ If ``jobs`` is a :class:`Mapping`, then the resulting iterator will generate tuples with the first
591
+ element equal to the mapping key of the respective function job. This function will *not*
592
+ return a dictionary.
371
593
  """
372
594
  return _parallel( self._pool, jobs )
373
595
 
374
- def parallel_to_dict(self, jobs : Mapping) -> Mapping:
596
+ def parallel_to_dict(self, jobs : Mapping) -> dict:
375
597
  """
376
- Process 'jobs' in parallel using the current multiprocessing pool.
377
- All values of the dictionary 'jobs' must be generated using self.delayed.
378
- This function awaits the calculation of all elements of 'jobs' and
379
- returns a dictionary with the results.
598
+ Process a number of jobs in parallel using the current multiprocessing pool,
599
+ and return all results in a dictionary upon completion.
600
+
601
+ This function awaits the calculation of all elements of ``jobs`` and
602
+ returns a :class:`dict` with the results.
380
603
 
381
- See help(JCPool) for usage patterns.
382
-
383
604
  Parameters
384
605
  ----------
385
- jobs:
386
- A dictionary where all (function) values must have been generated using JCPool.delayed.
606
+ jobs : Mapping
607
+ A dictionary where all (function) values must have been decorated
608
+ with :dec:`cdxcore.jcpool.JCPool.delayed`.
387
609
 
388
610
  Returns
389
611
  -------
390
- A dictionary with results.
391
- If 'jobs' is an OrderedDict, then this function will return an OrderedDict
392
- with the same order as 'jobs'.
612
+ Results : dict
613
+ A dictionary with results.
614
+
615
+ If ``jobs`` is an :class:`OrderedDict`, then this function will return an :class:`OrderedDict`
616
+ with the same order as ``jobs``. Otherwise the elements of the ``dict`` returned
617
+ by this function are in completion order.
393
618
  """
394
619
  return _parallel_to_dict( self._pool, jobs )
395
620
 
396
621
  def parallel_to_list(self, jobs : Sequence ) -> Sequence:
397
622
  """
398
- Call parallel() and convert the resulting generator into a list.
399
-
623
+ Process a number of jobs in parallel using the current multiprocessing pool,
624
+ and return all results in a list upon completion.
625
+
626
+ This function awaits the calculation of all elements of ``jobs`` and
627
+ returns a :class:`list` with the results.
628
+
400
629
  Parameters
401
630
  ----------
402
- jobs:
403
- can be a sequence, a generator, or a dictionary.
404
- Each function value must have been generated using JCPool.delayed()
631
+ jobs : Sequence
632
+ A sequence of ``Callable`` functions, each of which
633
+ must have been decorated
634
+ with :dec:`cdxcore.jcpool.JCPool.delayed`.
405
635
 
406
636
  Returns
407
637
  -------
408
- An list with the results in order of the input.
638
+ Results : list
639
+ A list with results, in the order of ``jobs``.
409
640
  """
410
641
  return _parallel_to_list( self._pool, jobs )
411
642
 
cdxcore/subdir.py CHANGED
@@ -1786,7 +1786,7 @@ class SubDir(object):
1786
1786
  # write to temp file, then rename into target file
1787
1787
  # this reduces collision when i/o operations are slow
1788
1788
  full_file_name = self.full_file_name(file,ext=ext)
1789
- tmp_file = unique_hash48( [ file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() ] )
1789
+ tmp_file = unique_hash48( file, uuid.getnode(), os.getpid(), threading.get_ident(), datetime.datetime.now() )
1790
1790
  tmp_i = 0
1791
1791
  fullTmpFile = self.full_file_name(tmp_file,ext="tmp" if not ext=="tmp" else "_tmp")
1792
1792
  while os.path.exists(fullTmpFile):