datastock 0.0.32__py3-none-any.whl → 0.0.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datastock/_class0.py +62 -50
- datastock/_class1_binning.py +169 -142
- datastock/_generic_utils.py +338 -82
- datastock/_saveload.py +220 -115
- datastock/version.py +1 -1
- {datastock-0.0.32.dist-info → datastock-0.0.34.dist-info}/METADATA +1 -1
- {datastock-0.0.32.dist-info → datastock-0.0.34.dist-info}/RECORD +10 -10
- {datastock-0.0.32.dist-info → datastock-0.0.34.dist-info}/WHEEL +1 -1
- {datastock-0.0.32.dist-info → datastock-0.0.34.dist-info}/LICENSE +0 -0
- {datastock-0.0.32.dist-info → datastock-0.0.34.dist-info}/top_level.txt +0 -0
datastock/_generic_utils.py
CHANGED
@@ -1,8 +1,14 @@
|
|
1
1
|
|
2
2
|
|
3
|
+
import itertools as itt
|
4
|
+
from copy import deepcopy
|
5
|
+
|
6
|
+
|
3
7
|
import numpy as np
|
4
8
|
import scipy.sparse as scpsp
|
5
9
|
import astropy.units as asunits
|
10
|
+
from functools import reduce # forward compatibility for Python 3
|
11
|
+
import operator
|
6
12
|
|
7
13
|
|
8
14
|
from . import _generic_check
|
@@ -407,8 +413,14 @@ def compare_dict(
|
|
407
413
|
return _compare_dict_verb_return(dkeys, returnas, verb)
|
408
414
|
|
409
415
|
|
410
|
-
def compare_obj(
|
411
|
-
|
416
|
+
def compare_obj(
|
417
|
+
obj0=None,
|
418
|
+
obj1=None,
|
419
|
+
excluded=None,
|
420
|
+
returnas=None,
|
421
|
+
verb=None,
|
422
|
+
):
|
423
|
+
""" Compare the content of 2 instances """
|
412
424
|
|
413
425
|
# -----------
|
414
426
|
# Check class
|
@@ -423,8 +435,8 @@ def compare_obj(obj0=None, obj1=None, returnas=None, verb=None):
|
|
423
435
|
# Check
|
424
436
|
|
425
437
|
return compare_dict(
|
426
|
-
d0=obj0.to_dict(),
|
427
|
-
d1=obj1.to_dict(),
|
438
|
+
d0=obj0.to_dict(excluded=excluded, returnas='values', copy=False),
|
439
|
+
d1=obj1.to_dict(excluded=excluded, returnas='values', copy=False),
|
428
440
|
dname=None,
|
429
441
|
returnas=returnas,
|
430
442
|
verb=verb,
|
@@ -437,34 +449,255 @@ def compare_obj(obj0=None, obj1=None, returnas=None, verb=None):
|
|
437
449
|
# ###############################################################
|
438
450
|
|
439
451
|
|
440
|
-
def
|
441
|
-
|
442
|
-
|
452
|
+
def to_dict(
    coll=None,
    flatten=None,
    sep=None,
    excluded=None,
    # copy vs ref
    asarray=None,
    copy=None,
    # dtypes
    returnas=None,
):
    """ Return the content of coll as a dict of types and / or values

    Parameters
    ----------
    coll:
        Object (or dict) to be explored, passed as-is to flatten_dict_keys()
        and dict_from_dtypes()
    flatten: bool
        Validated with default True; False returns a nested (reshaped) dict
    sep: str or None
        Forwarded to flatten_dict_keys(), reshape_dict(), dict_from_dtypes()
    excluded:
        Forwarded to flatten_dict_keys()
    asarray, copy:
        Forwarded to dict_from_dtypes()
    returnas: str
        Validated with default 'types', one of:
            - 'types': only the key -> class name mapping
            - 'values': only the output of dict_from_dtypes()
            - 'both': the tuple (types, values)
            - 'blended': output of _blend_dicts() with extra_key '__type'

    """

    # ------------
    # check inputs

    flatten = _generic_check._check_var(
        flatten, 'flatten',
        types=bool,
        default=True,
    )

    returnas = _generic_check._check_var(
        returnas, 'returnas',
        types=str,
        default='types',
        allowed=['types', 'values', 'both', 'blended'],
    )

    # ----------------------
    # get flat key/type tree

    flat_types, sep = flatten_dict_keys(
        din=coll,
        parent_key=None,
        sep=sep,
        excluded=excluded,
    )

    # types only => no need to fetch the values
    if returnas == 'types':
        return reshape_dict(flat_types, sep=sep) if flatten is False else flat_types

    # ---------------------------
    # Get list of dict attributes

    values = dict_from_dtypes(
        coll,
        dtypes=flat_types,
        flatten=flatten,
        sep=sep,
        asarray=asarray,
        copy=copy,
    )

    # ---------
    # return

    if returnas == 'blended':
        return _blend_dicts(
            dbase=values,
            dextra=flat_types,
            extra_key='__type',
            flatten=flatten,
        )

    if returnas == 'both':
        return flat_types, values

    return values
|
522
|
+
|
523
|
+
|
524
|
+
def _flatten_dict_check(
    din=None,
    parent_key=None,
    sep=None,
    excluded=None,
):
    """ Validate / normalize the inputs of flatten_dict_keys()

    Parameters
    ----------
    din:
        Not used here (accepted for signature symmetry with the caller)
    parent_key: None, str or tuple
        Only type-checked when not None
    sep: None or str
        Only type-checked when not None
    excluded: None, str, or list / tuple of (str, list or tuple of str)
        Normalized to a tuple of tuples of str

    Returns
    -------
    parent_key, sep, excluded

    Raises
    ------
    Exception
        If excluded cannot be normalized to a tuple of tuples of str

    """

    # ------------
    # check inputs

    # sep
    if sep is not None:
        sep = _generic_check._check_var(
            sep, 'sep',
            default='.',
            types=str,
        )

    # parent_key
    if parent_key is not None:
        parent_key = _generic_check._check_var(
            parent_key, 'parent_key',
            default=('',),
            types=(str, tuple),
        )

    # excluded
    if excluded is not None:

        msg = "Arg excluded must be a tuple of tuples of str!"

        # single str => single 1-element key
        if isinstance(excluded, str):
            excluded = ((excluded,),)

        if not isinstance(excluded, (list, tuple)):
            raise Exception(msg)

        # Always normalize, so that a list of tuples is accepted exactly like
        # a list of lists or a list of str (outer container becomes a tuple,
        # each str becomes a 1-tuple, each list becomes a tuple)
        excluded = tuple(
            tuple(ss) if isinstance(ss, list)
            else (ss if isinstance(ss, tuple) else (ss,))
            for ss in excluded
        )

        # every element must now be a tuple containing only str
        c0 = all(
            isinstance(tt, tuple) and all(isinstance(ss, str) for ss in tt)
            for tt in excluded
        )
        if not c0:
            raise Exception(msg)

    return parent_key, sep, excluded
|
575
|
+
|
576
|
+
|
577
|
+
def flatten_dict_keys(
    din=None,
    parent_key=None,
    sep=None,
    excluded=None,
):
    """ Return a flat dict of keys -> class name of the value, and sep

    If din is a dict, nested dicts are explored recursively and each
    non-dict value is stored under the tuple of keys leading to it.

    Otherwise, every attribute of din listed by dir() whose value is a dict
    is explored, except '__dict__', names containing '__dlinks' and
    properties defined on the class of din.

    Parameters
    ----------
    din: dict or object
        The container to explore
    parent_key: None, str or tuple
        Key(s) to be prepended to all keys found in din
        A str is treated as a single key
    sep: None or str
        If provided, tuple keys are joined into a single str using sep
    excluded: None, str, or sequence of str / sequences of str
        Keys (as tuples) of non-dict values to be left out

    Returns
    -------
    dkeys: dict
        {key: class name}, key being a tuple (sep is None) or a str
    sep: None or str
        The validated sep

    Raises
    ------
    Exception
        If sep is provided and already appears in one of the keys

    """

    # ------------
    # check inputs
    # ------------

    parent_key, sep, excluded = _flatten_dict_check(
        din=din,
        parent_key=parent_key,
        sep=sep,
        excluded=excluded,
    )

    # a str parent_key is a single key: wrap it, otherwise it would be
    # iterated character by character when building the keys below
    if isinstance(parent_key, str):
        parent_key = (parent_key,)

    # ------------
    # top level
    # ------------

    dkeys = {}
    if isinstance(din, dict):

        for k0, v0 in din.items():

            # key
            if parent_key is None:
                key = (k0,)
            else:
                key = tuple(parent_key) + (k0,)

            # value
            if isinstance(v0, dict):
                # sep=None: formatting is only done once, at the top level
                dkeys.update(
                    flatten_dict_keys(
                        v0,
                        key,
                        sep=None,
                        excluded=excluded,
                    )[0]
                )

            else:

                # get class
                if excluded is None or key not in excluded:
                    dkeys[key] = v0.__class__.__name__

    else:
        # din is not a dict: explore its dict-valued, non-property attributes
        lk0 = [
            k0 for k0 in dir(din)
            if k0 != '__dict__'
            and '__dlinks' not in k0
            and not (
                hasattr(din.__class__, k0)
                and isinstance(getattr(din.__class__, k0), property)
            )
            and isinstance(getattr(din, k0), dict)
        ]
        for k0 in lk0:
            dkeys.update(flatten_dict_keys(
                getattr(din, k0),
                parent_key=(k0,),
                sep=None,
                excluded=excluded,
            )[0])

    # ---------------------
    # format keys using sep
    # ---------------------

    if sep is not None:

        # --------------------
        # safety check vs sep

        # dict of non-conform keys
        dkout = {
            k0: v0 for k0, v0 in dkeys.items()
            if any(sep in k1 for k1 in k0)
        }

        # error msg
        if len(dkout) > 0:
            lstr = [f"\t- {k0}: {v0}" for k0, v0 in dkout.items()]
            msg = (
                f"The following keys already have the desired sep '{sep}':\n"
                + '\n'.join(lstr)
            )
            raise Exception(msg)

        # ----------
        # formatting

        dkeys = {sep.join(k0): v0 for k0, v0 in dkeys.items()}

    return dkeys, sep
|
678
|
+
|
679
|
+
|
680
|
+
def getFromDict(dataDict, mapList):
    """ Return the item reached by indexing dataDict with each key in turn

    An empty mapList returns dataDict itself
    """
    node = dataDict
    for key in mapList:
        node = node[key]
    return node
|
682
|
+
|
683
|
+
|
684
|
+
def setInDict(dataDict, mapList, value):
    """ Set value in nested dataDict at the location given by mapList

    All keys but the last one are used to reach the parent container,
    the last key is the one assigned in that container
    """
    parent = getFromDict(dataDict, mapList[:-1])
    parent[mapList[-1]] = value
|
686
|
+
|
687
|
+
|
688
|
+
def dict_from_dtypes(
|
689
|
+
coll=None,
|
690
|
+
dtypes=None,
|
691
|
+
flatten=None,
|
692
|
+
sep=None,
|
693
|
+
asarray=None,
|
694
|
+
copy=None,
|
695
|
+
):
|
696
|
+
""" Assumes dtypes is flat """
|
697
|
+
|
698
|
+
# -------------------------
|
699
|
+
# check inputs
|
700
|
+
|
468
701
|
# asarray
|
469
702
|
asarray = _generic_check._check_var(
|
470
703
|
asarray, 'asarray',
|
@@ -472,99 +705,120 @@ def flatten_dict(
|
|
472
705
|
types=bool,
|
473
706
|
)
|
474
707
|
|
475
|
-
|
476
|
-
|
477
|
-
with_types, 'with_types',
|
708
|
+
copy = _generic_check._check_var(
|
709
|
+
copy, 'copy',
|
478
710
|
default=False,
|
479
711
|
types=bool,
|
480
712
|
)
|
481
713
|
|
482
|
-
#
|
483
|
-
|
484
|
-
type_str, 'type_str',
|
485
|
-
default='__type',
|
486
|
-
types=str,
|
487
|
-
)
|
714
|
+
# -------------------------
|
715
|
+
# get flat / unflat version
|
488
716
|
|
489
|
-
#
|
490
|
-
|
717
|
+
# keys
|
718
|
+
if sep is None:
|
719
|
+
lkeys = sorted(dtypes.keys())
|
720
|
+
else:
|
721
|
+
lkeys = sorted([k0.split(sep) for k0 in dtypes.keys()])
|
491
722
|
|
492
|
-
|
493
|
-
|
723
|
+
# -----------
|
724
|
+
# build
|
494
725
|
|
495
|
-
#
|
496
|
-
|
726
|
+
# initialize dout
|
727
|
+
if flatten is True:
|
728
|
+
dout = {}
|
729
|
+
else:
|
730
|
+
dout = dict(reshape_dict(dtypes, sep=sep))
|
497
731
|
|
498
|
-
|
499
|
-
for k0
|
732
|
+
# loop on all keys from (flat) dtypes
|
733
|
+
for k0 in lkeys:
|
500
734
|
|
501
|
-
#
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
v0,
|
512
|
-
key,
|
513
|
-
sep=sep,
|
514
|
-
asarray=asarray,
|
515
|
-
with_types=with_types,
|
516
|
-
type_str=type_str,
|
517
|
-
).items()
|
518
|
-
)
|
735
|
+
# get value from coll
|
736
|
+
for ii, k1 in enumerate(k0):
|
737
|
+
if ii == 0:
|
738
|
+
out = getattr(coll, k1)
|
739
|
+
else:
|
740
|
+
out = out[k1]
|
741
|
+
|
742
|
+
# asarray
|
743
|
+
if asarray is True:
|
744
|
+
out = np.asarray(out)
|
519
745
|
|
746
|
+
# set value in dout
|
747
|
+
if flatten is True:
|
748
|
+
if sep is None:
|
749
|
+
dout[k0] = out
|
750
|
+
else:
|
751
|
+
dout[sep.join(k0)] = out
|
520
752
|
else:
|
753
|
+
setInDict(dout, k0, out)
|
754
|
+
|
755
|
+
# ---------------
|
756
|
+
# prepare output
|
521
757
|
|
522
|
-
|
523
|
-
|
758
|
+
if copy is True:
|
759
|
+
return deepcopy(dout)
|
760
|
+
else:
|
761
|
+
return dout
|
524
762
|
|
525
|
-
# store type?
|
526
|
-
if with_types:
|
527
|
-
dtypes[f'{key}{type_str}'] = cc
|
528
763
|
|
529
|
-
|
530
|
-
|
764
|
+
def _blend_dicts(
    dbase=None,
    dextra=None,
    extra_key=None,
    flatten=None,
):
    """ Add the items of dextra to dbase, with extra_key appended to each key

    dbase is modified in place and returned.
    extra_key is validated as a str, with default '__type'.
    Only implemented for flatten = True (NotImplementedError otherwise).
    """

    # -------------
    # check inputs

    extra_key = _generic_check._check_var(
        extra_key, 'extra_key',
        types=str,
        default='__type',
    )

    assert isinstance(dbase, dict) and isinstance(dextra, dict)

    # ------------
    # blend

    # only the flat case is handled
    if flatten is not True:
        msg = "Blended dict not implement for flatten = False"
        raise NotImplementedError(msg)

    for key, extra in dextra.items():
        dbase[f"{key}{extra_key}"] = extra

    return dbase
|
550
795
|
|
551
796
|
|
552
797
|
def _reshape_dict(k0, v0, dinit={}, sep=None):
|
553
798
|
""" Populate dinit """
|
554
799
|
|
555
|
-
|
556
|
-
|
800
|
+
assert isinstance(dinit, dict), dinit
|
801
|
+
|
802
|
+
if sep is None:
|
803
|
+
lk = k0
|
804
|
+
else:
|
805
|
+
lk = k0.split(sep)
|
806
|
+
|
807
|
+
k0 = lk[0]
|
557
808
|
|
558
809
|
if len(lk) == 2:
|
559
810
|
if k0 not in dinit.keys():
|
560
811
|
dinit[k0] = {}
|
561
|
-
assert isinstance(dinit[k0], dict)
|
812
|
+
assert isinstance(dinit[k0], dict), (k0, dinit[k0])
|
562
813
|
dinit[k0].update({lk[1]: v0})
|
563
814
|
|
564
815
|
elif len(lk) > 2:
|
565
816
|
if k0 not in dinit.keys():
|
566
817
|
dinit[k0] = {}
|
567
|
-
|
818
|
+
assert isinstance(dinit[k0], dict), (k0, dinit[k0])
|
819
|
+
|
820
|
+
knew = lk[1:] if sep is None else sep.join(lk[1:])
|
821
|
+
_reshape_dict(knew, v0, dinit=dinit[k0], sep=sep)
|
568
822
|
|
569
823
|
else:
|
570
824
|
assert k0 not in dinit.keys()
|
@@ -577,18 +831,20 @@ def reshape_dict(din, sep=None):
|
|
577
831
|
# ------------
|
578
832
|
# check inputs
|
579
833
|
|
580
|
-
sep
|
581
|
-
sep
|
582
|
-
|
583
|
-
|
584
|
-
|
834
|
+
if sep is not None:
|
835
|
+
sep = _generic_check._check_var(
|
836
|
+
sep, 'sep',
|
837
|
+
default='.',
|
838
|
+
types=str,
|
839
|
+
)
|
585
840
|
|
586
|
-
#
|
841
|
+
# ------------------------
|
587
842
|
# Get all individual keys
|
588
843
|
|
589
844
|
dout = {}
|
590
845
|
for k0, v0 in din.items():
|
591
846
|
_reshape_dict(k0, v0, dinit=dout, sep=sep)
|
847
|
+
|
592
848
|
return dout
|
593
849
|
|
594
850
|
|
@@ -601,7 +857,7 @@ def reshape_dict(din, sep=None):
|
|
601
857
|
def KnuthMorrisPratt(text, pattern):
|
602
858
|
|
603
859
|
""" Yields all starting positions of copies of the pattern in the sequence
|
604
|
-
|
860
|
+
|
605
861
|
Calling conventions are similar to string.find, but its arguments can be
|
606
862
|
lists or iterators, not just strings, it returns all matches, not just
|
607
863
|
the first one, and it does not need the whole text in memory at once.
|