dcicutils 8.13.3.1b27__py3-none-any.whl → 8.13.3.1b30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcicutils/misc_utils.py +60 -43
- dcicutils/structured_data.py +6 -4
- {dcicutils-8.13.3.1b27.dist-info → dcicutils-8.13.3.1b30.dist-info}/METADATA +1 -1
- {dcicutils-8.13.3.1b27.dist-info → dcicutils-8.13.3.1b30.dist-info}/RECORD +7 -7
- {dcicutils-8.13.3.1b27.dist-info → dcicutils-8.13.3.1b30.dist-info}/LICENSE.txt +0 -0
- {dcicutils-8.13.3.1b27.dist-info → dcicutils-8.13.3.1b30.dist-info}/WHEEL +0 -0
- {dcicutils-8.13.3.1b27.dist-info → dcicutils-8.13.3.1b30.dist-info}/entry_points.txt +0 -0
dcicutils/misc_utils.py
CHANGED
@@ -990,6 +990,10 @@ def to_integer(value: str,
|
|
990
990
|
allow_commas: bool = False,
|
991
991
|
allow_multiplier_suffix: bool = False,
|
992
992
|
fallback: Optional[Union[int, float]] = None) -> Optional[int]:
|
993
|
+
"""
|
994
|
+
Converts the given string value to an int or None or the given fallback value if malformed.
|
995
|
+
See comments in to_number for details on the other arguments.
|
996
|
+
"""
|
993
997
|
return to_number(value, fallback=fallback, as_float=False,
|
994
998
|
allow_commas=allow_commas,
|
995
999
|
allow_multiplier_suffix=allow_multiplier_suffix)
|
@@ -999,25 +1003,33 @@ def to_float(value: str,
|
|
999
1003
|
allow_commas: bool = False,
|
1000
1004
|
allow_multiplier_suffix: bool = False,
|
1001
1005
|
fallback: Optional[Union[int, float]] = None) -> Optional[int]:
|
1006
|
+
"""
|
1007
|
+
Converts the given string value to a float or None or the given fallback value if malformed.
|
1008
|
+
See comments in to_number for details on the other arguments.
|
1009
|
+
"""
|
1002
1010
|
return to_number(value, fallback=fallback, as_float=True,
|
1003
1011
|
allow_commas=allow_commas,
|
1004
1012
|
allow_multiplier_suffix=allow_multiplier_suffix)
|
1005
1013
|
|
1006
1014
|
|
1007
|
-
|
1008
|
-
|
1009
|
-
|
1010
|
-
|
1015
|
+
_TO_NUMBER_POWER_OF_TEN_FOR_NOTHING = 0
|
1016
|
+
_TO_NUMBER_POWER_OF_TEN_FOR_K = 3
|
1017
|
+
_TO_NUMBER_POWER_OF_TEN_FOR_M = 6
|
1018
|
+
_TO_NUMBER_POWER_OF_TEN_FOR_G = 9
|
1019
|
+
_TO_NUMBER_POWER_OF_TEN_FOR_T = 12
|
1011
1020
|
|
1012
1021
|
_TO_NUMBER_MULTIPLIER_SUFFIXES = {
|
1013
|
-
"K":
|
1014
|
-
"KB":
|
1015
|
-
"M":
|
1016
|
-
"MB":
|
1017
|
-
"G":
|
1018
|
-
"GB":
|
1019
|
-
"T":
|
1020
|
-
"TB":
|
1022
|
+
"K": _TO_NUMBER_POWER_OF_TEN_FOR_K,
|
1023
|
+
"KB": _TO_NUMBER_POWER_OF_TEN_FOR_K,
|
1024
|
+
"M": _TO_NUMBER_POWER_OF_TEN_FOR_M,
|
1025
|
+
"MB": _TO_NUMBER_POWER_OF_TEN_FOR_M,
|
1026
|
+
"G": _TO_NUMBER_POWER_OF_TEN_FOR_G,
|
1027
|
+
"GB": _TO_NUMBER_POWER_OF_TEN_FOR_G,
|
1028
|
+
"T": _TO_NUMBER_POWER_OF_TEN_FOR_T,
|
1029
|
+
"TB": _TO_NUMBER_POWER_OF_TEN_FOR_T,
|
1030
|
+
# B means bytes or bases and BP means base pairs; needs to be last.
|
1031
|
+
"B": _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING,
|
1032
|
+
"BP": _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING
|
1021
1033
|
}
|
1022
1034
|
|
1023
1035
|
|
@@ -1028,7 +1040,7 @@ def to_number(value: str,
|
|
1028
1040
|
fallback: Optional[Union[int, float]] = None) -> Optional[Union[int, float]]:
|
1029
1041
|
"""
|
1030
1042
|
Converts the given string value to an int, or float if as_float is True,
|
1031
|
-
or None or the
|
1043
|
+
or None or the given fallback value if malformed.
|
1032
1044
|
|
1033
1045
|
If allow_commas is True then allows appropriately placed commas (i.e. every three digits).
|
1034
1046
|
|
@@ -1050,7 +1062,7 @@ def to_number(value: str,
|
|
1050
1062
|
else:
|
1051
1063
|
return value if isinstance(value, int) else fallback
|
1052
1064
|
|
1053
|
-
value_multiplier =
|
1065
|
+
value_multiplier = _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING
|
1054
1066
|
value_fraction = None
|
1055
1067
|
value_negative = False
|
1056
1068
|
|
@@ -1066,29 +1078,27 @@ def to_number(value: str,
|
|
1066
1078
|
value_upper = value.upper()
|
1067
1079
|
for suffix in _TO_NUMBER_MULTIPLIER_SUFFIXES:
|
1068
1080
|
if value_upper.endswith(suffix):
|
1069
|
-
value_multiplier
|
1081
|
+
value_multiplier = _TO_NUMBER_MULTIPLIER_SUFFIXES[suffix]
|
1070
1082
|
if not (value := value[:-len(suffix)].strip()):
|
1071
1083
|
return fallback
|
1072
1084
|
break
|
1073
1085
|
|
1074
1086
|
if (allow_multiplier_suffix is True) or (as_float is True):
|
1075
|
-
# Allow for example "1.5K" to mean 1500 (
|
1087
|
+
# Allow for example "1.5K" to mean 1500 (int).
|
1076
1088
|
if (dot_index := value.rfind(".")) >= 0:
|
1077
1089
|
if value_fraction := value[dot_index + 1:].strip():
|
1078
|
-
|
1079
|
-
value_fraction = float(f"0.{value_fraction}")
|
1080
|
-
except Exception:
|
1090
|
+
if not value_fraction.isdigit():
|
1081
1091
|
return fallback
|
1082
1092
|
if not (value := value[:dot_index].strip()):
|
1083
1093
|
if not value_fraction:
|
1084
1094
|
return fallback
|
1085
1095
|
value = "0"
|
1086
1096
|
elif (as_float is not True) and (value_dot_zero_suffix := re.search(r"\.0*$", value)):
|
1087
|
-
# Allow for example "123.00" to mean 123 (
|
1097
|
+
# Allow for example "123.00" to mean 123 (int).
|
1088
1098
|
value = value[:value_dot_zero_suffix.start()]
|
1089
1099
|
|
1090
1100
|
if (allow_commas is True) and ("," in value):
|
1091
|
-
# Make sure
|
1101
|
+
# Make sure any commas are properly placed/spaced.
|
1092
1102
|
if not re.fullmatch(r"(-?\d{1,3}(,\d{3})*)", value):
|
1093
1103
|
return fallback
|
1094
1104
|
value = value.replace(",", "")
|
@@ -1096,31 +1106,38 @@ def to_number(value: str,
|
|
1096
1106
|
if not value.isdigit():
|
1097
1107
|
return fallback
|
1098
1108
|
|
1099
|
-
|
1100
|
-
|
1101
|
-
|
1102
|
-
|
1103
|
-
#
|
1104
|
-
#
|
1105
|
-
|
1106
|
-
|
1107
|
-
|
1108
|
-
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1109
|
+
if value_multiplier != _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING:
|
1110
|
+
# We do string manipulation for the (power of ten) multiplier and NOT normal multiplicative
|
1111
|
+
# arithmetic because this can EASILY yield UNEXPECTED floating point related INACCURACIES.
|
1112
|
+
# E.g. to_integer("1.5678K", allow_multiplier_suffix=True) would yield 1567.8000000000002
|
1113
|
+
# rather than 1567.8 if (we simply did 1.5678 * 1000.0); also tried multiplying by 10 at
|
1114
|
+
# a time, and using Decimal, which obviated some, but not all, of the idiosyncrasies.
|
1115
|
+
if value_fraction:
|
1116
|
+
for _ in range(value_multiplier):
|
1117
|
+
if value_fraction:
|
1118
|
+
value += value_fraction[0]
|
1119
|
+
value_fraction = value_fraction[1:]
|
1120
|
+
else:
|
1121
|
+
value += "0"
|
1122
|
+
if value_fraction:
|
1123
|
+
if as_float is not True:
|
1124
|
+
# Left over fraction for int with multiplier, e.g. "1.2345K" -> 1234.5.
|
1125
|
+
return fallback
|
1126
|
+
value = float(f"{value}.{value_fraction}")
|
1127
|
+
else:
|
1128
|
+
value = float(value) if as_float else int(value)
|
1129
|
+
else:
|
1130
|
+
value = value + ("0" * value_multiplier)
|
1131
|
+
value = float(value) if as_float else int(value)
|
1132
|
+
elif as_float is True:
|
1133
|
+
if value_fraction:
|
1134
|
+
value = float(f"{value}.{value_fraction}")
|
1113
1135
|
else:
|
1114
|
-
value =
|
1115
|
-
if value_float != value:
|
1116
|
-
return fallback
|
1136
|
+
value = float(value)
|
1117
1137
|
else:
|
1118
|
-
value
|
1138
|
+
value = int(value)
|
1119
1139
|
|
1120
|
-
if value_negative
|
1121
|
-
value = -value
|
1122
|
-
|
1123
|
-
return value
|
1140
|
+
return -value if value_negative else value
|
1124
1141
|
|
1125
1142
|
|
1126
1143
|
def to_boolean(value: str, fallback: Optional[Any]) -> Optional[Any]:
|
dcicutils/structured_data.py
CHANGED
@@ -749,14 +749,16 @@ class Schema(SchemaBase):
|
|
749
749
|
|
750
750
|
def _map_function_date(self, typeinfo: dict) -> Callable:
|
751
751
|
def map_date(value: str, src: Optional[str]) -> str:
|
752
|
-
|
753
|
-
|
752
|
+
if not (parsed_value := normalize_date_string(value)):
|
753
|
+
return value
|
754
|
+
return parsed_value
|
754
755
|
return map_date
|
755
756
|
|
756
757
|
def _map_function_datetime(self, typeinfo: dict) -> Callable:
|
757
758
|
def map_datetime(value: str, src: Optional[str]) -> str:
|
758
|
-
|
759
|
-
|
759
|
+
if not (parsed_value := normalize_datetime_string(value)):
|
760
|
+
return value
|
761
|
+
return parsed_value
|
760
762
|
return map_datetime
|
761
763
|
|
762
764
|
def _map_function_ref(self, typeinfo: dict) -> Callable:
|
@@ -45,7 +45,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
|
|
45
45
|
dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
|
46
46
|
dcicutils/license_utils.py,sha256=2Yxnh1T1iuMe6wluwbvpFO_zYSGPxB4-STAMc-vz-YM,47202
|
47
47
|
dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
|
48
|
-
dcicutils/misc_utils.py,sha256=
|
48
|
+
dcicutils/misc_utils.py,sha256=yoccfuCRlHOky31ljxtMDugrNE4QHqaweGWOWjWHI94,115922
|
49
49
|
dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
|
50
50
|
dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
|
51
51
|
dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
|
@@ -66,7 +66,7 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
|
|
66
66
|
dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
|
67
67
|
dcicutils/snapshot_utils.py,sha256=YDeI3vD-MhAtHwKDzfEm2q-n3l-da2yRpRR3xp0Ah1M,23021
|
68
68
|
dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
|
69
|
-
dcicutils/structured_data.py,sha256=
|
69
|
+
dcicutils/structured_data.py,sha256=CF2lBrHmMR72B_tVHVaJGwxbX3G_08G4u1BhfZu9GdM,66169
|
70
70
|
dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
|
71
71
|
dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
|
72
72
|
dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
|
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
|
|
75
75
|
dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
|
76
76
|
dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
|
77
77
|
dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
|
78
|
-
dcicutils-8.13.3.
|
79
|
-
dcicutils-8.13.3.
|
80
|
-
dcicutils-8.13.3.
|
81
|
-
dcicutils-8.13.3.
|
82
|
-
dcicutils-8.13.3.
|
78
|
+
dcicutils-8.13.3.1b30.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
|
79
|
+
dcicutils-8.13.3.1b30.dist-info/METADATA,sha256=W41FLeNYNat2V-1BvXOE60D4QN1N-Gj7PdjYXESWib4,3440
|
80
|
+
dcicutils-8.13.3.1b30.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
81
|
+
dcicutils-8.13.3.1b30.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
|
82
|
+
dcicutils-8.13.3.1b30.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|