dcicutils 8.13.3.1b27__py3-none-any.whl → 8.13.3.1b30__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
dcicutils/misc_utils.py CHANGED
@@ -990,6 +990,10 @@ def to_integer(value: str,
990
990
  allow_commas: bool = False,
991
991
  allow_multiplier_suffix: bool = False,
992
992
  fallback: Optional[Union[int, float]] = None) -> Optional[int]:
993
+ """
994
+ Converts the given string value to an int or None or the given fallback value if malformed.
995
+ See comments in to_number for details on the other arguments.
996
+ """
993
997
  return to_number(value, fallback=fallback, as_float=False,
994
998
  allow_commas=allow_commas,
995
999
  allow_multiplier_suffix=allow_multiplier_suffix)
@@ -999,25 +1003,33 @@ def to_float(value: str,
999
1003
  allow_commas: bool = False,
1000
1004
  allow_multiplier_suffix: bool = False,
1001
1005
  fallback: Optional[Union[int, float]] = None) -> Optional[int]:
1006
+ """
1007
+ Converts the given string value to a float or None or the given fallback value if malformed.
1008
+ See comments in to_number for details on the other arguments.
1009
+ """
1002
1010
  return to_number(value, fallback=fallback, as_float=True,
1003
1011
  allow_commas=allow_commas,
1004
1012
  allow_multiplier_suffix=allow_multiplier_suffix)
1005
1013
 
1006
1014
 
1007
- _TO_NUMBER_MULTIPLIER_K = 1000
1008
- _TO_NUMBER_MULTIPLIER_M = 1000 * _TO_NUMBER_MULTIPLIER_K
1009
- _TO_NUMBER_MULTIPLIER_G = 1000 * _TO_NUMBER_MULTIPLIER_M
1010
- _TO_NUMBER_MULTIPLIER_T = 1000 * _TO_NUMBER_MULTIPLIER_G
1015
+ _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING = 0
1016
+ _TO_NUMBER_POWER_OF_TEN_FOR_K = 3
1017
+ _TO_NUMBER_POWER_OF_TEN_FOR_M = 6
1018
+ _TO_NUMBER_POWER_OF_TEN_FOR_G = 9
1019
+ _TO_NUMBER_POWER_OF_TEN_FOR_T = 12
1011
1020
 
1012
1021
  _TO_NUMBER_MULTIPLIER_SUFFIXES = {
1013
- "K": _TO_NUMBER_MULTIPLIER_K,
1014
- "KB": _TO_NUMBER_MULTIPLIER_K,
1015
- "M": _TO_NUMBER_MULTIPLIER_M,
1016
- "MB": _TO_NUMBER_MULTIPLIER_M,
1017
- "G": _TO_NUMBER_MULTIPLIER_G,
1018
- "GB": _TO_NUMBER_MULTIPLIER_G,
1019
- "T": _TO_NUMBER_MULTIPLIER_T,
1020
- "TB": _TO_NUMBER_MULTIPLIER_T
1022
+ "K": _TO_NUMBER_POWER_OF_TEN_FOR_K,
1023
+ "KB": _TO_NUMBER_POWER_OF_TEN_FOR_K,
1024
+ "M": _TO_NUMBER_POWER_OF_TEN_FOR_M,
1025
+ "MB": _TO_NUMBER_POWER_OF_TEN_FOR_M,
1026
+ "G": _TO_NUMBER_POWER_OF_TEN_FOR_G,
1027
+ "GB": _TO_NUMBER_POWER_OF_TEN_FOR_G,
1028
+ "T": _TO_NUMBER_POWER_OF_TEN_FOR_T,
1029
+ "TB": _TO_NUMBER_POWER_OF_TEN_FOR_T,
1030
+ # B means bytes or bases and BP means base pairs; needs to be last.
1031
+ "B": _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING,
1032
+ "BP": _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING
1021
1033
  }
1022
1034
 
1023
1035
 
@@ -1028,7 +1040,7 @@ def to_number(value: str,
1028
1040
  fallback: Optional[Union[int, float]] = None) -> Optional[Union[int, float]]:
1029
1041
  """
1030
1042
  Converts the given string value to an int, or float if as_float is True,
1031
- or None or the give fallback value if malformed.
1043
+ or None or the given fallback value if malformed.
1032
1044
 
1033
1045
  If allow_commas is True then allows appropriately placed commas (i.e. every three digits).
1034
1046
 
@@ -1050,7 +1062,7 @@ def to_number(value: str,
1050
1062
  else:
1051
1063
  return value if isinstance(value, int) else fallback
1052
1064
 
1053
- value_multiplier = 1
1065
+ value_multiplier = _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING
1054
1066
  value_fraction = None
1055
1067
  value_negative = False
1056
1068
 
@@ -1066,29 +1078,27 @@ def to_number(value: str,
1066
1078
  value_upper = value.upper()
1067
1079
  for suffix in _TO_NUMBER_MULTIPLIER_SUFFIXES:
1068
1080
  if value_upper.endswith(suffix):
1069
- value_multiplier *= _TO_NUMBER_MULTIPLIER_SUFFIXES[suffix]
1081
+ value_multiplier = _TO_NUMBER_MULTIPLIER_SUFFIXES[suffix]
1070
1082
  if not (value := value[:-len(suffix)].strip()):
1071
1083
  return fallback
1072
1084
  break
1073
1085
 
1074
1086
  if (allow_multiplier_suffix is True) or (as_float is True):
1075
- # Allow for example "1.5K" to mean 1500 (integer).
1087
+ # Allow for example "1.5K" to mean 1500 (int).
1076
1088
  if (dot_index := value.rfind(".")) >= 0:
1077
1089
  if value_fraction := value[dot_index + 1:].strip():
1078
- try:
1079
- value_fraction = float(f"0.{value_fraction}")
1080
- except Exception:
1090
+ if not value_fraction.isdigit():
1081
1091
  return fallback
1082
1092
  if not (value := value[:dot_index].strip()):
1083
1093
  if not value_fraction:
1084
1094
  return fallback
1085
1095
  value = "0"
1086
1096
  elif (as_float is not True) and (value_dot_zero_suffix := re.search(r"\.0*$", value)):
1087
- # Allow for example "123.00" to mean 123 (integer).
1097
+ # Allow for example "123.00" to mean 123 (int).
1088
1098
  value = value[:value_dot_zero_suffix.start()]
1089
1099
 
1090
1100
  if (allow_commas is True) and ("," in value):
1091
- # Make sure that any commas are properly placed.
1101
+ # Make sure any commas are properly placed/spaced.
1092
1102
  if not re.fullmatch(r"(-?\d{1,3}(,\d{3})*)", value):
1093
1103
  return fallback
1094
1104
  value = value.replace(",", "")
@@ -1096,31 +1106,38 @@ def to_number(value: str,
1096
1106
  if not value.isdigit():
1097
1107
  return fallback
1098
1108
 
1099
- value = float(value) if as_float is True else int(value)
1100
-
1101
- if value_fraction:
1102
- value_float = float(value) + value_fraction
1103
- # Here we do NOT simply do: value_float *= float(value_multiplier);
1104
- # because it introduces obvious FLOATing point precision ERRORs; for example,
1105
- # to_integer("1.5678K", allow_multiplier_suffix=True) would yield 1567.8000000000002
1106
- # if we simply did 1.5678 * 1000.0; but doing the multiplication 10 at a time obviates this
1107
- # idiosyncracy yielding 1567.8; this ASSUMES that the multipliers are simple multiples of 10.
1108
- while value_multiplier > 1:
1109
- value_float *= 10
1110
- value_multiplier /= 10
1111
- if as_float is True:
1112
- value = value_float
1109
+ if value_multiplier != _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING:
1110
+ # We do string manipulation for the (power of ten) multiplier and NOT normal multiplicative
1111
+ # arithmetic because this can EASILY yield UNEXPECTED floating point related INACCURACIES.
1112
+ # E.g. to_integer("1.5678K", allow_multiplier_suffix=True) would yield 1567.8000000000002
1113
+ # rather than 1567.8 if (we simply did 1.5678 * 1000.0); also tried multiplying by 10 at
1114
+ # a time, and using Decimal, which obviated some, but not all, of the idiosyncrasies.
1115
+ if value_fraction:
1116
+ for _ in range(value_multiplier):
1117
+ if value_fraction:
1118
+ value += value_fraction[0]
1119
+ value_fraction = value_fraction[1:]
1120
+ else:
1121
+ value += "0"
1122
+ if value_fraction:
1123
+ if as_float is not True:
1124
+ # Left over fraction for int with multiplier, e.g. "1.2345K" -> 1234.5.
1125
+ return fallback
1126
+ value = float(f"{value}.{value_fraction}")
1127
+ else:
1128
+ value = float(value) if as_float else int(value)
1129
+ else:
1130
+ value = value + ("0" * value_multiplier)
1131
+ value = float(value) if as_float else int(value)
1132
+ elif as_float is True:
1133
+ if value_fraction:
1134
+ value = float(f"{value}.{value_fraction}")
1113
1135
  else:
1114
- value = int(value_float)
1115
- if value_float != value:
1116
- return fallback
1136
+ value = float(value)
1117
1137
  else:
1118
- value *= value_multiplier
1138
+ value = int(value)
1119
1139
 
1120
- if value_negative:
1121
- value = -value
1122
-
1123
- return value
1140
+ return -value if value_negative else value
1124
1141
 
1125
1142
 
1126
1143
  def to_boolean(value: str, fallback: Optional[Any]) -> Optional[Any]:
@@ -749,14 +749,16 @@ class Schema(SchemaBase):
749
749
 
750
750
  def _map_function_date(self, typeinfo: dict) -> Callable:
751
751
  def map_date(value: str, src: Optional[str]) -> str:
752
- value = normalize_date_string(value)
753
- return value if value is not None else ""
752
+ if not (parsed_value := normalize_date_string(value)):
753
+ return value
754
+ return parsed_value
754
755
  return map_date
755
756
 
756
757
  def _map_function_datetime(self, typeinfo: dict) -> Callable:
757
758
  def map_datetime(value: str, src: Optional[str]) -> str:
758
- value = normalize_datetime_string(value)
759
- return value if value is not None else ""
759
+ if not (parsed_value := normalize_datetime_string(value)):
760
+ return value
761
+ return parsed_value
760
762
  return map_datetime
761
763
 
762
764
  def _map_function_ref(self, typeinfo: dict) -> Callable:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.13.3.1b27
3
+ Version: 8.13.3.1b30
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -45,7 +45,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
45
45
  dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
46
46
  dcicutils/license_utils.py,sha256=2Yxnh1T1iuMe6wluwbvpFO_zYSGPxB4-STAMc-vz-YM,47202
47
47
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
48
- dcicutils/misc_utils.py,sha256=R3nUtmx4nynmdTfc98w3CV2JKYCPZPRBijets1nWM20,114877
48
+ dcicutils/misc_utils.py,sha256=yoccfuCRlHOky31ljxtMDugrNE4QHqaweGWOWjWHI94,115922
49
49
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
50
50
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
51
51
  dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
@@ -66,7 +66,7 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
66
66
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
67
67
  dcicutils/snapshot_utils.py,sha256=YDeI3vD-MhAtHwKDzfEm2q-n3l-da2yRpRR3xp0Ah1M,23021
68
68
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
69
- dcicutils/structured_data.py,sha256=LN-Bn-RUM_jy0T6s-ovZT9QXDsQ8w1_QVtkb8LAHFs4,66119
69
+ dcicutils/structured_data.py,sha256=CF2lBrHmMR72B_tVHVaJGwxbX3G_08G4u1BhfZu9GdM,66169
70
70
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
71
71
  dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
72
72
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
75
75
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
76
76
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
77
77
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
78
- dcicutils-8.13.3.1b27.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
- dcicutils-8.13.3.1b27.dist-info/METADATA,sha256=w4K_ZFbf-pBM62p35V5NoZS4HQWx0jlimtBQ2z73pQE,3440
80
- dcicutils-8.13.3.1b27.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
- dcicutils-8.13.3.1b27.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
- dcicutils-8.13.3.1b27.dist-info/RECORD,,
78
+ dcicutils-8.13.3.1b30.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
+ dcicutils-8.13.3.1b30.dist-info/METADATA,sha256=W41FLeNYNat2V-1BvXOE60D4QN1N-Gj7PdjYXESWib4,3440
80
+ dcicutils-8.13.3.1b30.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
+ dcicutils-8.13.3.1b30.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
+ dcicutils-8.13.3.1b30.dist-info/RECORD,,