dcicutils 8.13.3.1b27__py3-none-any.whl → 8.13.3.1b30__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
dcicutils/misc_utils.py CHANGED
@@ -990,6 +990,10 @@ def to_integer(value: str,
990
990
  allow_commas: bool = False,
991
991
  allow_multiplier_suffix: bool = False,
992
992
  fallback: Optional[Union[int, float]] = None) -> Optional[int]:
993
+ """
994
+ Converts the given string value to an int or None or the given fallback value if malformed.
995
+ See comments in to_number for details on the other arguments.
996
+ """
993
997
  return to_number(value, fallback=fallback, as_float=False,
994
998
  allow_commas=allow_commas,
995
999
  allow_multiplier_suffix=allow_multiplier_suffix)
@@ -999,25 +1003,33 @@ def to_float(value: str,
999
1003
  allow_commas: bool = False,
1000
1004
  allow_multiplier_suffix: bool = False,
1001
1005
  fallback: Optional[Union[int, float]] = None) -> Optional[int]:
1006
+ """
1007
+ Converts the given string value to a float or None or the given fallback value if malformed.
1008
+ See comments in to_number for details on the other arguments.
1009
+ """
1002
1010
  return to_number(value, fallback=fallback, as_float=True,
1003
1011
  allow_commas=allow_commas,
1004
1012
  allow_multiplier_suffix=allow_multiplier_suffix)
1005
1013
 
1006
1014
 
1007
- _TO_NUMBER_MULTIPLIER_K = 1000
1008
- _TO_NUMBER_MULTIPLIER_M = 1000 * _TO_NUMBER_MULTIPLIER_K
1009
- _TO_NUMBER_MULTIPLIER_G = 1000 * _TO_NUMBER_MULTIPLIER_M
1010
- _TO_NUMBER_MULTIPLIER_T = 1000 * _TO_NUMBER_MULTIPLIER_G
1015
+ _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING = 0
1016
+ _TO_NUMBER_POWER_OF_TEN_FOR_K = 3
1017
+ _TO_NUMBER_POWER_OF_TEN_FOR_M = 6
1018
+ _TO_NUMBER_POWER_OF_TEN_FOR_G = 9
1019
+ _TO_NUMBER_POWER_OF_TEN_FOR_T = 12
1011
1020
 
1012
1021
  _TO_NUMBER_MULTIPLIER_SUFFIXES = {
1013
- "K": _TO_NUMBER_MULTIPLIER_K,
1014
- "KB": _TO_NUMBER_MULTIPLIER_K,
1015
- "M": _TO_NUMBER_MULTIPLIER_M,
1016
- "MB": _TO_NUMBER_MULTIPLIER_M,
1017
- "G": _TO_NUMBER_MULTIPLIER_G,
1018
- "GB": _TO_NUMBER_MULTIPLIER_G,
1019
- "T": _TO_NUMBER_MULTIPLIER_T,
1020
- "TB": _TO_NUMBER_MULTIPLIER_T
1022
+ "K": _TO_NUMBER_POWER_OF_TEN_FOR_K,
1023
+ "KB": _TO_NUMBER_POWER_OF_TEN_FOR_K,
1024
+ "M": _TO_NUMBER_POWER_OF_TEN_FOR_M,
1025
+ "MB": _TO_NUMBER_POWER_OF_TEN_FOR_M,
1026
+ "G": _TO_NUMBER_POWER_OF_TEN_FOR_G,
1027
+ "GB": _TO_NUMBER_POWER_OF_TEN_FOR_G,
1028
+ "T": _TO_NUMBER_POWER_OF_TEN_FOR_T,
1029
+ "TB": _TO_NUMBER_POWER_OF_TEN_FOR_T,
1030
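+ # B means bytes or bases and BP means base pairs; needs to be last, because suffixes are matched in order with endswith and "B" would otherwise shadow "KB", "MB", "GB", "TB".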
1031
+ "B": _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING,
1032
+ "BP": _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING
1021
1033
  }
1022
1034
 
1023
1035
 
@@ -1028,7 +1040,7 @@ def to_number(value: str,
1028
1040
  fallback: Optional[Union[int, float]] = None) -> Optional[Union[int, float]]:
1029
1041
  """
1030
1042
  Converts the given string value to an int, or float if as_float is True,
1031
- or None or the give fallback value if malformed.
1043
+ or None or the given fallback value if malformed.
1032
1044
 
1033
1045
  If allow_commas is True then allows appropriately placed commas (i.e. every three digits).
1034
1046
 
@@ -1050,7 +1062,7 @@ def to_number(value: str,
1050
1062
  else:
1051
1063
  return value if isinstance(value, int) else fallback
1052
1064
 
1053
- value_multiplier = 1
1065
+ value_multiplier = _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING
1054
1066
  value_fraction = None
1055
1067
  value_negative = False
1056
1068
 
@@ -1066,29 +1078,27 @@ def to_number(value: str,
1066
1078
  value_upper = value.upper()
1067
1079
  for suffix in _TO_NUMBER_MULTIPLIER_SUFFIXES:
1068
1080
  if value_upper.endswith(suffix):
1069
- value_multiplier *= _TO_NUMBER_MULTIPLIER_SUFFIXES[suffix]
1081
+ value_multiplier = _TO_NUMBER_MULTIPLIER_SUFFIXES[suffix]
1070
1082
  if not (value := value[:-len(suffix)].strip()):
1071
1083
  return fallback
1072
1084
  break
1073
1085
 
1074
1086
  if (allow_multiplier_suffix is True) or (as_float is True):
1075
- # Allow for example "1.5K" to mean 1500 (integer).
1087
+ # Allow for example "1.5K" to mean 1500 (int).
1076
1088
  if (dot_index := value.rfind(".")) >= 0:
1077
1089
  if value_fraction := value[dot_index + 1:].strip():
1078
- try:
1079
- value_fraction = float(f"0.{value_fraction}")
1080
- except Exception:
1090
+ if not value_fraction.isdigit():
1081
1091
  return fallback
1082
1092
  if not (value := value[:dot_index].strip()):
1083
1093
  if not value_fraction:
1084
1094
  return fallback
1085
1095
  value = "0"
1086
1096
  elif (as_float is not True) and (value_dot_zero_suffix := re.search(r"\.0*$", value)):
1087
- # Allow for example "123.00" to mean 123 (integer).
1097
+ # Allow for example "123.00" to mean 123 (int).
1088
1098
  value = value[:value_dot_zero_suffix.start()]
1089
1099
 
1090
1100
  if (allow_commas is True) and ("," in value):
1091
- # Make sure that any commas are properly placed.
1101
+ # Make sure any commas are properly placed/spaced.
1092
1102
  if not re.fullmatch(r"(-?\d{1,3}(,\d{3})*)", value):
1093
1103
  return fallback
1094
1104
  value = value.replace(",", "")
@@ -1096,31 +1106,38 @@ def to_number(value: str,
1096
1106
  if not value.isdigit():
1097
1107
  return fallback
1098
1108
 
1099
- value = float(value) if as_float is True else int(value)
1100
-
1101
- if value_fraction:
1102
- value_float = float(value) + value_fraction
1103
- # Here we do NOT simply do: value_float *= float(value_multiplier);
1104
- # because it introduces obvious FLOATing point precision ERRORs; for example,
1105
- # to_integer("1.5678K", allow_multiplier_suffix=True) would yield 1567.8000000000002
1106
- # if we simply did 1.5678 * 1000.0; but doing the multiplication 10 at a time obviates this
1107
- # idiosyncracy yielding 1567.8; this ASSUMES that the multipliers are simple multiples of 10.
1108
- while value_multiplier > 1:
1109
- value_float *= 10
1110
- value_multiplier /= 10
1111
- if as_float is True:
1112
- value = value_float
1109
+ if value_multiplier != _TO_NUMBER_POWER_OF_TEN_FOR_NOTHING:
1110
+ # We do string manipulation for the (power of ten) multiplier and NOT normal multiplicative
1111
+ # arithmetic because this can EASILY yield UNEXPECTED floating point related INACCURACIES.
1112
+ # E.g. to_integer("1.5678K", allow_multiplier_suffix=True) would yield 1567.8000000000002
1113
+ # rather than 1567.8 (if we simply did 1.5678 * 1000.0); also tried multiplying by 10 at
1114
+ # a time, and using Decimal, which obviated some, but not all, of the idiosyncrasies.
1115
+ if value_fraction:
1116
+ for _ in range(value_multiplier):
1117
+ if value_fraction:
1118
+ value += value_fraction[0]
1119
+ value_fraction = value_fraction[1:]
1120
+ else:
1121
+ value += "0"
1122
+ if value_fraction:
1123
+ if as_float is not True:
1124
+ # Left over fraction for int with multiplier, e.g. "1.2345K" -> 1234.5.
1125
+ return fallback
1126
+ value = float(f"{value}.{value_fraction}")
1127
+ else:
1128
+ value = float(value) if as_float else int(value)
1129
+ else:
1130
+ value = value + ("0" * value_multiplier)
1131
+ value = float(value) if as_float else int(value)
1132
+ elif as_float is True:
1133
+ if value_fraction:
1134
+ value = float(f"{value}.{value_fraction}")
1113
1135
  else:
1114
- value = int(value_float)
1115
- if value_float != value:
1116
- return fallback
1136
+ value = float(value)
1117
1137
  else:
1118
- value *= value_multiplier
1138
+ value = int(value)
1119
1139
 
1120
- if value_negative:
1121
- value = -value
1122
-
1123
- return value
1140
+ return -value if value_negative else value
1124
1141
 
1125
1142
 
1126
1143
  def to_boolean(value: str, fallback: Optional[Any]) -> Optional[Any]:
@@ -749,14 +749,16 @@ class Schema(SchemaBase):
749
749
 
750
750
  def _map_function_date(self, typeinfo: dict) -> Callable:
751
751
  def map_date(value: str, src: Optional[str]) -> str:
752
- value = normalize_date_string(value)
753
- return value if value is not None else ""
752
+ if not (parsed_value := normalize_date_string(value)):
753
+ return value
754
+ return parsed_value
754
755
  return map_date
755
756
 
756
757
  def _map_function_datetime(self, typeinfo: dict) -> Callable:
757
758
  def map_datetime(value: str, src: Optional[str]) -> str:
758
- value = normalize_datetime_string(value)
759
- return value if value is not None else ""
759
+ if not (parsed_value := normalize_datetime_string(value)):
760
+ return value
761
+ return parsed_value
760
762
  return map_datetime
761
763
 
762
764
  def _map_function_ref(self, typeinfo: dict) -> Callable:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcicutils
3
- Version: 8.13.3.1b27
3
+ Version: 8.13.3.1b30
4
4
  Summary: Utility package for interacting with the 4DN Data Portal and other 4DN resources
5
5
  Home-page: https://github.com/4dn-dcic/utils
6
6
  License: MIT
@@ -45,7 +45,7 @@ dcicutils/license_policies/park-lab-gpl-pipeline.jsonc,sha256=vLZkwm3Js-kjV44nug
45
45
  dcicutils/license_policies/park-lab-pipeline.jsonc,sha256=9qlY0ASy3iUMQlr3gorVcXrSfRHnVGbLhkS427UaRy4,283
46
46
  dcicutils/license_utils.py,sha256=2Yxnh1T1iuMe6wluwbvpFO_zYSGPxB4-STAMc-vz-YM,47202
47
47
  dcicutils/log_utils.py,sha256=7pWMc6vyrorUZQf-V-M3YC6zrPgNhuV_fzm9xqTPph0,10883
48
- dcicutils/misc_utils.py,sha256=R3nUtmx4nynmdTfc98w3CV2JKYCPZPRBijets1nWM20,114877
48
+ dcicutils/misc_utils.py,sha256=yoccfuCRlHOky31ljxtMDugrNE4QHqaweGWOWjWHI94,115922
49
49
  dcicutils/obfuscation_utils.py,sha256=fo2jOmDRC6xWpYX49u80bVNisqRRoPskFNX3ymFAmjw,5963
50
50
  dcicutils/opensearch_utils.py,sha256=V2exmFYW8Xl2_pGFixF4I2Cc549Opwe4PhFi5twC0M8,1017
51
51
  dcicutils/portal_object_utils.py,sha256=Az3n1aL-PQkN5gOFE6ZqC2XkYsqiwKlq7-tZggs1QN4,11062
@@ -66,7 +66,7 @@ dcicutils/secrets_utils.py,sha256=8dppXAsiHhJzI6NmOcvJV5ldvKkQZzh3Fl-cb8Wm7MI,19
66
66
  dcicutils/sheet_utils.py,sha256=VlmzteONW5VF_Q4vo0yA5vesz1ViUah1MZ_yA1rwZ0M,33629
67
67
  dcicutils/snapshot_utils.py,sha256=YDeI3vD-MhAtHwKDzfEm2q-n3l-da2yRpRR3xp0Ah1M,23021
68
68
  dcicutils/ssl_certificate_utils.py,sha256=F0ifz_wnRRN9dfrfsz7aCp4UDLgHEY8LaK7PjnNvrAQ,9707
69
- dcicutils/structured_data.py,sha256=LN-Bn-RUM_jy0T6s-ovZT9QXDsQ8w1_QVtkb8LAHFs4,66119
69
+ dcicutils/structured_data.py,sha256=CF2lBrHmMR72B_tVHVaJGwxbX3G_08G4u1BhfZu9GdM,66169
70
70
  dcicutils/submitr/progress_constants.py,sha256=5bxyX77ql8qEJearfHEvsvXl7D0GuUODW0T65mbRmnE,2895
71
71
  dcicutils/submitr/ref_lookup_strategy.py,sha256=VJN-Oo0LLna6Vo2cu47eC-eU-yUC9NFlQP29xajejVU,4741
72
72
  dcicutils/task_utils.py,sha256=MF8ujmTD6-O2AC2gRGPHyGdUrVKgtr8epT5XU8WtNjk,8082
@@ -75,8 +75,8 @@ dcicutils/trace_utils.py,sha256=g8kwV4ebEy5kXW6oOrEAUsurBcCROvwtZqz9fczsGRE,1769
75
75
  dcicutils/validation_utils.py,sha256=cMZIU2cY98FYtzK52z5WUYck7urH6JcqOuz9jkXpqzg,14797
76
76
  dcicutils/variant_utils.py,sha256=2H9azNx3xAj-MySg-uZ2SFqbWs4kZvf61JnK6b-h4Qw,4343
77
77
  dcicutils/zip_utils.py,sha256=_Y9EmL3D2dUZhxucxHvrtmmlbZmK4FpSsHEb7rGSJLU,3265
78
- dcicutils-8.13.3.1b27.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
- dcicutils-8.13.3.1b27.dist-info/METADATA,sha256=w4K_ZFbf-pBM62p35V5NoZS4HQWx0jlimtBQ2z73pQE,3440
80
- dcicutils-8.13.3.1b27.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
- dcicutils-8.13.3.1b27.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
- dcicutils-8.13.3.1b27.dist-info/RECORD,,
78
+ dcicutils-8.13.3.1b30.dist-info/LICENSE.txt,sha256=qnwSmfnEWMl5l78VPDEzAmEbLVrRqQvfUQiHT0ehrOo,1102
79
+ dcicutils-8.13.3.1b30.dist-info/METADATA,sha256=W41FLeNYNat2V-1BvXOE60D4QN1N-Gj7PdjYXESWib4,3440
80
+ dcicutils-8.13.3.1b30.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
81
+ dcicutils-8.13.3.1b30.dist-info/entry_points.txt,sha256=W6kEWdUJk9tQ4myAgpehPdebcwvCAZ7UgB-wyPgDUMg,335
82
+ dcicutils-8.13.3.1b30.dist-info/RECORD,,