meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. meerschaum/_internal/arguments/_parser.py +2 -1
  2. meerschaum/_internal/docs/index.py +49 -2
  3. meerschaum/_internal/shell/Shell.py +5 -4
  4. meerschaum/_internal/static.py +8 -24
  5. meerschaum/actions/bootstrap.py +1 -1
  6. meerschaum/actions/edit.py +6 -3
  7. meerschaum/actions/start.py +1 -1
  8. meerschaum/actions/verify.py +5 -8
  9. meerschaum/api/__init__.py +2 -1
  10. meerschaum/api/dash/__init__.py +0 -2
  11. meerschaum/api/dash/callbacks/__init__.py +1 -0
  12. meerschaum/api/dash/callbacks/dashboard.py +20 -19
  13. meerschaum/api/dash/callbacks/jobs.py +11 -5
  14. meerschaum/api/dash/callbacks/pipes.py +106 -5
  15. meerschaum/api/dash/callbacks/settings/__init__.py +0 -1
  16. meerschaum/api/dash/callbacks/{settings/tokens.py → tokens.py} +1 -1
  17. meerschaum/api/dash/jobs.py +1 -1
  18. meerschaum/api/dash/pages/__init__.py +2 -1
  19. meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
  20. meerschaum/api/dash/pages/pipes.py +4 -3
  21. meerschaum/api/dash/pages/settings/__init__.py +0 -1
  22. meerschaum/api/dash/pages/{settings/tokens.py → tokens.py} +6 -8
  23. meerschaum/api/dash/pipes.py +131 -0
  24. meerschaum/api/dash/tokens.py +28 -31
  25. meerschaum/api/routes/_pipes.py +47 -37
  26. meerschaum/config/_default.py +13 -2
  27. meerschaum/config/_paths.py +1 -0
  28. meerschaum/config/_version.py +1 -1
  29. meerschaum/config/stack/__init__.py +9 -8
  30. meerschaum/connectors/api/_pipes.py +2 -18
  31. meerschaum/connectors/api/_tokens.py +2 -2
  32. meerschaum/connectors/instance/_tokens.py +10 -6
  33. meerschaum/connectors/sql/_SQLConnector.py +14 -0
  34. meerschaum/connectors/sql/_create_engine.py +3 -14
  35. meerschaum/connectors/sql/_pipes.py +175 -185
  36. meerschaum/connectors/sql/_sql.py +38 -20
  37. meerschaum/connectors/sql/tables/__init__.py +237 -122
  38. meerschaum/connectors/valkey/_pipes.py +44 -16
  39. meerschaum/core/Pipe/__init__.py +28 -5
  40. meerschaum/core/Pipe/_attributes.py +273 -46
  41. meerschaum/core/Pipe/_data.py +55 -17
  42. meerschaum/core/Pipe/_dtypes.py +19 -4
  43. meerschaum/core/Pipe/_edit.py +2 -0
  44. meerschaum/core/Pipe/_fetch.py +1 -1
  45. meerschaum/core/Pipe/_sync.py +90 -160
  46. meerschaum/core/Pipe/_verify.py +3 -3
  47. meerschaum/core/Token/_Token.py +4 -5
  48. meerschaum/plugins/bootstrap.py +508 -3
  49. meerschaum/utils/_get_pipes.py +1 -1
  50. meerschaum/utils/dataframe.py +385 -68
  51. meerschaum/utils/debug.py +15 -15
  52. meerschaum/utils/dtypes/__init__.py +387 -22
  53. meerschaum/utils/dtypes/sql.py +327 -31
  54. meerschaum/utils/misc.py +9 -68
  55. meerschaum/utils/packages/__init__.py +7 -21
  56. meerschaum/utils/packages/_packages.py +7 -2
  57. meerschaum/utils/schedule.py +1 -1
  58. meerschaum/utils/sql.py +8 -8
  59. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/METADATA +5 -17
  60. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/RECORD +66 -65
  61. meerschaum-3.0.0rc3.dist-info/licenses/NOTICE +2 -0
  62. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/WHEEL +0 -0
  63. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/entry_points.txt +0 -0
  64. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/licenses/LICENSE +0 -0
  65. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/top_level.txt +0 -0
  66. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/zip-safe +0 -0
meerschaum/utils/debug.py CHANGED
@@ -10,16 +10,16 @@ from __future__ import annotations
10
10
  from meerschaum.utils.typing import Union, Optional, List
11
11
 
12
12
  def dprint(
13
- msg: str,
14
- leader: bool = True,
15
- package: bool = True,
16
- color: Optional[Union[str, List[str]]] = None,
17
- attrs: Optional[List[str]] = None,
18
- nopretty: bool = False,
19
- _progress: Optional['rich.progress.Progress'] = None,
20
- _task: Optional[int] = None,
21
- **kw
22
- ) -> None:
13
+ msg: str,
14
+ leader: bool = True,
15
+ package: bool = True,
16
+ color: Optional[Union[str, List[str]]] = None,
17
+ attrs: Optional[List[str]] = None,
18
+ nopretty: bool = False,
19
+ _progress: Optional['rich.progress.Progress'] = None,
20
+ _task: Optional[int] = None,
21
+ **kw
22
+ ) -> None:
23
23
  """Print a debug message."""
24
24
  if attrs is None:
25
25
  attrs = []
@@ -86,11 +86,11 @@ def dprint(
86
86
 
87
87
 
88
88
  def _checkpoint(
89
- _progress: Optional['rich.progress.Progress'] = None,
90
- _task: Optional[int] = None,
91
- _total: Optional[int] = None,
92
- **kw
93
- ) -> None:
89
+ _progress: Optional['rich.progress.Progress'] = None,
90
+ _task: Optional[int] = None,
91
+ _total: Optional[int] = None,
92
+ **kw
93
+ ) -> None:
94
94
  """If the `_progress` and `_task` objects are provided, increment the task by one step.
95
95
  If `_total` is provided, update the total instead.
96
96
  """
@@ -9,12 +9,14 @@ Utility functions for working with data types.
9
9
  import traceback
10
10
  import json
11
11
  import uuid
12
- from datetime import timezone, datetime
12
+ import time
13
+ from datetime import timezone, datetime, date, timedelta
13
14
  from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
14
15
 
15
16
  import meerschaum as mrsm
16
17
  from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
17
18
  from meerschaum.utils.warnings import warn
19
+ from meerschaum._internal.static import STATIC_CONFIG as _STATIC_CONFIG
18
20
 
19
21
  MRSM_ALIAS_DTYPES: Dict[str, str] = {
20
22
  'decimal': 'numeric',
@@ -30,6 +32,8 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
30
32
  'UUID': 'uuid',
31
33
  'geom': 'geometry',
32
34
  'geog': 'geography',
35
+ 'boolean': 'bool',
36
+ 'day': 'date',
33
37
  }
34
38
  MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
35
39
  'json': 'object',
@@ -37,18 +41,52 @@ MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
37
41
  'geometry': 'object',
38
42
  'geography': 'object',
39
43
  'uuid': 'object',
40
- 'datetime': 'datetime64[ns, UTC]',
44
+ 'date': 'date32[day][pyarrow]',
45
+ 'datetime': 'datetime64[us, UTC]',
41
46
  'bool': 'bool[pyarrow]',
42
- 'int': 'Int64',
43
- 'int8': 'Int8',
44
- 'int16': 'Int16',
45
- 'int32': 'Int32',
46
- 'int64': 'Int64',
47
- 'str': 'string[python]',
48
- 'bytes': 'object',
47
+ 'int': 'int64[pyarrow]',
48
+ 'int8': 'int8[pyarrow]',
49
+ 'int16': 'int16[pyarrow]',
50
+ 'int32': 'int32[pyarrow]',
51
+ 'int64': 'int64[pyarrow]',
52
+ 'str': 'string',
53
+ 'bytes': 'binary[pyarrow]',
49
54
  None: 'object',
50
55
  }
51
56
 
57
# Number of each unit contained in one second.
# Multiplying a timestamp expressed in seconds by a scalar converts it to that unit.
MRSM_PRECISION_UNITS_SCALARS: Dict[str, Union[int, float]] = {
    'nanosecond': 1_000_000_000,
    'microsecond': 1_000_000,
    'millisecond': 1000,
    'second': 1,
    'minute': (1 / 60),
    'hour': (1 / 3600),
    'day': (1 / 86400),
}

# Accepted shorthand spellings, mapped to the canonical unit names
# (the keys of `MRSM_PRECISION_UNITS_SCALARS`).
MRSM_PRECISION_UNITS_ALIASES: Dict[str, str] = {
    'ns': 'nanosecond',
    'us': 'microsecond',
    'ms': 'millisecond',
    's': 'second',
    'sec': 'second',
    'm': 'minute',
    'min': 'minute',
    'h': 'hour',
    'hr': 'hour',
    'd': 'day',
    'D': 'day',
}

# Canonical unit names mapped to the single abbreviation used when building
# dtype strings (e.g. `datetime64[us, UTC]`). Every abbreviation is also a key
# of `MRSM_PRECISION_UNITS_ALIASES`, so the two tables round-trip.
# NOTE(review): 'min', 'hr' and 'D' may not be accepted everywhere a pandas
# unit string is expected; confirm before passing them to `as_unit()`.
MRSM_PRECISION_UNITS_ABBREVIATIONS: Dict[str, str] = {
    'nanosecond': 'ns',
    'microsecond': 'us',
    'millisecond': 'ms',
    'second': 's',
    'minute': 'min',
    'hour': 'hr',
    'day': 'D',
}
89
+
52
90
 
53
91
  def to_pandas_dtype(dtype: str) -> str:
54
92
  """
@@ -147,7 +185,7 @@ def are_dtypes_equal(
147
185
  if ldtype in json_dtypes and rdtype in json_dtypes:
148
186
  return True
149
187
 
150
- numeric_dtypes = ('numeric', 'object')
188
+ numeric_dtypes = ('numeric', 'decimal', 'object')
151
189
  if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
152
190
  return True
153
191
 
@@ -155,7 +193,7 @@ def are_dtypes_equal(
155
193
  if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
156
194
  return True
157
195
 
158
- bytes_dtypes = ('bytes', 'object')
196
+ bytes_dtypes = ('bytes', 'object', 'binary')
159
197
  if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
160
198
  return True
161
199
 
@@ -179,7 +217,10 @@ def are_dtypes_equal(
179
217
  if ldtype in string_dtypes and rdtype in string_dtypes:
180
218
  return True
181
219
 
182
- int_dtypes = ('int', 'int64', 'int32', 'int16', 'int8')
220
+ int_dtypes = (
221
+ 'int', 'int64', 'int32', 'int16', 'int8',
222
+ 'uint', 'uint64', 'uint32', 'uint16', 'uint8',
223
+ )
183
224
  if ldtype.lower() in int_dtypes and rdtype.lower() in int_dtypes:
184
225
  return True
185
226
 
@@ -191,6 +232,13 @@ def are_dtypes_equal(
191
232
  if ldtype in bool_dtypes and rdtype in bool_dtypes:
192
233
  return True
193
234
 
235
+ date_dtypes = (
236
+ 'date', 'date32', 'date32[pyarrow]', 'date32[day][pyarrow]',
237
+ 'date64', 'date64[pyarrow]', 'date64[ms][pyarrow]',
238
+ )
239
+ if ldtype in date_dtypes and rdtype in date_dtypes:
240
+ return True
241
+
194
242
  return False
195
243
 
196
244
 
@@ -461,7 +509,6 @@ def coerce_timezone(
461
509
  return dt
462
510
 
463
511
  dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
464
-
465
512
  if dt_is_series:
466
513
  pandas = mrsm.attempt_import('pandas', lazy=False)
467
514
 
@@ -497,15 +544,40 @@ def coerce_timezone(
497
544
  return utc_dt
498
545
 
499
546
 
500
- def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
547
+ def to_datetime(
548
+ dt_val: Any,
549
+ as_pydatetime: bool = False,
550
+ coerce_utc: bool = True,
551
+ precision_unit: Optional[str] = None,
552
+ ) -> Any:
501
553
  """
502
554
  Wrap `pd.to_datetime()` and add support for out-of-bounds values.
555
+
556
+ Parameters
557
+ ----------
558
+ dt_val: Any
559
+ The value to coerce to Pandas Timestamps.
560
+
561
+ as_pydatetime: bool, default False
562
+ If `True`, return a Python datetime object.
563
+
564
+ coerce_utc: bool, default True
565
+ If `True`, ensure the value has UTC tzinfo.
566
+
567
+ precision_unit: Optional[str], default None
568
+ If provided, enforce the provided precision unit.
503
569
  """
504
570
  pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
505
571
  is_dask = 'dask' in getattr(dt_val, '__module__', '')
506
572
  dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
507
573
  dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
508
574
  pd = pandas if dd is None else dd
575
+ enforce_precision = precision_unit is not None
576
+ precision_unit = precision_unit or 'microsecond'
577
+ true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
578
+ precision_abbreviation = MRSM_PRECISION_UNITS_ABBREVIATIONS.get(true_precision_unit, None)
579
+ if not precision_abbreviation:
580
+ raise ValueError(f"Invalid precision '{precision_unit}'.")
509
581
 
510
582
  def parse(x: Any) -> Any:
511
583
  try:
@@ -513,6 +585,25 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
513
585
  except Exception:
514
586
  return x
515
587
 
588
+ def check_dtype(dtype_to_check: str, with_utc: bool = True) -> bool:
589
+ dtype_check_against = (
590
+ f"datetime64[{precision_abbreviation}, UTC]"
591
+ if with_utc
592
+ else f"datetime64[{precision_abbreviation}]"
593
+ )
594
+ return (
595
+ dtype_to_check == dtype_check_against
596
+ if enforce_precision
597
+ else (
598
+ dtype_to_check.startswith('datetime64[')
599
+ and (
600
+ ('utc' in dtype_to_check.lower())
601
+ if with_utc
602
+ else ('utc' not in dtype_to_check.lower())
603
+ )
604
+ )
605
+ )
606
+
516
607
  if isinstance(dt_val, pd.Timestamp):
517
608
  dt_val_to_return = dt_val if not as_pydatetime else dt_val.to_pydatetime()
518
609
  return (
@@ -525,7 +616,11 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
525
616
  changed_tz = False
526
617
  original_tz = None
527
618
  dtype = str(getattr(dt_val, 'dtype', 'object'))
528
- if are_dtypes_equal(dtype, 'datetime') and 'utc' not in dtype.lower():
619
+ if (
620
+ are_dtypes_equal(dtype, 'datetime')
621
+ and 'utc' not in dtype.lower()
622
+ and hasattr(dt_val, 'dt')
623
+ ):
529
624
  original_tz = dt_val.dt.tz
530
625
  dt_val = dt_val.dt.tz_localize(timezone.utc)
531
626
  changed_tz = True
@@ -533,12 +628,14 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
533
628
  try:
534
629
  new_dt_series = (
535
630
  dt_val
536
- if dtype == 'datetime64[ns, UTC]'
537
- else dt_val.astype("datetime64[ns, UTC]")
631
+ if check_dtype(dtype, with_utc=True)
632
+ else dt_val.astype(f"datetime64[{precision_abbreviation}, UTC]")
538
633
  )
539
634
  except pd.errors.OutOfBoundsDatetime:
540
635
  try:
541
- new_dt_series = dt_val.astype("datetime64[ms, UTC]")
636
+ next_precision = get_next_precision_unit(true_precision_unit)
637
+ next_precision_abbrevation = MRSM_PRECISION_UNITS_ABBREVIATIONS[next_precision]
638
+ new_dt_series = dt_val.astype(f"datetime64[{next_precision_abbrevation}, UTC]")
542
639
  except Exception:
543
640
  new_dt_series = None
544
641
  except ValueError:
@@ -547,8 +644,8 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
547
644
  try:
548
645
  new_dt_series = (
549
646
  new_dt_series
550
- if str(getattr(new_dt_series, 'dtype', None)) == 'datetime64[ns]'
551
- else dt_val.astype("datetime64[ns]")
647
+ if check_dtype(str(getattr(new_dt_series, 'dtype', None)), with_utc=False)
648
+ else dt_val.astype(f"datetime64[{precision_abbreviation}]")
552
649
  )
553
650
  except Exception:
554
651
  new_dt_series = None
@@ -565,6 +662,8 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
565
662
 
566
663
  try:
567
664
  new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
665
+ if new_dt_val.unit != precision_abbreviation:
666
+ new_dt_val = new_dt_val.as_unit(precision_abbreviation)
568
667
  if as_pydatetime:
569
668
  return new_dt_val.to_pydatetime()
570
669
  return new_dt_val
@@ -716,12 +815,20 @@ def serialize_datetime(dt: datetime) -> Union[str, None]:
716
815
  '{"a": "2022-01-01T00:00:00Z"}'
717
816
 
718
817
  """
719
- if not isinstance(dt, datetime):
818
+ if not hasattr(dt, 'isoformat'):
720
819
  return None
721
- tz_suffix = 'Z' if dt.tzinfo is None else ''
820
+
821
+ tz_suffix = 'Z' if getattr(dt, 'tzinfo', None) is None else ''
722
822
  return dt.isoformat() + tz_suffix
723
823
 
724
824
 
825
def serialize_date(d: date) -> Union[str, None]:
    """
    Serialize a date object into its ISO representation.

    Parameters
    ----------
    d: date
        The date to serialize. Any object exposing an `isoformat()` method is accepted.

    Returns
    -------
    The result of `d.isoformat()`, or `None` if `d` has no `isoformat` attribute.
    """
    # Duck-type on `isoformat` rather than `isinstance(d, date)`
    # so date-like objects from other libraries are serialized as well.
    if not hasattr(d, 'isoformat'):
        return None

    return d.isoformat()
830
+
831
+
725
832
  def json_serialize_value(x: Any, default_to_str: bool = True) -> Union[str, None]:
726
833
  """
727
834
  Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
@@ -745,6 +852,9 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> Union[str, None
745
852
  if hasattr(x, 'tzinfo'):
746
853
  return serialize_datetime(x)
747
854
 
855
+ if hasattr(x, 'isoformat'):
856
+ return serialize_date(x)
857
+
748
858
  if isinstance(x, bytes):
749
859
  return serialize_bytes(x)
750
860
 
@@ -846,3 +956,258 @@ def get_geometry_type_srid(
846
956
  break
847
957
 
848
958
  return geometry_type, srid
959
+
960
+
961
def get_current_timestamp(
    precision_unit: str = _STATIC_CONFIG['dtypes']['datetime']['default_precision_unit'],
    precision_interval: int = 1,
    round_to: str = 'down',
    as_pandas: bool = False,
    as_int: bool = False,
    _now: Union[datetime, int, None] = None,
) -> 'Union[datetime, pd.Timestamp, int]':
    """
    Return the current UTC timestamp, rounded to the given precision unit.

    Parameters
    ----------
    precision_unit: str, default is the configured default precision unit
        The precision of the timestamp to be returned.
        Valid values are the following:
            - `ns` / `nanosecond`
            - `us` / `microsecond`
            - `ms` / `millisecond`
            - `s` / `sec` / `second`
            - `m` / `min` / `minute`
            - `h` / `hr` / `hour`
            - `d` / `D` / `day`

    precision_interval: int, default 1
        Round the timestamp to multiples of `precision_interval` units.
        For example, `precision_unit='minute'` and `precision_interval=15`
        will round to 15-minute intervals.
        Note: `precision_interval` is not applied when `precision_unit='nanosecond'`
        (a warning is emitted if it is not 1).

    round_to: str, default 'down'
        The direction to which to round the timestamp.
        Available options are `down`, `up`, and `closest`.

    as_pandas: bool, default False
        If `True`, return a Pandas Timestamp.
        This is always true if `precision_unit` is `nanosecond` (unless `as_int` is set).

    as_int: bool, default False
        If `True`, return the timestamp as an integer count of `precision_unit`
        since the Unix epoch. Overrides `as_pandas`.

    _now: Union[datetime, int, None], default None
        Override for the current time.
        An `int` is only used for nanosecond precision (as nanoseconds since the epoch);
        a `datetime` is only used for every other precision.
        Values of the other kind are ignored and the system clock is read instead.

    Returns
    -------
    A Pandas Timestamp, datetime object, or integer with precision to the provided unit.

    Raises
    ------
    ValueError
        If `precision_unit` is neither a known unit nor a known alias.

    Examples
    --------
    >>> get_current_timestamp('ns')
    Timestamp('2025-07-17 17:59:16.423644369+0000', tz='UTC')
    >>> get_current_timestamp('ms', as_pandas=True)
    Timestamp('2025-07-17 17:59:16.424000+0000', tz='UTC')
    """
    true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
    if true_precision_unit not in MRSM_PRECISION_UNITS_SCALARS:
        from meerschaum.utils.misc import items_str
        raise ValueError(
            f"Unknown precision unit '{precision_unit}'. "
            "Accepted values are "
            f"{items_str(list(MRSM_PRECISION_UNITS_SCALARS) + list(MRSM_PRECISION_UNITS_ALIASES))}."
        )

    is_nanosecond = true_precision_unit == 'nanosecond'

    # Python datetimes stop at microseconds, so nanoseconds require a Pandas Timestamp.
    if not as_int:
        as_pandas = as_pandas or is_nanosecond
    pd = mrsm.attempt_import('pandas', lazy=False) if as_pandas else None

    if is_nanosecond:
        if precision_interval != 1:
            warn("`precision_interval` must be 1 for nanosecond precision.")
        now_ts = _now if isinstance(_now, int) else time.time_ns()
        if as_int:
            return now_ts
        return pd.to_datetime(now_ts, unit='ns', utc=True)

    now = _now if isinstance(_now, datetime) else datetime.now(timezone.utc)
    # `timedelta` keyword arguments are the plural unit names (`minutes`, `hours`, ...).
    delta = timedelta(**{true_precision_unit + 's': precision_interval})
    rounded_now = round_time(now, delta, to=round_to)

    if as_int:
        # NOTE(review): `timestamp()` is a float and the sub-second scalars are
        # inexact (`1 / 60`, ...), so `int()` truncation could land one unit low
        # for some values; confirm whether rounding is preferable here.
        return int(rounded_now.timestamp() * MRSM_PRECISION_UNITS_SCALARS[true_precision_unit])

    if not as_pandas:
        return rounded_now

    ts_val = pd.to_datetime(rounded_now, utc=True)

    # Only these units are converted with `as_unit()`; coarser units keep
    # whatever resolution `pd.to_datetime()` produced.
    as_unit_precisions = ('microsecond', 'millisecond', 'second')
    if true_precision_unit not in as_unit_precisions:
        return ts_val

    return ts_val.as_unit(MRSM_PRECISION_UNITS_ABBREVIATIONS[true_precision_unit])
1055
+
1056
+
1057
def dtype_is_special(type_: str) -> bool:
    """
    Return whether a dtype should be treated as a special Meerschaum dtype.
    This is not the same as a Meerschaum alias.

    Parameters
    ----------
    type_: str
        The dtype string to check. Aliases in `MRSM_ALIAS_DTYPES` are resolved first.

    Returns
    -------
    `True` if the resolved dtype is one of the special dtype names,
    compares equal to `datetime` or `date` via `are_dtypes_equal()`,
    or begins with `numeric`, `geometry`, or `geography`. Otherwise `False`.
    """
    true_type = MRSM_ALIAS_DTYPES.get(type_, type_)

    # Exact names first: cheapest check and covers the common case.
    special_names = (
        'uuid',
        'json',
        'bytes',
        'numeric',
        'datetime',
        'geometry',
        'geography',
        'date',
    )
    if true_type in special_names:
        return True

    # Concrete datetime / date dtype strings.
    if are_dtypes_equal(true_type, 'datetime'):
        return True

    if are_dtypes_equal(true_type, 'date'):
        return True

    # Dtypes which only begin with a special name still count as special.
    return true_type.startswith(('numeric', 'geometry', 'geography'))
1091
+
1092
+
1093
def get_next_precision_unit(precision_unit: str, decrease: bool = True) -> str:
    """
    Get the next precision string in order of value.

    Parameters
    ----------
    precision_unit: str
        The precision string (`'nanosecond'`, `'ms'`, etc.).

    decrease: bool, default True
        If `True` return the precision unit which is lower (e.g. `nanosecond` -> `microsecond`).
        If `False`, return the precision unit which is higher.

    Returns
    -------
    A `precision` string which is lower or higher than the given precision unit.

    Raises
    ------
    ValueError
        If `precision_unit` is not a known unit or alias,
        or if there is no unit beyond it in the requested direction.

    Examples
    --------
    >>> get_next_precision_unit('nanosecond')
    'microsecond'
    >>> get_next_precision_unit('ms')
    'second'
    >>> get_next_precision_unit('hour', decrease=False)
    'minute'
    """
    true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
    if MRSM_PRECISION_UNITS_SCALARS.get(true_precision_unit, None) is None:
        raise ValueError(f"Invalid precision unit '{precision_unit}'.")

    # Ascending by units-per-second, i.e. from coarsest (`day`) to finest (`nanosecond`).
    precisions = sorted(
        MRSM_PRECISION_UNITS_SCALARS,
        key=MRSM_PRECISION_UNITS_SCALARS.__getitem__,
    )

    # Lower precision means a coarser unit, which sits one position earlier.
    step = -1 if decrease else 1
    new_precision_index = precisions.index(true_precision_unit) + step
    if not 0 <= new_precision_index < len(precisions):
        raise ValueError(f"No precision {'below' if decrease else 'above'} '{precision_unit}'.")

    return precisions[new_precision_index]
1135
+
1136
+
1137
def round_time(
    dt: Optional[datetime] = None,
    date_delta: Optional[timedelta] = None,
    to: 'str' = 'down'
) -> datetime:
    """
    Round a datetime object to a multiple of a timedelta.

    Multiples are counted from `datetime.min` using the datetime's wall-clock value;
    any `tzinfo` on `dt` is preserved on the result.

    Parameters
    ----------
    dt: Optional[datetime], default None
        If `None`, grab the current UTC datetime (returned as a naive datetime).

    date_delta: Optional[timedelta], default None
        If `None`, use a delta of 1 minute.
        A zero delta returns `dt` unchanged.

    to: 'str', default 'down'
        Available options are `'up'`, `'down'`, and `'closest'`.
        Any other value is treated as `'closest'`.

    Returns
    -------
    A rounded `datetime` object.

    Examples
    --------
    >>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200))
    datetime.datetime(2022, 1, 1, 12, 15)
    >>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200), to='up')
    datetime.datetime(2022, 1, 1, 12, 16)
    >>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200), timedelta(hours=1))
    datetime.datetime(2022, 1, 1, 12, 0)
    >>> round_time(
    ...     datetime(2022, 1, 1, 12, 15, 57, 200),
    ...     timedelta(hours=1),
    ...     to='closest'
    ... )
    datetime.datetime(2022, 1, 1, 12, 0)
    >>> round_time(
    ...     datetime(2022, 1, 1, 12, 45, 57, 200),
    ...     timedelta(hours=1),
    ...     to='closest'
    ... )
    datetime.datetime(2022, 1, 1, 13, 0)

    """
    if date_delta is None:
        date_delta = timedelta(minutes=1)

    if dt is None:
        dt = datetime.now(timezone.utc).replace(tzinfo=None)

    # Work in whole microseconds with plain integers so the result is exact
    # and does not depend on the ambient `decimal` context precision.
    one_microsecond = timedelta(microseconds=1)

    # Only the magnitude of the interval matters for the rounded result.
    interval_us = abs(date_delta) // one_microsecond
    if interval_us == 0:
        return dt

    # Drop tzinfo so aware datetimes can be measured against the naive `datetime.min`.
    elapsed_us = (dt.replace(tzinfo=None) - datetime.min) // one_microsecond

    num_intervals, remainder_us = divmod(elapsed_us, interval_us)
    if to == 'down':
        bump = False
    elif to == 'up':
        bump = remainder_us > 0
    else:
        # 'closest': exact halves round up.
        bump = 2 * remainder_us >= interval_us

    if bump:
        num_intervals += 1

    adjustment_us = num_intervals * interval_us - elapsed_us
    return dt + timedelta(microseconds=adjustment_us)