meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +2 -1
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/shell/Shell.py +5 -4
- meerschaum/_internal/static.py +8 -24
- meerschaum/actions/bootstrap.py +1 -1
- meerschaum/actions/edit.py +6 -3
- meerschaum/actions/start.py +1 -1
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +2 -1
- meerschaum/api/dash/__init__.py +0 -2
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/dashboard.py +20 -19
- meerschaum/api/dash/callbacks/jobs.py +11 -5
- meerschaum/api/dash/callbacks/pipes.py +106 -5
- meerschaum/api/dash/callbacks/settings/__init__.py +0 -1
- meerschaum/api/dash/callbacks/{settings/tokens.py → tokens.py} +1 -1
- meerschaum/api/dash/jobs.py +1 -1
- meerschaum/api/dash/pages/__init__.py +2 -1
- meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
- meerschaum/api/dash/pages/pipes.py +4 -3
- meerschaum/api/dash/pages/settings/__init__.py +0 -1
- meerschaum/api/dash/pages/{settings/tokens.py → tokens.py} +6 -8
- meerschaum/api/dash/pipes.py +131 -0
- meerschaum/api/dash/tokens.py +28 -31
- meerschaum/api/routes/_pipes.py +47 -37
- meerschaum/config/_default.py +13 -2
- meerschaum/config/_paths.py +1 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +9 -8
- meerschaum/connectors/api/_pipes.py +2 -18
- meerschaum/connectors/api/_tokens.py +2 -2
- meerschaum/connectors/instance/_tokens.py +10 -6
- meerschaum/connectors/sql/_SQLConnector.py +14 -0
- meerschaum/connectors/sql/_create_engine.py +3 -14
- meerschaum/connectors/sql/_pipes.py +175 -185
- meerschaum/connectors/sql/_sql.py +38 -20
- meerschaum/connectors/sql/tables/__init__.py +237 -122
- meerschaum/connectors/valkey/_pipes.py +44 -16
- meerschaum/core/Pipe/__init__.py +28 -5
- meerschaum/core/Pipe/_attributes.py +273 -46
- meerschaum/core/Pipe/_data.py +55 -17
- meerschaum/core/Pipe/_dtypes.py +19 -4
- meerschaum/core/Pipe/_edit.py +2 -0
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_sync.py +90 -160
- meerschaum/core/Pipe/_verify.py +3 -3
- meerschaum/core/Token/_Token.py +4 -5
- meerschaum/plugins/bootstrap.py +508 -3
- meerschaum/utils/_get_pipes.py +1 -1
- meerschaum/utils/dataframe.py +385 -68
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +387 -22
- meerschaum/utils/dtypes/sql.py +327 -31
- meerschaum/utils/misc.py +9 -68
- meerschaum/utils/packages/__init__.py +7 -21
- meerschaum/utils/packages/_packages.py +7 -2
- meerschaum/utils/schedule.py +1 -1
- meerschaum/utils/sql.py +8 -8
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/METADATA +5 -17
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/RECORD +66 -65
- meerschaum-3.0.0rc3.dist-info/licenses/NOTICE +2 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/WHEEL +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/entry_points.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/top_level.txt +0 -0
- {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/zip-safe +0 -0
meerschaum/utils/debug.py
CHANGED
@@ -10,16 +10,16 @@ from __future__ import annotations
|
|
10
10
|
from meerschaum.utils.typing import Union, Optional, List
|
11
11
|
|
12
12
|
def dprint(
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
13
|
+
msg: str,
|
14
|
+
leader: bool = True,
|
15
|
+
package: bool = True,
|
16
|
+
color: Optional[Union[str, List[str]]] = None,
|
17
|
+
attrs: Optional[List[str]] = None,
|
18
|
+
nopretty: bool = False,
|
19
|
+
_progress: Optional['rich.progress.Progress'] = None,
|
20
|
+
_task: Optional[int] = None,
|
21
|
+
**kw
|
22
|
+
) -> None:
|
23
23
|
"""Print a debug message."""
|
24
24
|
if attrs is None:
|
25
25
|
attrs = []
|
@@ -86,11 +86,11 @@ def dprint(
|
|
86
86
|
|
87
87
|
|
88
88
|
def _checkpoint(
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
89
|
+
_progress: Optional['rich.progress.Progress'] = None,
|
90
|
+
_task: Optional[int] = None,
|
91
|
+
_total: Optional[int] = None,
|
92
|
+
**kw
|
93
|
+
) -> None:
|
94
94
|
"""If the `_progress` and `_task` objects are provided, increment the task by one step.
|
95
95
|
If `_total` is provided, update the total instead.
|
96
96
|
"""
|
@@ -9,12 +9,14 @@ Utility functions for working with data types.
|
|
9
9
|
import traceback
|
10
10
|
import json
|
11
11
|
import uuid
|
12
|
-
|
12
|
+
import time
|
13
|
+
from datetime import timezone, datetime, date, timedelta
|
13
14
|
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
14
15
|
|
15
16
|
import meerschaum as mrsm
|
16
17
|
from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
|
17
18
|
from meerschaum.utils.warnings import warn
|
19
|
+
from meerschaum._internal.static import STATIC_CONFIG as _STATIC_CONFIG
|
18
20
|
|
19
21
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
20
22
|
'decimal': 'numeric',
|
@@ -30,6 +32,8 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
|
30
32
|
'UUID': 'uuid',
|
31
33
|
'geom': 'geometry',
|
32
34
|
'geog': 'geography',
|
35
|
+
'boolean': 'bool',
|
36
|
+
'day': 'date',
|
33
37
|
}
|
34
38
|
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
35
39
|
'json': 'object',
|
@@ -37,18 +41,52 @@ MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
|
37
41
|
'geometry': 'object',
|
38
42
|
'geography': 'object',
|
39
43
|
'uuid': 'object',
|
40
|
-
'
|
44
|
+
'date': 'date32[day][pyarrow]',
|
45
|
+
'datetime': 'datetime64[us, UTC]',
|
41
46
|
'bool': 'bool[pyarrow]',
|
42
|
-
'int': '
|
43
|
-
'int8': '
|
44
|
-
'int16': '
|
45
|
-
'int32': '
|
46
|
-
'int64': '
|
47
|
-
'str': 'string
|
48
|
-
'bytes': '
|
47
|
+
'int': 'int64[pyarrow]',
|
48
|
+
'int8': 'int8[pyarrow]',
|
49
|
+
'int16': 'int16[pyarrow]',
|
50
|
+
'int32': 'int32[pyarrow]',
|
51
|
+
'int64': 'int64[pyarrow]',
|
52
|
+
'str': 'string',
|
53
|
+
'bytes': 'binary[pyarrow]',
|
49
54
|
None: 'object',
|
50
55
|
}
|
51
56
|
|
57
|
+
# How many of each unit fit into one second (e.g. 1000 milliseconds per second).
# Units coarser than a second are stored as float fractions of a unit per second.
MRSM_PRECISION_UNITS_SCALARS: Dict[str, Union[int, float]] = {
    'nanosecond': 1_000_000_000,
    'microsecond': 1_000_000,
    'millisecond': 1000,
    'second': 1,
    'minute': (1 / 60),
    'hour': (1 / 3600),
    'day': (1 / 86400),
}

# Alternate spellings mapped to the canonical unit names used as keys
# in `MRSM_PRECISION_UNITS_SCALARS`.
MRSM_PRECISION_UNITS_ALIASES: Dict[str, str] = {
    'ns': 'nanosecond',
    'us': 'microsecond',
    'ms': 'millisecond',
    's': 'second',
    'sec': 'second',
    'm': 'minute',
    'min': 'minute',
    'h': 'hour',
    'hr': 'hour',
    'd': 'day',
    'D': 'day',
}
# Canonical unit names mapped to the abbreviation interpolated into
# dtype strings (e.g. `datetime64[us, UTC]`) and passed to `as_unit()`.
MRSM_PRECISION_UNITS_ABBREVIATIONS: Dict[str, str] = {
    'nanosecond': 'ns',
    'microsecond': 'us',
    'millisecond': 'ms',
    'second': 's',
    'minute': 'min',
    'hour': 'hr',
    'day': 'D',
}
|
89
|
+
|
52
90
|
|
53
91
|
def to_pandas_dtype(dtype: str) -> str:
|
54
92
|
"""
|
@@ -147,7 +185,7 @@ def are_dtypes_equal(
|
|
147
185
|
if ldtype in json_dtypes and rdtype in json_dtypes:
|
148
186
|
return True
|
149
187
|
|
150
|
-
numeric_dtypes = ('numeric', 'object')
|
188
|
+
numeric_dtypes = ('numeric', 'decimal', 'object')
|
151
189
|
if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
|
152
190
|
return True
|
153
191
|
|
@@ -155,7 +193,7 @@ def are_dtypes_equal(
|
|
155
193
|
if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
|
156
194
|
return True
|
157
195
|
|
158
|
-
bytes_dtypes = ('bytes', 'object')
|
196
|
+
bytes_dtypes = ('bytes', 'object', 'binary')
|
159
197
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
160
198
|
return True
|
161
199
|
|
@@ -179,7 +217,10 @@ def are_dtypes_equal(
|
|
179
217
|
if ldtype in string_dtypes and rdtype in string_dtypes:
|
180
218
|
return True
|
181
219
|
|
182
|
-
int_dtypes = (
|
220
|
+
int_dtypes = (
|
221
|
+
'int', 'int64', 'int32', 'int16', 'int8',
|
222
|
+
'uint', 'uint64', 'uint32', 'uint16', 'uint8',
|
223
|
+
)
|
183
224
|
if ldtype.lower() in int_dtypes and rdtype.lower() in int_dtypes:
|
184
225
|
return True
|
185
226
|
|
@@ -191,6 +232,13 @@ def are_dtypes_equal(
|
|
191
232
|
if ldtype in bool_dtypes and rdtype in bool_dtypes:
|
192
233
|
return True
|
193
234
|
|
235
|
+
date_dtypes = (
|
236
|
+
'date', 'date32', 'date32[pyarrow]', 'date32[day][pyarrow]',
|
237
|
+
'date64', 'date64[pyarrow]', 'date64[ms][pyarrow]',
|
238
|
+
)
|
239
|
+
if ldtype in date_dtypes and rdtype in date_dtypes:
|
240
|
+
return True
|
241
|
+
|
194
242
|
return False
|
195
243
|
|
196
244
|
|
@@ -461,7 +509,6 @@ def coerce_timezone(
|
|
461
509
|
return dt
|
462
510
|
|
463
511
|
dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
|
464
|
-
|
465
512
|
if dt_is_series:
|
466
513
|
pandas = mrsm.attempt_import('pandas', lazy=False)
|
467
514
|
|
@@ -497,15 +544,40 @@ def coerce_timezone(
|
|
497
544
|
return utc_dt
|
498
545
|
|
499
546
|
|
500
|
-
def to_datetime(
|
547
|
+
def to_datetime(
|
548
|
+
dt_val: Any,
|
549
|
+
as_pydatetime: bool = False,
|
550
|
+
coerce_utc: bool = True,
|
551
|
+
precision_unit: Optional[str] = None,
|
552
|
+
) -> Any:
|
501
553
|
"""
|
502
554
|
Wrap `pd.to_datetime()` and add support for out-of-bounds values.
|
555
|
+
|
556
|
+
Parameters
|
557
|
+
----------
|
558
|
+
dt_val: Any
|
559
|
+
The value to coerce to Pandas Timestamps.
|
560
|
+
|
561
|
+
as_pydatetime: bool, default False
|
562
|
+
If `True`, return a Python datetime object.
|
563
|
+
|
564
|
+
coerce_utc: bool, default True
|
565
|
+
If `True`, ensure the value has UTC tzinfo.
|
566
|
+
|
567
|
+
precision_unit: Optional[str], default None
|
568
|
+
If provided, enforce the provided precision unit.
|
503
569
|
"""
|
504
570
|
pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
|
505
571
|
is_dask = 'dask' in getattr(dt_val, '__module__', '')
|
506
572
|
dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
|
507
573
|
dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
|
508
574
|
pd = pandas if dd is None else dd
|
575
|
+
enforce_precision = precision_unit is not None
|
576
|
+
precision_unit = precision_unit or 'microsecond'
|
577
|
+
true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
|
578
|
+
precision_abbreviation = MRSM_PRECISION_UNITS_ABBREVIATIONS.get(true_precision_unit, None)
|
579
|
+
if not precision_abbreviation:
|
580
|
+
raise ValueError(f"Invalid precision '{precision_unit}'.")
|
509
581
|
|
510
582
|
def parse(x: Any) -> Any:
|
511
583
|
try:
|
@@ -513,6 +585,25 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
|
|
513
585
|
except Exception:
|
514
586
|
return x
|
515
587
|
|
588
|
+
def check_dtype(dtype_to_check: str, with_utc: bool = True) -> bool:
|
589
|
+
dtype_check_against = (
|
590
|
+
f"datetime64[{precision_abbreviation}, UTC]"
|
591
|
+
if with_utc
|
592
|
+
else f"datetime64[{precision_abbreviation}]"
|
593
|
+
)
|
594
|
+
return (
|
595
|
+
dtype_to_check == dtype_check_against
|
596
|
+
if enforce_precision
|
597
|
+
else (
|
598
|
+
dtype_to_check.startswith('datetime64[')
|
599
|
+
and (
|
600
|
+
('utc' in dtype_to_check.lower())
|
601
|
+
if with_utc
|
602
|
+
else ('utc' not in dtype_to_check.lower())
|
603
|
+
)
|
604
|
+
)
|
605
|
+
)
|
606
|
+
|
516
607
|
if isinstance(dt_val, pd.Timestamp):
|
517
608
|
dt_val_to_return = dt_val if not as_pydatetime else dt_val.to_pydatetime()
|
518
609
|
return (
|
@@ -525,7 +616,11 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
|
|
525
616
|
changed_tz = False
|
526
617
|
original_tz = None
|
527
618
|
dtype = str(getattr(dt_val, 'dtype', 'object'))
|
528
|
-
if
|
619
|
+
if (
|
620
|
+
are_dtypes_equal(dtype, 'datetime')
|
621
|
+
and 'utc' not in dtype.lower()
|
622
|
+
and hasattr(dt_val, 'dt')
|
623
|
+
):
|
529
624
|
original_tz = dt_val.dt.tz
|
530
625
|
dt_val = dt_val.dt.tz_localize(timezone.utc)
|
531
626
|
changed_tz = True
|
@@ -533,12 +628,14 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
|
|
533
628
|
try:
|
534
629
|
new_dt_series = (
|
535
630
|
dt_val
|
536
|
-
if dtype
|
537
|
-
else dt_val.astype("datetime64[
|
631
|
+
if check_dtype(dtype, with_utc=True)
|
632
|
+
else dt_val.astype(f"datetime64[{precision_abbreviation}, UTC]")
|
538
633
|
)
|
539
634
|
except pd.errors.OutOfBoundsDatetime:
|
540
635
|
try:
|
541
|
-
|
636
|
+
next_precision = get_next_precision_unit(true_precision_unit)
|
637
|
+
next_precision_abbrevation = MRSM_PRECISION_UNITS_ABBREVIATIONS[next_precision]
|
638
|
+
new_dt_series = dt_val.astype(f"datetime64[{next_precision_abbrevation}, UTC]")
|
542
639
|
except Exception:
|
543
640
|
new_dt_series = None
|
544
641
|
except ValueError:
|
@@ -547,8 +644,8 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
|
|
547
644
|
try:
|
548
645
|
new_dt_series = (
|
549
646
|
new_dt_series
|
550
|
-
if str(getattr(new_dt_series, 'dtype', None))
|
551
|
-
else dt_val.astype("datetime64[
|
647
|
+
if check_dtype(str(getattr(new_dt_series, 'dtype', None)), with_utc=False)
|
648
|
+
else dt_val.astype(f"datetime64[{precision_abbreviation}]")
|
552
649
|
)
|
553
650
|
except Exception:
|
554
651
|
new_dt_series = None
|
@@ -565,6 +662,8 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
|
|
565
662
|
|
566
663
|
try:
|
567
664
|
new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
|
665
|
+
if new_dt_val.unit != precision_abbreviation:
|
666
|
+
new_dt_val = new_dt_val.as_unit(precision_abbreviation)
|
568
667
|
if as_pydatetime:
|
569
668
|
return new_dt_val.to_pydatetime()
|
570
669
|
return new_dt_val
|
@@ -716,12 +815,20 @@ def serialize_datetime(dt: datetime) -> Union[str, None]:
|
|
716
815
|
'{"a": "2022-01-01T00:00:00Z"}'
|
717
816
|
|
718
817
|
"""
|
719
|
-
if not
|
818
|
+
if not hasattr(dt, 'isoformat'):
|
720
819
|
return None
|
721
|
-
|
820
|
+
|
821
|
+
tz_suffix = 'Z' if getattr(dt, 'tzinfo', None) is None else ''
|
722
822
|
return dt.isoformat() + tz_suffix
|
723
823
|
|
724
824
|
|
825
|
+
def serialize_date(d: date) -> Union[str, None]:
    """
    Return the ISO-8601 string for a date-like object.

    Parameters
    ----------
    d: date
        Any object exposing an `isoformat()` method.

    Returns
    -------
    The result of `d.isoformat()`, or `None` if `d` has no `isoformat` attribute.
    """
    if not hasattr(d, 'isoformat'):
        return None
    return d.isoformat()
|
830
|
+
|
831
|
+
|
725
832
|
def json_serialize_value(x: Any, default_to_str: bool = True) -> Union[str, None]:
|
726
833
|
"""
|
727
834
|
Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
|
@@ -745,6 +852,9 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> Union[str, None
|
|
745
852
|
if hasattr(x, 'tzinfo'):
|
746
853
|
return serialize_datetime(x)
|
747
854
|
|
855
|
+
if hasattr(x, 'isoformat'):
|
856
|
+
return serialize_date(x)
|
857
|
+
|
748
858
|
if isinstance(x, bytes):
|
749
859
|
return serialize_bytes(x)
|
750
860
|
|
@@ -846,3 +956,258 @@ def get_geometry_type_srid(
|
|
846
956
|
break
|
847
957
|
|
848
958
|
return geometry_type, srid
|
959
|
+
|
960
|
+
|
961
|
+
def get_current_timestamp(
    precision_unit: str = _STATIC_CONFIG['dtypes']['datetime']['default_precision_unit'],
    precision_interval: int = 1,
    round_to: str = 'down',
    as_pandas: bool = False,
    as_int: bool = False,
    _now: Union[datetime, int, None] = None,
) -> 'Union[datetime, pd.Timestamp, int]':
    """
    Return the current UTC timestamp, rounded to the requested precision.

    Parameters
    ----------
    precision_unit: str, default from the static config (`default_precision_unit`)
        The precision of the timestamp to be returned.
        Valid values are the following:
            - `ns` / `nanosecond`
            - `us` / `microsecond`
            - `ms` / `millisecond`
            - `s` / `sec` / `second`
            - `m` / `min` / `minute`
            - `h` / `hr` / `hour`
            - `d` / `D` / `day`

    precision_interval: int, default 1
        Round the timestamp to the `precision_interval` units.
        For example, `precision_unit='minute'` and `precision_interval=15`
        will round to 15-minute intervals.
        Note: `precision_interval` must be 1 when `precision_unit='nanosecond'`
        (any other value emits a warning and is ignored).

    round_to: str, default 'down'
        The direction to which to round the timestamp.
        Available options are `down`, `up`, and `closest`.

    as_pandas: bool, default False
        If `True`, return a Pandas Timestamp.
        This is always true if `precision_unit` is `nanosecond` (unless `as_int` is set).

    as_int: bool, default False
        If `True`, return the timestamp as an integer count of `precision_unit`
        since the Unix epoch.
        Overrides `as_pandas`.

    _now: Union[datetime, int, None], default None
        Override for the current time.
        An `int` is only honored for nanosecond precision (as epoch nanoseconds)
        and a `datetime` is only honored for every other precision;
        a value of the other type is ignored and the system clock is read instead.

    Returns
    -------
    A Pandas Timestamp, datetime object, or integer with precision to the provided unit.

    Raises
    ------
    ValueError
        If `precision_unit` is neither a known unit nor a known alias.

    Examples
    --------
    >>> get_current_timestamp('ns')
    Timestamp('2025-07-17 17:59:16.423644369+0000', tz='UTC')
    >>> get_current_timestamp('ms', as_pandas=True)
    Timestamp('2025-07-17 17:59:16.423000+0000', tz='UTC')
    >>> get_current_timestamp('ms')
    datetime.datetime(2025, 7, 17, 17, 59, 16, 423000, tzinfo=datetime.timezone.utc)
    """
    true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
    if true_precision_unit not in MRSM_PRECISION_UNITS_SCALARS:
        from meerschaum.utils.misc import items_str
        raise ValueError(
            f"Unknown precision unit '{precision_unit}'. "
            "Accepted values are "
            f"{items_str(list(MRSM_PRECISION_UNITS_SCALARS) + list(MRSM_PRECISION_UNITS_ALIASES))}."
        )

    if not as_int:
        # Python datetimes stop at microseconds, so nanoseconds require pandas.
        as_pandas = as_pandas or true_precision_unit == 'nanosecond'
    pd = mrsm.attempt_import('pandas', lazy=False) if as_pandas else None

    if true_precision_unit == 'nanosecond':
        if precision_interval != 1:
            warn("`precision_interval` must be 1 for nanosecond precision.")
        now_ts = time.time_ns() if not isinstance(_now, int) else _now
        if as_int:
            return now_ts
        return pd.to_datetime(now_ts, unit='ns', utc=True)

    now = datetime.now(timezone.utc) if not isinstance(_now, datetime) else _now
    delta = timedelta(**{true_precision_unit + 's': precision_interval})
    rounded_now = round_time(now, delta, to=round_to)

    if as_int:
        # Count whole units since the epoch with exact integer arithmetic.
        # Multiplying the float `.timestamp()` by a scalar and truncating can
        # land one unit short because of binary floating-point rounding.
        aware_now = (
            rounded_now
            if rounded_now.tzinfo is not None
            # Naive values keep whatever interpretation `.timestamp()` gives them.
            else datetime.fromtimestamp(rounded_now.timestamp(), timezone.utc)
        )
        elapsed = aware_now - datetime(1970, 1, 1, tzinfo=timezone.utc)
        unit_delta = timedelta(**{true_precision_unit + 's': 1})
        # Truncate toward zero (like `int()`) for pre-epoch values.
        num_units = abs(elapsed) // unit_delta
        return num_units if elapsed >= timedelta(0) else -num_units

    ts_val = (
        pd.to_datetime(rounded_now, utc=True)
        if as_pandas
        else rounded_now
    )

    if not as_pandas:
        return ts_val

    # Only these units are converted with `as_unit()`; coarser units are returned as-is.
    as_unit_precisions = ('microsecond', 'millisecond', 'second')
    if true_precision_unit not in as_unit_precisions:
        return ts_val

    return ts_val.as_unit(MRSM_PRECISION_UNITS_ABBREVIATIONS[true_precision_unit])
|
1055
|
+
|
1056
|
+
|
1057
|
+
def dtype_is_special(type_: str) -> bool:
    """
    Determine whether `type_` is one of the special Meerschaum dtypes.
    Being special is distinct from being a Meerschaum alias:
    aliases are resolved first and the resolved dtype is then checked.

    Parameters
    ----------
    type_: str
        The dtype string (or alias) to check.

    Returns
    -------
    `True` if the resolved dtype is an exact special name, is equivalent to
    `datetime` or `date`, or begins with `numeric`, `geometry`, or `geography`.
    """
    resolved_type = MRSM_ALIAS_DTYPES.get(type_, type_)

    exact_special_types = (
        'uuid',
        'json',
        'bytes',
        'numeric',
        'datetime',
        'geometry',
        'geography',
        'date',
    )
    if resolved_type in exact_special_types:
        return True

    # Equivalent spellings of the temporal dtypes.
    for temporal_type in ('datetime', 'date'):
        if are_dtypes_equal(resolved_type, temporal_type):
            return True

    # Parametrized variants share the base name as a prefix.
    return resolved_type.startswith(('numeric', 'geometry', 'geography'))
|
1091
|
+
|
1092
|
+
|
1093
|
+
def get_next_precision_unit(precision_unit: str, decrease: bool = True) -> str:
    """
    Get the next precision string in order of value.

    Parameters
    ----------
    precision_unit: str
        The precision string (`'nanosecond'`, `'ms'`, etc.).

    decrease: bool, default True
        If `True` return the precision unit which is lower (e.g. `nanosecond` -> `microsecond`).
        If `False`, return the precision unit which is higher.

    Returns
    -------
    A `precision` string which is lower or higher than the given precision unit.

    Raises
    ------
    ValueError
        If `precision_unit` is not a known unit or alias,
        or if there is no unit beyond it in the requested direction.

    Examples
    --------
    >>> get_next_precision_unit('nanosecond')
    'microsecond'
    >>> get_next_precision_unit('ms')
    'second'
    >>> get_next_precision_unit('hour', decrease=False)
    'minute'
    """
    true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
    # Check membership explicitly rather than relying on the scalar's truthiness.
    if true_precision_unit not in MRSM_PRECISION_UNITS_SCALARS:
        raise ValueError(f"Invalid precision unit '{precision_unit}'.")

    # Ascending by units-per-second: 'day' sorts first and 'nanosecond' last.
    precisions = sorted(
        MRSM_PRECISION_UNITS_SCALARS,
        key=lambda p: MRSM_PRECISION_UNITS_SCALARS[p]
    )

    precision_index = precisions.index(true_precision_unit)
    new_precision_index = precision_index + (-1 if decrease else 1)
    if new_precision_index < 0 or new_precision_index >= len(precisions):
        raise ValueError(f"No precision {'below' if decrease else 'above'} '{precision_unit}'.")

    return precisions[new_precision_index]
|
1135
|
+
|
1136
|
+
|
1137
|
+
def round_time(
    dt: Optional[datetime] = None,
    date_delta: Optional[timedelta] = None,
    to: 'str' = 'down'
) -> datetime:
    """
    Round a datetime object to a multiple of a timedelta.

    Parameters
    ----------
    dt: Optional[datetime], default None
        If `None`, grab the current UTC datetime (returned as a naive datetime).

    date_delta: Optional[timedelta], default None
        If `None`, use a delta of 1 minute.
        A zero delta returns `dt` unchanged.

    to: 'str', default 'down'
        Available options are `'up'`, `'down'`, and `'closest'`.
        Any other value is treated as `'closest'`.

    Returns
    -------
    A rounded `datetime` object with the same `tzinfo` as `dt`.

    Examples
    --------
    >>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200))
    datetime.datetime(2022, 1, 1, 12, 15)
    >>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200), to='up')
    datetime.datetime(2022, 1, 1, 12, 16)
    >>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200), timedelta(hours=1))
    datetime.datetime(2022, 1, 1, 12, 0)
    >>> round_time(
    ...   datetime(2022, 1, 1, 12, 15, 57, 200),
    ...   timedelta(hours=1),
    ...   to = 'closest'
    ... )
    datetime.datetime(2022, 1, 1, 12, 0)
    >>> round_time(
    ...   datetime(2022, 1, 1, 12, 45, 57, 200),
    ...   timedelta(hours=1),
    ...   to = 'closest'
    ... )
    datetime.datetime(2022, 1, 1, 13, 0)

    """
    if date_delta is None:
        date_delta = timedelta(minutes=1)

    if dt is None:
        dt = datetime.now(timezone.utc).replace(tzinfo=None)

    def get_total_microseconds(td: timedelta) -> int:
        return (td.days * 86400 + td.seconds) * 1_000_000 + td.microseconds

    round_to_microseconds = get_total_microseconds(date_delta)
    if round_to_microseconds == 0:
        return dt

    # The sign of the interval does not affect which multiples exist.
    interval_microseconds = abs(round_to_microseconds)

    # Measure from `datetime.min` so the elapsed count is never negative.
    dt_total_microseconds = get_total_microseconds(dt.replace(tzinfo=None) - datetime.min)

    # Pure integer arithmetic keeps the result exact regardless of the
    # ambient `decimal` context precision.
    num_intervals, remainder = divmod(dt_total_microseconds, interval_microseconds)
    if to == 'down':
        pass
    elif to == 'up':
        if remainder:
            num_intervals += 1
    elif 2 * remainder >= interval_microseconds:
        # 'closest': an exact half rounds up.
        num_intervals += 1

    adjustment_microseconds = (num_intervals * interval_microseconds) - dt_total_microseconds

    return dt + timedelta(microseconds=adjustment_microseconds)
|