meerschaum 2.9.5__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/__init__.py +5 -2
- meerschaum/_internal/__init__.py +1 -0
- meerschaum/_internal/arguments/_parse_arguments.py +4 -4
- meerschaum/_internal/arguments/_parser.py +19 -2
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/entry.py +6 -6
- meerschaum/_internal/shell/Shell.py +1 -1
- meerschaum/_internal/static.py +356 -0
- meerschaum/actions/api.py +12 -2
- meerschaum/actions/bootstrap.py +7 -7
- meerschaum/actions/edit.py +142 -18
- meerschaum/actions/register.py +137 -6
- meerschaum/actions/show.py +117 -29
- meerschaum/actions/stop.py +4 -1
- meerschaum/actions/sync.py +1 -1
- meerschaum/actions/tag.py +9 -8
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +11 -3
- meerschaum/api/_events.py +39 -2
- meerschaum/api/_oauth2.py +118 -8
- meerschaum/api/_tokens.py +102 -0
- meerschaum/api/dash/__init__.py +0 -3
- meerschaum/api/dash/callbacks/custom.py +2 -2
- meerschaum/api/dash/callbacks/dashboard.py +103 -19
- meerschaum/api/dash/callbacks/plugins.py +0 -1
- meerschaum/api/dash/callbacks/register.py +1 -1
- meerschaum/api/dash/callbacks/settings/__init__.py +1 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
- meerschaum/api/dash/callbacks/settings/tokens.py +388 -0
- meerschaum/api/dash/components.py +30 -8
- meerschaum/api/dash/keys.py +19 -93
- meerschaum/api/dash/pages/dashboard.py +1 -20
- meerschaum/api/dash/pages/settings/__init__.py +1 -0
- meerschaum/api/dash/pages/settings/password_reset.py +1 -1
- meerschaum/api/dash/pages/settings/tokens.py +55 -0
- meerschaum/api/dash/pipes.py +94 -59
- meerschaum/api/dash/sessions.py +12 -0
- meerschaum/api/dash/tokens.py +606 -0
- meerschaum/api/dash/websockets.py +1 -1
- meerschaum/api/dash/webterm.py +4 -0
- meerschaum/api/models/__init__.py +23 -3
- meerschaum/api/models/_actions.py +22 -0
- meerschaum/api/models/_pipes.py +85 -7
- meerschaum/api/models/_tokens.py +81 -0
- meerschaum/api/resources/templates/termpage.html +12 -0
- meerschaum/api/routes/__init__.py +1 -0
- meerschaum/api/routes/_actions.py +3 -4
- meerschaum/api/routes/_connectors.py +3 -7
- meerschaum/api/routes/_jobs.py +14 -35
- meerschaum/api/routes/_login.py +49 -12
- meerschaum/api/routes/_misc.py +5 -10
- meerschaum/api/routes/_pipes.py +173 -140
- meerschaum/api/routes/_plugins.py +38 -28
- meerschaum/api/routes/_tokens.py +236 -0
- meerschaum/api/routes/_users.py +47 -35
- meerschaum/api/routes/_version.py +3 -3
- meerschaum/config/__init__.py +43 -20
- meerschaum/config/_default.py +43 -6
- meerschaum/config/_edit.py +28 -24
- meerschaum/config/_environment.py +1 -1
- meerschaum/config/_patch.py +6 -6
- meerschaum/config/_paths.py +5 -1
- meerschaum/config/_read_config.py +65 -34
- meerschaum/config/_sync.py +6 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +31 -11
- meerschaum/config/static.py +18 -0
- meerschaum/connectors/_Connector.py +10 -4
- meerschaum/connectors/__init__.py +4 -20
- meerschaum/connectors/api/_APIConnector.py +34 -6
- meerschaum/connectors/api/_actions.py +2 -2
- meerschaum/connectors/api/_jobs.py +1 -1
- meerschaum/connectors/api/_login.py +33 -7
- meerschaum/connectors/api/_misc.py +2 -2
- meerschaum/connectors/api/_pipes.py +16 -31
- meerschaum/connectors/api/_plugins.py +2 -2
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/api/_tokens.py +146 -0
- meerschaum/connectors/api/_users.py +70 -58
- meerschaum/connectors/instance/_InstanceConnector.py +83 -0
- meerschaum/connectors/instance/__init__.py +10 -0
- meerschaum/connectors/instance/_pipes.py +442 -0
- meerschaum/connectors/instance/_plugins.py +151 -0
- meerschaum/connectors/instance/_tokens.py +296 -0
- meerschaum/connectors/instance/_users.py +181 -0
- meerschaum/connectors/parse.py +4 -1
- meerschaum/connectors/sql/_SQLConnector.py +8 -5
- meerschaum/connectors/sql/_cli.py +12 -11
- meerschaum/connectors/sql/_create_engine.py +9 -168
- meerschaum/connectors/sql/_fetch.py +2 -18
- meerschaum/connectors/sql/_pipes.py +156 -190
- meerschaum/connectors/sql/_plugins.py +29 -0
- meerschaum/connectors/sql/_sql.py +46 -21
- meerschaum/connectors/sql/_users.py +29 -2
- meerschaum/connectors/sql/tables/__init__.py +1 -1
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -4
- meerschaum/connectors/valkey/_pipes.py +53 -26
- meerschaum/connectors/valkey/_plugins.py +2 -26
- meerschaum/core/Pipe/__init__.py +59 -19
- meerschaum/core/Pipe/_attributes.py +412 -90
- meerschaum/core/Pipe/_bootstrap.py +54 -24
- meerschaum/core/Pipe/_data.py +96 -18
- meerschaum/core/Pipe/_dtypes.py +48 -18
- meerschaum/core/Pipe/_edit.py +14 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_show.py +5 -5
- meerschaum/core/Pipe/_sync.py +118 -193
- meerschaum/core/Pipe/_verify.py +4 -4
- meerschaum/{plugins → core/Plugin}/_Plugin.py +9 -11
- meerschaum/core/Plugin/__init__.py +1 -1
- meerschaum/core/Token/_Token.py +220 -0
- meerschaum/core/Token/__init__.py +12 -0
- meerschaum/core/User/_User.py +34 -8
- meerschaum/core/User/__init__.py +9 -1
- meerschaum/core/__init__.py +1 -0
- meerschaum/jobs/_Job.py +3 -2
- meerschaum/jobs/__init__.py +3 -2
- meerschaum/jobs/systemd.py +1 -1
- meerschaum/models/__init__.py +35 -0
- meerschaum/models/pipes.py +247 -0
- meerschaum/models/tokens.py +38 -0
- meerschaum/models/users.py +26 -0
- meerschaum/plugins/__init__.py +22 -7
- meerschaum/plugins/bootstrap.py +2 -1
- meerschaum/utils/_get_pipes.py +68 -27
- meerschaum/utils/daemon/Daemon.py +2 -1
- meerschaum/utils/daemon/__init__.py +30 -2
- meerschaum/utils/dataframe.py +473 -81
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +473 -34
- meerschaum/utils/dtypes/sql.py +368 -28
- meerschaum/utils/formatting/__init__.py +1 -1
- meerschaum/utils/formatting/_pipes.py +5 -4
- meerschaum/utils/formatting/_shell.py +11 -9
- meerschaum/utils/misc.py +246 -148
- meerschaum/utils/packages/__init__.py +10 -27
- meerschaum/utils/packages/_packages.py +41 -34
- meerschaum/utils/pipes.py +181 -0
- meerschaum/utils/process.py +1 -1
- meerschaum/utils/prompt.py +3 -1
- meerschaum/utils/schedule.py +2 -1
- meerschaum/utils/sql.py +121 -44
- meerschaum/utils/typing.py +1 -4
- meerschaum/utils/venv/_Venv.py +2 -2
- meerschaum/utils/venv/__init__.py +5 -7
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/METADATA +92 -96
- meerschaum-3.0.0rc2.dist-info/RECORD +283 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/WHEEL +1 -1
- meerschaum-3.0.0rc2.dist-info/licenses/NOTICE +2 -0
- meerschaum/api/models/_interfaces.py +0 -15
- meerschaum/api/models/_locations.py +0 -15
- meerschaum/api/models/_metrics.py +0 -15
- meerschaum/config/static/__init__.py +0 -186
- meerschaum-2.9.5.dist-info/RECORD +0 -263
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/zip-safe +0 -0
@@ -9,12 +9,14 @@ Utility functions for working with data types.
|
|
9
9
|
import traceback
|
10
10
|
import json
|
11
11
|
import uuid
|
12
|
-
|
12
|
+
import time
|
13
|
+
from datetime import timezone, datetime, date, timedelta
|
13
14
|
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
14
15
|
|
15
16
|
import meerschaum as mrsm
|
16
17
|
from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
|
17
18
|
from meerschaum.utils.warnings import warn
|
19
|
+
from meerschaum._internal.static import STATIC_CONFIG as _STATIC_CONFIG
|
18
20
|
|
19
21
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
20
22
|
'decimal': 'numeric',
|
@@ -30,6 +32,8 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
|
30
32
|
'UUID': 'uuid',
|
31
33
|
'geom': 'geometry',
|
32
34
|
'geog': 'geography',
|
35
|
+
'boolean': 'bool',
|
36
|
+
'day': 'date',
|
33
37
|
}
|
34
38
|
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
35
39
|
'json': 'object',
|
@@ -37,18 +41,52 @@ MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
|
37
41
|
'geometry': 'object',
|
38
42
|
'geography': 'object',
|
39
43
|
'uuid': 'object',
|
40
|
-
'
|
44
|
+
'date': 'date32[day][pyarrow]',
|
45
|
+
'datetime': 'datetime64[us, UTC]',
|
41
46
|
'bool': 'bool[pyarrow]',
|
42
|
-
'int': '
|
43
|
-
'int8': '
|
44
|
-
'int16': '
|
45
|
-
'int32': '
|
46
|
-
'int64': '
|
47
|
-
'str': 'string
|
48
|
-
'bytes': '
|
47
|
+
'int': 'int64[pyarrow]',
|
48
|
+
'int8': 'int8[pyarrow]',
|
49
|
+
'int16': 'int16[pyarrow]',
|
50
|
+
'int32': 'int32[pyarrow]',
|
51
|
+
'int64': 'int64[pyarrow]',
|
52
|
+
'str': 'string',
|
53
|
+
'bytes': 'binary[pyarrow]',
|
49
54
|
None: 'object',
|
50
55
|
}
|
51
56
|
|
57
|
+
MRSM_PRECISION_UNITS_SCALARS: Dict[str, Union[int, float]] = {
|
58
|
+
'nanosecond': 1_000_000_000,
|
59
|
+
'microsecond': 1_000_000,
|
60
|
+
'millisecond': 1000,
|
61
|
+
'second': 1,
|
62
|
+
'minute': (1 / 60),
|
63
|
+
'hour': (1 / 3600),
|
64
|
+
'day': (1 / 86400),
|
65
|
+
}
|
66
|
+
|
67
|
+
MRSM_PRECISION_UNITS_ALIASES: Dict[str, str] = {
|
68
|
+
'ns': 'nanosecond',
|
69
|
+
'us': 'microsecond',
|
70
|
+
'ms': 'millisecond',
|
71
|
+
's': 'second',
|
72
|
+
'sec': 'second',
|
73
|
+
'm': 'minute',
|
74
|
+
'min': 'minute',
|
75
|
+
'h': 'hour',
|
76
|
+
'hr': 'hour',
|
77
|
+
'd': 'day',
|
78
|
+
'D': 'day',
|
79
|
+
}
|
80
|
+
MRSM_PRECISION_UNITS_ABBREVIATIONS: Dict[str, str] = {
|
81
|
+
'nanosecond': 'ns',
|
82
|
+
'microsecond': 'us',
|
83
|
+
'millisecond': 'ms',
|
84
|
+
'second': 's',
|
85
|
+
'minute': 'min',
|
86
|
+
'hour': 'hr',
|
87
|
+
'day': 'D',
|
88
|
+
}
|
89
|
+
|
52
90
|
|
53
91
|
def to_pandas_dtype(dtype: str) -> str:
|
54
92
|
"""
|
@@ -147,7 +185,7 @@ def are_dtypes_equal(
|
|
147
185
|
if ldtype in json_dtypes and rdtype in json_dtypes:
|
148
186
|
return True
|
149
187
|
|
150
|
-
numeric_dtypes = ('numeric', 'object')
|
188
|
+
numeric_dtypes = ('numeric', 'decimal', 'object')
|
151
189
|
if ldtype in numeric_dtypes and rdtype in numeric_dtypes:
|
152
190
|
return True
|
153
191
|
|
@@ -155,7 +193,7 @@ def are_dtypes_equal(
|
|
155
193
|
if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
|
156
194
|
return True
|
157
195
|
|
158
|
-
bytes_dtypes = ('bytes', 'object')
|
196
|
+
bytes_dtypes = ('bytes', 'object', 'binary')
|
159
197
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
160
198
|
return True
|
161
199
|
|
@@ -179,7 +217,10 @@ def are_dtypes_equal(
|
|
179
217
|
if ldtype in string_dtypes and rdtype in string_dtypes:
|
180
218
|
return True
|
181
219
|
|
182
|
-
int_dtypes = (
|
220
|
+
int_dtypes = (
|
221
|
+
'int', 'int64', 'int32', 'int16', 'int8',
|
222
|
+
'uint', 'uint64', 'uint32', 'uint16', 'uint8',
|
223
|
+
)
|
183
224
|
if ldtype.lower() in int_dtypes and rdtype.lower() in int_dtypes:
|
184
225
|
return True
|
185
226
|
|
@@ -191,6 +232,13 @@ def are_dtypes_equal(
|
|
191
232
|
if ldtype in bool_dtypes and rdtype in bool_dtypes:
|
192
233
|
return True
|
193
234
|
|
235
|
+
date_dtypes = (
|
236
|
+
'date', 'date32[pyarrow]', 'date32[day][pyarrow]',
|
237
|
+
'date64[pyarrow]', 'date64[ms][pyarrow]',
|
238
|
+
)
|
239
|
+
if ldtype in date_dtypes and rdtype in date_dtypes:
|
240
|
+
return True
|
241
|
+
|
194
242
|
return False
|
195
243
|
|
196
244
|
|
@@ -309,7 +357,7 @@ def attempt_cast_to_geometry(value: Any) -> Any:
|
|
309
357
|
if isinstance(value, (dict, list)):
|
310
358
|
try:
|
311
359
|
return shapely.from_geojson(json.dumps(value))
|
312
|
-
except Exception
|
360
|
+
except Exception:
|
313
361
|
return value
|
314
362
|
|
315
363
|
value_is_wkt = geometry_is_wkt(value)
|
@@ -361,7 +409,7 @@ def value_is_null(value: Any) -> bool:
|
|
361
409
|
"""
|
362
410
|
Determine if a value is a null-like string.
|
363
411
|
"""
|
364
|
-
return str(value).lower() in ('none', 'nan', 'na', 'nat', '', '<na>')
|
412
|
+
return str(value).lower() in ('none', 'nan', 'na', 'nat', 'natz', '', '<na>')
|
365
413
|
|
366
414
|
|
367
415
|
def none_if_null(value: Any) -> Any:
|
@@ -455,10 +503,12 @@ def coerce_timezone(
|
|
455
503
|
|
456
504
|
if isinstance(dt, str):
|
457
505
|
dateutil_parser = mrsm.attempt_import('dateutil.parser')
|
458
|
-
|
506
|
+
try:
|
507
|
+
dt = dateutil_parser.parse(dt)
|
508
|
+
except Exception:
|
509
|
+
return dt
|
459
510
|
|
460
511
|
dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
|
461
|
-
|
462
512
|
if dt_is_series:
|
463
513
|
pandas = mrsm.attempt_import('pandas', lazy=False)
|
464
514
|
|
@@ -472,6 +522,8 @@ def coerce_timezone(
|
|
472
522
|
return dt
|
473
523
|
|
474
524
|
dt_series = to_datetime(dt, coerce_utc=False)
|
525
|
+
if dt_series.dt.tz is None:
|
526
|
+
dt_series = dt_series.dt.tz_localize(timezone.utc)
|
475
527
|
if strip_utc:
|
476
528
|
try:
|
477
529
|
if dt_series.dt.tz is not None:
|
@@ -492,23 +544,40 @@ def coerce_timezone(
|
|
492
544
|
return utc_dt
|
493
545
|
|
494
546
|
|
495
|
-
def to_datetime(
|
547
|
+
def to_datetime(
|
548
|
+
dt_val: Any,
|
549
|
+
as_pydatetime: bool = False,
|
550
|
+
coerce_utc: bool = True,
|
551
|
+
precision_unit: Optional[str] = None,
|
552
|
+
) -> Any:
|
496
553
|
"""
|
497
554
|
Wrap `pd.to_datetime()` and add support for out-of-bounds values.
|
555
|
+
|
556
|
+
Parameters
|
557
|
+
----------
|
558
|
+
dt_val: Any
|
559
|
+
The value to coerce to Pandas Timestamps.
|
560
|
+
|
561
|
+
as_pydatetime: bool, default False
|
562
|
+
If `True`, return a Python datetime object.
|
563
|
+
|
564
|
+
coerce_utc: bool, default True
|
565
|
+
If `True`, ensure the value has UTC tzinfo.
|
566
|
+
|
567
|
+
precision_unit: Optional[str], default None
|
568
|
+
If provided, enforce the provided precision unit.
|
498
569
|
"""
|
499
570
|
pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
|
500
571
|
is_dask = 'dask' in getattr(dt_val, '__module__', '')
|
501
572
|
dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
|
502
573
|
dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
|
503
574
|
pd = pandas if dd is None else dd
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
except (pd.errors.OutOfBoundsDatetime, ValueError):
|
511
|
-
pass
|
575
|
+
enforce_precision = precision_unit is not None
|
576
|
+
precision_unit = precision_unit or 'microsecond'
|
577
|
+
true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
|
578
|
+
precision_abbreviation = MRSM_PRECISION_UNITS_ABBREVIATIONS.get(true_precision_unit, None)
|
579
|
+
if not precision_abbreviation:
|
580
|
+
raise ValueError(f"Invalid precision '{precision_unit}'.")
|
512
581
|
|
513
582
|
def parse(x: Any) -> Any:
|
514
583
|
try:
|
@@ -516,11 +585,90 @@ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = Tru
|
|
516
585
|
except Exception:
|
517
586
|
return x
|
518
587
|
|
588
|
+
def check_dtype(dtype_to_check: str, with_utc: bool = True) -> bool:
|
589
|
+
dtype_check_against = (
|
590
|
+
f"datetime64[{precision_abbreviation}, UTC]"
|
591
|
+
if with_utc
|
592
|
+
else f"datetime64[{precision_abbreviation}]"
|
593
|
+
)
|
594
|
+
return (
|
595
|
+
dtype_to_check == dtype_check_against
|
596
|
+
if enforce_precision
|
597
|
+
else (
|
598
|
+
dtype_to_check.startswith('datetime64[')
|
599
|
+
and (
|
600
|
+
('utc' in dtype_to_check.lower())
|
601
|
+
if with_utc
|
602
|
+
else ('utc' not in dtype_to_check.lower())
|
603
|
+
)
|
604
|
+
)
|
605
|
+
)
|
606
|
+
|
607
|
+
if isinstance(dt_val, pd.Timestamp):
|
608
|
+
dt_val_to_return = dt_val if not as_pydatetime else dt_val.to_pydatetime()
|
609
|
+
return (
|
610
|
+
coerce_timezone(dt_val_to_return)
|
611
|
+
if coerce_utc
|
612
|
+
else dt_val_to_return
|
613
|
+
)
|
614
|
+
|
519
615
|
if dt_is_series:
|
520
|
-
|
616
|
+
changed_tz = False
|
617
|
+
original_tz = None
|
618
|
+
dtype = str(getattr(dt_val, 'dtype', 'object'))
|
619
|
+
if (
|
620
|
+
are_dtypes_equal(dtype, 'datetime')
|
621
|
+
and 'utc' not in dtype.lower()
|
622
|
+
and hasattr(dt_val, 'dt')
|
623
|
+
):
|
624
|
+
original_tz = dt_val.dt.tz
|
625
|
+
dt_val = dt_val.dt.tz_localize(timezone.utc)
|
626
|
+
changed_tz = True
|
627
|
+
dtype = str(getattr(dt_val, 'dtype', 'object'))
|
628
|
+
try:
|
629
|
+
new_dt_series = (
|
630
|
+
dt_val
|
631
|
+
if check_dtype(dtype, with_utc=True)
|
632
|
+
else dt_val.astype(f"datetime64[{precision_abbreviation}, UTC]")
|
633
|
+
)
|
634
|
+
except pd.errors.OutOfBoundsDatetime:
|
635
|
+
try:
|
636
|
+
next_precision = get_next_precision_unit(true_precision_unit)
|
637
|
+
next_precision_abbrevation = MRSM_PRECISION_UNITS_ABBREVIATIONS[next_precision]
|
638
|
+
new_dt_series = dt_val.astype(f"datetime64[{next_precision_abbrevation}, UTC]")
|
639
|
+
except Exception:
|
640
|
+
new_dt_series = None
|
641
|
+
except ValueError:
|
642
|
+
new_dt_series = None
|
643
|
+
except TypeError:
|
644
|
+
try:
|
645
|
+
new_dt_series = (
|
646
|
+
new_dt_series
|
647
|
+
if check_dtype(str(getattr(new_dt_series, 'dtype', None)), with_utc=False)
|
648
|
+
else dt_val.astype(f"datetime64[{precision_abbreviation}]")
|
649
|
+
)
|
650
|
+
except Exception:
|
651
|
+
new_dt_series = None
|
652
|
+
|
653
|
+
if new_dt_series is None:
|
654
|
+
new_dt_series = dt_val.apply(lambda x: parse(str(x)))
|
655
|
+
|
521
656
|
if coerce_utc:
|
522
|
-
return coerce_timezone(
|
523
|
-
|
657
|
+
return coerce_timezone(new_dt_series)
|
658
|
+
|
659
|
+
if changed_tz:
|
660
|
+
new_dt_series = new_dt_series.dt.tz_localize(original_tz)
|
661
|
+
return new_dt_series
|
662
|
+
|
663
|
+
try:
|
664
|
+
new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
|
665
|
+
if new_dt_val.unit != precision_abbreviation:
|
666
|
+
new_dt_val = new_dt_val.as_unit(precision_abbreviation)
|
667
|
+
if as_pydatetime:
|
668
|
+
return new_dt_val.to_pydatetime()
|
669
|
+
return new_dt_val
|
670
|
+
except (pd.errors.OutOfBoundsDatetime, ValueError):
|
671
|
+
pass
|
524
672
|
|
525
673
|
new_dt_val = parse(dt_val)
|
526
674
|
if not coerce_utc:
|
@@ -541,6 +689,7 @@ def serialize_bytes(data: bytes) -> str:
|
|
541
689
|
def serialize_geometry(
|
542
690
|
geom: Any,
|
543
691
|
geometry_format: str = 'wkb_hex',
|
692
|
+
srid: Optional[int] = None,
|
544
693
|
) -> Union[str, Dict[str, Any], None]:
|
545
694
|
"""
|
546
695
|
Serialize geometry data as a hex-encoded well-known-binary string.
|
@@ -555,19 +704,30 @@ def serialize_geometry(
|
|
555
704
|
Accepted formats are `wkb_hex` (well-known binary hex string),
|
556
705
|
`wkt` (well-known text), and `geojson`.
|
557
706
|
|
707
|
+
srid: Optional[int], default None
|
708
|
+
If provided, use this as the source CRS when serializing to GeoJSON.
|
709
|
+
|
558
710
|
Returns
|
559
711
|
-------
|
560
712
|
A string containing the geometry data.
|
561
713
|
"""
|
562
714
|
if value_is_null(geom):
|
563
715
|
return None
|
564
|
-
shapely = mrsm.attempt_import(
|
716
|
+
shapely, shapely_ops, pyproj = mrsm.attempt_import(
|
717
|
+
'shapely', 'shapely.ops', 'pyproj',
|
718
|
+
lazy=False,
|
719
|
+
)
|
565
720
|
if geometry_format == 'geojson':
|
721
|
+
if srid:
|
722
|
+
transformer = pyproj.Transformer.from_crs(f"EPSG:{srid}", "EPSG:4326", always_xy=True)
|
723
|
+
geom = shapely_ops.transform(transformer.transform, geom)
|
566
724
|
geojson_str = shapely.to_geojson(geom)
|
567
725
|
return json.loads(geojson_str)
|
568
726
|
|
569
727
|
if hasattr(geom, 'wkb_hex'):
|
570
|
-
|
728
|
+
if geometry_format == "wkb_hex":
|
729
|
+
return shapely.to_wkb(geom, hex=True, include_srid=True)
|
730
|
+
return shapely.to_wkt(geom)
|
571
731
|
|
572
732
|
return str(geom)
|
573
733
|
|
@@ -576,10 +736,19 @@ def deserialize_geometry(geom_wkb: Union[str, bytes]):
|
|
576
736
|
"""
|
577
737
|
Deserialize a WKB string into a shapely geometry object.
|
578
738
|
"""
|
579
|
-
shapely = mrsm.attempt_import(lazy=False)
|
739
|
+
shapely = mrsm.attempt_import('shapely', lazy=False)
|
580
740
|
return shapely.wkb.loads(geom_wkb)
|
581
741
|
|
582
742
|
|
743
|
+
def project_geometry(geom, srid: int, to_srid: int = 4326):
|
744
|
+
"""
|
745
|
+
Project a shapely geometry object to a new CRS (SRID).
|
746
|
+
"""
|
747
|
+
pyproj, shapely_ops = mrsm.attempt_import('pyproj', 'shapely.ops', lazy=False)
|
748
|
+
transformer = pyproj.Transformer.from_crs(f"EPSG:{srid}", f"EPSG:{to_srid}", always_xy=True)
|
749
|
+
return shapely_ops.transform(transformer.transform, geom)
|
750
|
+
|
751
|
+
|
583
752
|
def deserialize_bytes_string(data: Optional[str], force_hex: bool = False) -> Union[bytes, None]:
|
584
753
|
"""
|
585
754
|
Given a serialized ASCII string of bytes data, return the original bytes.
|
@@ -646,13 +815,21 @@ def serialize_datetime(dt: datetime) -> Union[str, None]:
|
|
646
815
|
'{"a": "2022-01-01T00:00:00Z"}'
|
647
816
|
|
648
817
|
"""
|
649
|
-
if not
|
818
|
+
if not hasattr(dt, 'isoformat'):
|
650
819
|
return None
|
651
|
-
|
820
|
+
|
821
|
+
tz_suffix = 'Z' if getattr(dt, 'tzinfo', None) is None else ''
|
652
822
|
return dt.isoformat() + tz_suffix
|
653
823
|
|
654
824
|
|
655
|
-
def
|
825
|
+
def serialize_date(d: date) -> Union[str, None]:
|
826
|
+
"""
|
827
|
+
Serialize a date object into its ISO representation.
|
828
|
+
"""
|
829
|
+
return d.isoformat() if hasattr(d, 'isoformat') else None
|
830
|
+
|
831
|
+
|
832
|
+
def json_serialize_value(x: Any, default_to_str: bool = True) -> Union[str, None]:
|
656
833
|
"""
|
657
834
|
Serialize the given value to a JSON value. Accounts for datetimes, bytes, decimals, etc.
|
658
835
|
|
@@ -675,6 +852,9 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
|
675
852
|
if hasattr(x, 'tzinfo'):
|
676
853
|
return serialize_datetime(x)
|
677
854
|
|
855
|
+
if hasattr(x, 'isoformat'):
|
856
|
+
return serialize_date(x)
|
857
|
+
|
678
858
|
if isinstance(x, bytes):
|
679
859
|
return serialize_bytes(x)
|
680
860
|
|
@@ -687,6 +867,9 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
|
687
867
|
if value_is_null(x):
|
688
868
|
return None
|
689
869
|
|
870
|
+
if isinstance(x, (dict, list, tuple)):
|
871
|
+
return json.dumps(x, default=json_serialize_value, separators=(',', ':'))
|
872
|
+
|
690
873
|
return str(x) if default_to_str else x
|
691
874
|
|
692
875
|
|
@@ -773,3 +956,259 @@ def get_geometry_type_srid(
|
|
773
956
|
break
|
774
957
|
|
775
958
|
return geometry_type, srid
|
959
|
+
|
960
|
+
|
961
|
+
def get_current_timestamp(
|
962
|
+
precision_unit: str = _STATIC_CONFIG['dtypes']['datetime']['default_precision_unit'],
|
963
|
+
precision_interval: int = 1,
|
964
|
+
round_to: str = 'down',
|
965
|
+
as_pandas: bool = False,
|
966
|
+
as_int: bool = False,
|
967
|
+
_now: Union[datetime, int, None] = None,
|
968
|
+
) -> 'Union[datetime, pd.Timestamp, int]':
|
969
|
+
"""
|
970
|
+
Return the current UTC timestamp, rounded to the given precision unit.
|
971
|
+
|
972
|
+
Parameters
|
973
|
+
----------
|
974
|
+
precision_unit: str, default 'us'
|
975
|
+
The precision of the timestamp to be returned.
|
976
|
+
Valid values are the following:
|
977
|
+
- `ns` / `nanosecond`
|
978
|
+
- `us` / `microsecond`
|
979
|
+
- `ms` / `millisecond`
|
980
|
+
- `s` / `sec` / `second`
|
981
|
+
- `m` / `min` / `minute`
|
982
|
+
- `h` / `hr` / `hour`
|
983
|
+
- `d` / `day`
|
984
|
+
|
985
|
+
precision_interval: int, default 1
|
986
|
+
Round the timestamp to the `precision_interval` units.
|
987
|
+
For example, `precision_unit='minute'` and `precision_interval=15` will round to 15-minute intervals.
|
988
|
+
Note: `precision_interval` must be 1 when `precision_unit='nanosecond'`.
|
989
|
+
|
990
|
+
round_to: str, default 'down'
|
991
|
+
The direction to which to round the timestamp.
|
992
|
+
Available options are `down`, `up`, and `closest`.
|
993
|
+
|
994
|
+
as_pandas: bool, default False
|
995
|
+
If `True`, return a Pandas Timestamp.
|
996
|
+
This is always true if `precision_unit` is `nanosecond`.
|
997
|
+
|
998
|
+
as_int: bool, default False
|
999
|
+
If `True`, return the timestamp as an integer.
|
1000
|
+
Overrides `as_pandas`.
|
1001
|
+
|
1002
|
+
Returns
|
1003
|
+
-------
|
1004
|
+
A Pandas Timestamp, datetime object, or integer with precision to the provided unit.
|
1005
|
+
|
1006
|
+
Examples
|
1007
|
+
--------
|
1008
|
+
>>> get_current_timestamp('ns')
|
1009
|
+
Timestamp('2025-07-17 17:59:16.423644369+0000', tz='UTC')
|
1010
|
+
>>> get_current_timestamp('ms')
|
1011
|
+
Timestamp('2025-07-17 17:59:16.424000+0000', tz='UTC')
|
1012
|
+
"""
|
1013
|
+
true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
|
1014
|
+
if true_precision_unit not in MRSM_PRECISION_UNITS_SCALARS:
|
1015
|
+
from meerschaum.utils.misc import items_str
|
1016
|
+
raise ValueError(
|
1017
|
+
f"Unknown precision unit '{precision_unit}'. "
|
1018
|
+
"Accepted values are "
|
1019
|
+
f"{items_str(list(MRSM_PRECISION_UNITS_SCALARS) + list(MRSM_PRECISION_UNITS_ALIASES))}."
|
1020
|
+
)
|
1021
|
+
|
1022
|
+
if not as_int:
|
1023
|
+
as_pandas = as_pandas or true_precision_unit == 'nanosecond'
|
1024
|
+
pd = mrsm.attempt_import('pandas', lazy=False) if as_pandas else None
|
1025
|
+
|
1026
|
+
if true_precision_unit == 'nanosecond':
|
1027
|
+
if precision_interval != 1:
|
1028
|
+
warn("`precision_interval` must be 1 for nanosecond precision.")
|
1029
|
+
now_ts = time.time_ns() if not isinstance(_now, int) else _now
|
1030
|
+
if as_int:
|
1031
|
+
return now_ts
|
1032
|
+
return pd.to_datetime(now_ts, unit='ns', utc=True)
|
1033
|
+
|
1034
|
+
now = datetime.now(timezone.utc) if not isinstance(_now, datetime) else _now
|
1035
|
+
delta = timedelta(**{true_precision_unit + 's': precision_interval})
|
1036
|
+
rounded_now = round_time(now, delta, to=round_to)
|
1037
|
+
|
1038
|
+
if as_int:
|
1039
|
+
return int(rounded_now.timestamp() * MRSM_PRECISION_UNITS_SCALARS[true_precision_unit])
|
1040
|
+
|
1041
|
+
ts_val = (
|
1042
|
+
pd.to_datetime(rounded_now, utc=True)
|
1043
|
+
if as_pandas
|
1044
|
+
else rounded_now
|
1045
|
+
)
|
1046
|
+
|
1047
|
+
if not as_pandas:
|
1048
|
+
return ts_val
|
1049
|
+
|
1050
|
+
as_unit_precisions = ('microsecond', 'millisecond', 'second')
|
1051
|
+
if true_precision_unit not in as_unit_precisions:
|
1052
|
+
return ts_val
|
1053
|
+
|
1054
|
+
return ts_val.as_unit(MRSM_PRECISION_UNITS_ABBREVIATIONS[true_precision_unit])
|
1055
|
+
|
1056
|
+
|
1057
|
+
def dtype_is_special(type_: str) -> bool:
|
1058
|
+
"""
|
1059
|
+
Return whether a dtype should be treated as a special Meerschaum dtype.
|
1060
|
+
This is not the same as a Meerschaum alias.
|
1061
|
+
"""
|
1062
|
+
true_type = MRSM_ALIAS_DTYPES.get(type_, type_)
|
1063
|
+
if true_type in (
|
1064
|
+
'uuid',
|
1065
|
+
'json',
|
1066
|
+
'bytes',
|
1067
|
+
'numeric',
|
1068
|
+
'datetime',
|
1069
|
+
'geometry',
|
1070
|
+
'geography',
|
1071
|
+
'date',
|
1072
|
+
):
|
1073
|
+
return True
|
1074
|
+
|
1075
|
+
if are_dtypes_equal(true_type, 'datetime'):
|
1076
|
+
return True
|
1077
|
+
|
1078
|
+
if are_dtypes_equal(true_type, 'date'):
|
1079
|
+
return True
|
1080
|
+
|
1081
|
+
if true_type.startswith('numeric'):
|
1082
|
+
return True
|
1083
|
+
|
1084
|
+
if true_type.startswith('geometry'):
|
1085
|
+
return True
|
1086
|
+
|
1087
|
+
if true_type.startswith('geography'):
|
1088
|
+
return True
|
1089
|
+
|
1090
|
+
return False
|
1091
|
+
|
1092
|
+
|
1093
|
+
def get_next_precision_unit(precision_unit: str, decrease: bool = True) -> str:
|
1094
|
+
"""
|
1095
|
+
Get the next precision string in order of value.
|
1096
|
+
|
1097
|
+
Parameters
|
1098
|
+
----------
|
1099
|
+
precision_unit: str
|
1100
|
+
The precision string (`'nanosecond'`, `'ms'`, etc.).
|
1101
|
+
|
1102
|
+
decrease: bool, default True
|
1103
|
+
If `True`, return the precision unit which is lower (e.g. `nanosecond` -> `microsecond`).
|
1104
|
+
If `False`, return the precision unit which is higher.
|
1105
|
+
|
1106
|
+
Returns
|
1107
|
+
-------
|
1108
|
+
A `precision` string which is lower or higher than the given precision unit.
|
1109
|
+
|
1110
|
+
Examples
|
1111
|
+
--------
|
1112
|
+
>>> get_next_precision_unit('nanosecond')
|
1113
|
+
'microsecond'
|
1114
|
+
>>> get_next_precision_unit('ms')
|
1115
|
+
'second'
|
1116
|
+
>>> get_next_precision_unit('hour', decrease=False)
|
1117
|
+
'minute'
|
1118
|
+
"""
|
1119
|
+
true_precision_unit = MRSM_PRECISION_UNITS_ALIASES.get(precision_unit, precision_unit)
|
1120
|
+
precision_scalar = MRSM_PRECISION_UNITS_SCALARS.get(true_precision_unit, None)
|
1121
|
+
if not precision_scalar:
|
1122
|
+
raise ValueError(f"Invalid precision unit '{precision_unit}'.")
|
1123
|
+
|
1124
|
+
precisions = sorted(
|
1125
|
+
list(MRSM_PRECISION_UNITS_SCALARS),
|
1126
|
+
key=lambda p: MRSM_PRECISION_UNITS_SCALARS[p]
|
1127
|
+
)
|
1128
|
+
|
1129
|
+
precision_index = precisions.index(true_precision_unit)
|
1130
|
+
new_precision_index = precision_index + (-1 if decrease else 1)
|
1131
|
+
if new_precision_index < 0 or new_precision_index >= len(precisions):
|
1132
|
+
raise ValueError(f"No precision {'below' if decrease else 'above'} '{precision_unit}'.")
|
1133
|
+
|
1134
|
+
return precisions[new_precision_index]
|
1135
|
+
|
1136
|
+
|
1137
|
+
def round_time(
|
1138
|
+
dt: Optional[datetime] = None,
|
1139
|
+
date_delta: Optional[timedelta] = None,
|
1140
|
+
to: 'str' = 'down'
|
1141
|
+
) -> datetime:
|
1142
|
+
"""
|
1143
|
+
Round a datetime object to a multiple of a timedelta.
|
1144
|
+
http://stackoverflow.com/questions/3463930/how-to-round-the-minute-of-a-datetime-object-python
|
1145
|
+
|
1146
|
+
Parameters
|
1147
|
+
----------
|
1148
|
+
dt: Optional[datetime], default None
|
1149
|
+
If `None`, grab the current UTC datetime.
|
1150
|
+
|
1151
|
+
date_delta: Optional[timedelta], default None
|
1152
|
+
If `None`, use a delta of 1 minute.
|
1153
|
+
|
1154
|
+
to: 'str', default 'down'
|
1155
|
+
Available options are `'up'`, `'down'`, and `'closest'`.
|
1156
|
+
|
1157
|
+
Returns
|
1158
|
+
-------
|
1159
|
+
A rounded `datetime` object.
|
1160
|
+
|
1161
|
+
Examples
|
1162
|
+
--------
|
1163
|
+
>>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200))
|
1164
|
+
datetime.datetime(2022, 1, 1, 12, 15)
|
1165
|
+
>>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200), to='up')
|
1166
|
+
datetime.datetime(2022, 1, 1, 12, 16)
|
1167
|
+
>>> round_time(datetime(2022, 1, 1, 12, 15, 57, 200), timedelta(hours=1))
|
1168
|
+
datetime.datetime(2022, 1, 1, 12, 0)
|
1169
|
+
>>> round_time(
|
1170
|
+
... datetime(2022, 1, 1, 12, 15, 57, 200),
|
1171
|
+
... timedelta(hours=1),
|
1172
|
+
... to = 'closest'
|
1173
|
+
... )
|
1174
|
+
datetime.datetime(2022, 1, 1, 12, 0)
|
1175
|
+
>>> round_time(
|
1176
|
+
... datetime(2022, 1, 1, 12, 45, 57, 200),
|
1177
|
+
...     timedelta(hours=1),
|
1178
|
+
... to = 'closest'
|
1179
|
+
... )
|
1180
|
+
datetime.datetime(2022, 1, 1, 13, 0)
|
1181
|
+
|
1182
|
+
"""
|
1183
|
+
from decimal import Decimal, ROUND_HALF_UP, ROUND_DOWN, ROUND_UP
|
1184
|
+
if date_delta is None:
|
1185
|
+
date_delta = timedelta(minutes=1)
|
1186
|
+
|
1187
|
+
if dt is None:
|
1188
|
+
dt = datetime.now(timezone.utc).replace(tzinfo=None)
|
1189
|
+
|
1190
|
+
def get_total_microseconds(td: timedelta) -> int:
|
1191
|
+
return (td.days * 86400 + td.seconds) * 1_000_000 + td.microseconds
|
1192
|
+
|
1193
|
+
round_to_microseconds = get_total_microseconds(date_delta)
|
1194
|
+
if round_to_microseconds == 0:
|
1195
|
+
return dt
|
1196
|
+
|
1197
|
+
dt_delta_from_min = dt.replace(tzinfo=None) - datetime.min
|
1198
|
+
dt_total_microseconds = get_total_microseconds(dt_delta_from_min)
|
1199
|
+
|
1200
|
+
dt_dec = Decimal(dt_total_microseconds)
|
1201
|
+
round_to_dec = Decimal(round_to_microseconds)
|
1202
|
+
|
1203
|
+
div = dt_dec / round_to_dec
|
1204
|
+
if to == 'down':
|
1205
|
+
num_intervals = div.to_integral_value(rounding=ROUND_DOWN)
|
1206
|
+
elif to == 'up':
|
1207
|
+
num_intervals = div.to_integral_value(rounding=ROUND_UP)
|
1208
|
+
else:
|
1209
|
+
num_intervals = div.to_integral_value(rounding=ROUND_HALF_UP)
|
1210
|
+
|
1211
|
+
rounded_dt_total_microseconds = num_intervals * round_to_dec
|
1212
|
+
adjustment_microseconds = int(rounded_dt_total_microseconds) - dt_total_microseconds
|
1213
|
+
|
1214
|
+
return dt + timedelta(microseconds=adjustment_microseconds)
|