meerschaum 2.8.3__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +5 -0
- meerschaum/actions/drop.py +1 -1
- meerschaum/actions/start.py +14 -6
- meerschaum/actions/sync.py +9 -0
- meerschaum/api/__init__.py +9 -3
- meerschaum/api/_chunks.py +67 -0
- meerschaum/api/dash/callbacks/__init__.py +5 -2
- meerschaum/api/dash/callbacks/custom.py +21 -8
- meerschaum/api/dash/callbacks/dashboard.py +26 -4
- meerschaum/api/dash/callbacks/settings/__init__.py +8 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +76 -0
- meerschaum/api/dash/components.py +136 -25
- meerschaum/api/dash/pages/__init__.py +1 -0
- meerschaum/api/dash/pages/dashboard.py +11 -9
- meerschaum/api/dash/pages/plugins.py +31 -27
- meerschaum/api/dash/pages/settings/__init__.py +8 -0
- meerschaum/api/dash/pages/settings/password_reset.py +63 -0
- meerschaum/api/dash/webterm.py +6 -3
- meerschaum/api/resources/static/css/dash.css +8 -1
- meerschaum/api/resources/templates/termpage.html +4 -0
- meerschaum/api/routes/_pipes.py +234 -82
- meerschaum/config/_default.py +4 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +12 -1
- meerschaum/connectors/api/_pipes.py +106 -45
- meerschaum/connectors/api/_plugins.py +51 -45
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/parse.py +1 -2
- meerschaum/connectors/sql/_SQLConnector.py +4 -1
- meerschaum/connectors/sql/_cli.py +1 -0
- meerschaum/connectors/sql/_create_engine.py +51 -4
- meerschaum/connectors/sql/_pipes.py +38 -6
- meerschaum/connectors/sql/_sql.py +35 -4
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -0
- meerschaum/connectors/valkey/_pipes.py +51 -39
- meerschaum/core/Pipe/__init__.py +1 -0
- meerschaum/core/Pipe/_data.py +1 -2
- meerschaum/core/Pipe/_sync.py +64 -4
- meerschaum/core/Pipe/_verify.py +23 -8
- meerschaum/jobs/systemd.py +1 -1
- meerschaum/plugins/_Plugin.py +21 -5
- meerschaum/plugins/__init__.py +32 -8
- meerschaum/utils/dataframe.py +139 -2
- meerschaum/utils/dtypes/__init__.py +211 -1
- meerschaum/utils/dtypes/sql.py +296 -5
- meerschaum/utils/formatting/_shell.py +1 -4
- meerschaum/utils/misc.py +1 -1
- meerschaum/utils/packages/_packages.py +8 -2
- meerschaum/utils/process.py +27 -3
- meerschaum/utils/schedule.py +3 -3
- meerschaum/utils/sql.py +140 -12
- meerschaum/utils/venv/__init__.py +10 -2
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/METADATA +17 -3
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/RECORD +61 -61
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/WHEEL +1 -1
- meerschaum/_internal/gui/__init__.py +0 -43
- meerschaum/_internal/gui/app/__init__.py +0 -50
- meerschaum/_internal/gui/app/_windows.py +0 -74
- meerschaum/_internal/gui/app/actions.py +0 -30
- meerschaum/_internal/gui/app/pipes.py +0 -47
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.8.3.dist-info → meerschaum-2.9.0.dist-info}/zip-safe +0 -0
meerschaum/plugins/_Plugin.py
CHANGED
@@ -450,7 +450,7 @@ class Plugin:
|
|
450
450
|
success, msg = False, (
|
451
451
|
f"Failed to run post-install setup for plugin '{self}'." + '\n' +
|
452
452
|
f"Check `setup()` in '{self.__file__}' for more information " +
|
453
|
-
|
453
|
+
"(no error message provided)."
|
454
454
|
)
|
455
455
|
else:
|
456
456
|
success, msg = True, success_msg
|
@@ -458,7 +458,7 @@ class Plugin:
|
|
458
458
|
success = True
|
459
459
|
msg = (
|
460
460
|
f"Post-install for plugin '{self}' returned None. " +
|
461
|
-
|
461
|
+
"Assuming plugin successfully installed."
|
462
462
|
)
|
463
463
|
warn(msg)
|
464
464
|
else:
|
@@ -469,7 +469,7 @@ class Plugin:
|
|
469
469
|
)
|
470
470
|
|
471
471
|
_ongoing_installations.remove(self.full_name)
|
472
|
-
|
472
|
+
_ = self.module
|
473
473
|
return success, msg
|
474
474
|
|
475
475
|
|
@@ -716,13 +716,14 @@ class Plugin:
|
|
716
716
|
return required
|
717
717
|
|
718
718
|
|
719
|
-
def get_required_plugins(self, debug: bool=False) -> List[
|
719
|
+
def get_required_plugins(self, debug: bool=False) -> List[mrsm.plugins.Plugin]:
|
720
720
|
"""
|
721
721
|
Return a list of required Plugin objects.
|
722
722
|
"""
|
723
723
|
from meerschaum.utils.warnings import warn
|
724
724
|
from meerschaum.config import get_config
|
725
725
|
from meerschaum.config.static import STATIC_CONFIG
|
726
|
+
from meerschaum.connectors.parse import is_valid_connector_keys
|
726
727
|
plugins = []
|
727
728
|
_deps = self.get_dependencies(debug=debug)
|
728
729
|
sep = STATIC_CONFIG['plugins']['repo_separator']
|
@@ -731,11 +732,13 @@ class Plugin:
|
|
731
732
|
if _d.startswith('plugin:') and len(_d) > len('plugin:')
|
732
733
|
]
|
733
734
|
default_repo_keys = get_config('meerschaum', 'default_repository')
|
735
|
+
skipped_repo_keys = set()
|
736
|
+
|
734
737
|
for _plugin_name in plugin_names:
|
735
738
|
if sep in _plugin_name:
|
736
739
|
try:
|
737
740
|
_plugin_name, _repo_keys = _plugin_name.split(sep)
|
738
|
-
except Exception
|
741
|
+
except Exception:
|
739
742
|
_repo_keys = default_repo_keys
|
740
743
|
warn(
|
741
744
|
f"Invalid repo keys for required plugin '{_plugin_name}'.\n "
|
@@ -744,7 +747,20 @@ class Plugin:
|
|
744
747
|
)
|
745
748
|
else:
|
746
749
|
_repo_keys = default_repo_keys
|
750
|
+
|
751
|
+
if _repo_keys in skipped_repo_keys:
|
752
|
+
continue
|
753
|
+
|
754
|
+
if not is_valid_connector_keys(_repo_keys):
|
755
|
+
warn(
|
756
|
+
f"Invalid connector '{_repo_keys}'.\n"
|
757
|
+
f" Skipping required plugins from repository '{_repo_keys}'",
|
758
|
+
stack=False,
|
759
|
+
)
|
760
|
+
continue
|
761
|
+
|
747
762
|
plugins.append(Plugin(_plugin_name, repo=_repo_keys))
|
763
|
+
|
748
764
|
return plugins
|
749
765
|
|
750
766
|
|
meerschaum/plugins/__init__.py
CHANGED
@@ -91,8 +91,8 @@ def make_action(
|
|
91
91
|
|
92
92
|
|
93
93
|
def pre_sync_hook(
|
94
|
-
|
95
|
-
|
94
|
+
function: Callable[[Any], Any],
|
95
|
+
) -> Callable[[Any], Any]:
|
96
96
|
"""
|
97
97
|
Register a function as a sync hook to be executed right before sync.
|
98
98
|
|
@@ -166,10 +166,12 @@ def post_sync_hook(
|
|
166
166
|
|
167
167
|
_plugin_endpoints_to_pages = {}
|
168
168
|
def web_page(
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
169
|
+
page: Union[str, None, Callable[[Any], Any]] = None,
|
170
|
+
login_required: bool = True,
|
171
|
+
skip_navbar: bool = False,
|
172
|
+
page_group: Optional[str] = None,
|
173
|
+
**kwargs
|
174
|
+
) -> Any:
|
173
175
|
"""
|
174
176
|
Quickly add pages to the dash application.
|
175
177
|
|
@@ -187,7 +189,7 @@ def web_page(
|
|
187
189
|
page_str = None
|
188
190
|
|
189
191
|
def _decorator(_func: Callable[[Any], Any]) -> Callable[[Any], Any]:
|
190
|
-
nonlocal page_str
|
192
|
+
nonlocal page_str, page_group
|
191
193
|
|
192
194
|
@functools.wraps(_func)
|
193
195
|
def wrapper(*_args, **_kwargs):
|
@@ -197,9 +199,31 @@ def web_page(
|
|
197
199
|
page_str = _func.__name__
|
198
200
|
|
199
201
|
page_str = page_str.lstrip('/').rstrip('/').strip()
|
200
|
-
|
202
|
+
page_key = (
|
203
|
+
' '.join(
|
204
|
+
[
|
205
|
+
word.capitalize()
|
206
|
+
for word in (
|
207
|
+
page_str.replace('/dash', '').lstrip('/').rstrip('/').strip()
|
208
|
+
.replace('-', ' ').replace('_', ' ').split(' ')
|
209
|
+
)
|
210
|
+
]
|
211
|
+
)
|
212
|
+
)
|
213
|
+
|
214
|
+
package_name = _func.__globals__['__name__']
|
215
|
+
plugin_name = (
|
216
|
+
package_name.split('.')[1]
|
217
|
+
if package_name.startswith('plugins.') else None
|
218
|
+
)
|
219
|
+
page_group = page_group or plugin_name
|
220
|
+
if page_group not in _plugin_endpoints_to_pages:
|
221
|
+
_plugin_endpoints_to_pages[page_group] = {}
|
222
|
+
_plugin_endpoints_to_pages[page_group][page_str] = {
|
201
223
|
'function': _func,
|
202
224
|
'login_required': login_required,
|
225
|
+
'skip_navbar': skip_navbar,
|
226
|
+
'page_key': page_key,
|
203
227
|
}
|
204
228
|
return wrapper
|
205
229
|
|
meerschaum/utils/dataframe.py
CHANGED
@@ -153,6 +153,7 @@ def filter_unseen_df(
|
|
153
153
|
attempt_cast_to_numeric,
|
154
154
|
attempt_cast_to_uuid,
|
155
155
|
attempt_cast_to_bytes,
|
156
|
+
attempt_cast_to_geometry,
|
156
157
|
coerce_timezone,
|
157
158
|
serialize_decimal,
|
158
159
|
)
|
@@ -350,6 +351,10 @@ def filter_unseen_df(
|
|
350
351
|
new_bytes_cols = get_bytes_cols(new_df)
|
351
352
|
bytes_cols = set(new_bytes_cols + old_bytes_cols)
|
352
353
|
|
354
|
+
old_geometry_cols = get_geometry_cols(old_df)
|
355
|
+
new_geometry_cols = get_geometry_cols(new_df)
|
356
|
+
geometry_cols = set(new_geometry_cols + old_geometry_cols)
|
357
|
+
|
353
358
|
joined_df = merge(
|
354
359
|
new_df.infer_objects(copy=False).fillna(NA),
|
355
360
|
old_df.infer_objects(copy=False).fillna(NA),
|
@@ -400,6 +405,14 @@ def filter_unseen_df(
|
|
400
405
|
except Exception:
|
401
406
|
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
402
407
|
|
408
|
+
for geometry_col in geometry_cols:
|
409
|
+
if geometry_col not in delta_df.columns:
|
410
|
+
continue
|
411
|
+
try:
|
412
|
+
delta_df[geometry_col] = delta_df[geometry_col].apply(attempt_cast_to_geometry)
|
413
|
+
except Exception:
|
414
|
+
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
415
|
+
|
403
416
|
return delta_df
|
404
417
|
|
405
418
|
|
@@ -858,6 +871,76 @@ def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
|
|
858
871
|
]
|
859
872
|
|
860
873
|
|
874
|
+
def get_geometry_cols(
    df: 'pd.DataFrame',
    with_types_srids: bool = False,
) -> Union[List[str], Dict[str, Any]]:
    """
    Get the columns which contain shapely objects from a Pandas DataFrame.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain shapely geometry objects.
        For Dask DataFrames, only the first valid partition is inspected.

    with_types_srids: bool, default False
        If `True`, return a dictionary mapping columns to geometry types and SRIDs.

    Returns
    -------
    A list of columns to treat as `geometry`.
    If `with_types_srids`, return a dictionary mapping columns to tuples in the form (type, SRID).
    When `df` is `None`, has no rows, or has no geometry columns,
    the result is an empty list (or an empty dictionary if `with_types_srids`).
    """
    ### NOTE: Keep the "nothing found" result the same shape as the populated result
    ###       so callers of `with_types_srids=True` can always treat it as a dictionary.
    if df is None:
        return {} if with_types_srids else []

    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)

    if len(df) == 0:
        return {} if with_types_srids else []

    ### A column counts as geometry if its first non-null value is a shapely object.
    first_valid_indices = {
        col: df[col].first_valid_index()
        for col in df.columns
    }
    geo_cols = [
        col
        for col, ix in first_valid_indices.items()
        if ix is not None and 'shapely' in str(type(df.loc[ix][col]))
    ]
    if not with_types_srids:
        return geo_cols

    ### Nothing to describe, so skip importing geopandas.
    if not geo_cols:
        return {}

    gpd = mrsm.attempt_import('geopandas', lazy=False)
    geo_cols_types_srids = {}
    for col in geo_cols:
        try:
            geo_series = gpd.GeoSeries(df[col], crs=None)
            ### NOTE: Skip nulls; otherwise one missing value raises here
            ###       and the column's real type and SRID are discarded.
            geometry_types = {
                geom.geom_type
                for geom in geo_series
                if hasattr(geom, 'geom_type')
            }
            crs = geo_series.crs
            if not crs:
                srid = 0
            elif crs.is_compound:
                ### For compound CRS, report the EPSG code of the first sub-CRS.
                srid = crs.sub_crs_list[0].to_epsg()
            else:
                srid = crs.to_epsg()

            ### Mixed (or undetermined) types fall back to the generic 'geometry'.
            geometry_type = (
                next(iter(geometry_types))
                if len(geometry_types) == 1
                else 'geometry'
            )
        except Exception:
            srid = 0
            geometry_type = 'geometry'
        geo_cols_types_srids[col] = (geometry_type, srid)

    return geo_cols_types_srids
|
942
|
+
|
943
|
+
|
861
944
|
def enforce_dtypes(
|
862
945
|
df: 'pd.DataFrame',
|
863
946
|
dtypes: Dict[str, str],
|
@@ -911,6 +994,7 @@ def enforce_dtypes(
|
|
911
994
|
attempt_cast_to_numeric,
|
912
995
|
attempt_cast_to_uuid,
|
913
996
|
attempt_cast_to_bytes,
|
997
|
+
attempt_cast_to_geometry,
|
914
998
|
coerce_timezone as _coerce_timezone,
|
915
999
|
)
|
916
1000
|
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
@@ -937,6 +1021,11 @@ def enforce_dtypes(
|
|
937
1021
|
for col, typ in dtypes.items()
|
938
1022
|
if typ.startswith('numeric')
|
939
1023
|
]
|
1024
|
+
geometry_cols = [
|
1025
|
+
col
|
1026
|
+
for col, typ in dtypes.items()
|
1027
|
+
if typ.startswith('geometry') or typ.startswith('geography')
|
1028
|
+
]
|
940
1029
|
uuid_cols = [
|
941
1030
|
col
|
942
1031
|
for col, typ in dtypes.items()
|
@@ -1026,6 +1115,28 @@ def enforce_dtypes(
|
|
1026
1115
|
if col in df.columns:
|
1027
1116
|
df[col] = _coerce_timezone(df[col], strip_utc=strip_timezone)
|
1028
1117
|
|
1118
|
+
if geometry_cols:
|
1119
|
+
geopandas = mrsm.attempt_import('geopandas')
|
1120
|
+
if debug:
|
1121
|
+
dprint(f"Checking for geometry: {geometry_cols}")
|
1122
|
+
parsed_geom_cols = []
|
1123
|
+
for col in geometry_cols:
|
1124
|
+
try:
|
1125
|
+
df[col] = df[col].apply(attempt_cast_to_geometry)
|
1126
|
+
parsed_geom_cols.append(col)
|
1127
|
+
except Exception as e:
|
1128
|
+
if debug:
|
1129
|
+
dprint(f"Unable to parse column '{col}' as geometry:\n{e}")
|
1130
|
+
|
1131
|
+
if parsed_geom_cols:
|
1132
|
+
if debug:
|
1133
|
+
dprint(f"Converting to GeoDataFrame (geometry column: '{parsed_geom_cols[0]}')...")
|
1134
|
+
df = geopandas.GeoDataFrame(df, geometry=parsed_geom_cols[0])
|
1135
|
+
try:
|
1136
|
+
df.rename_geometry(parsed_geom_cols[0], inplace=True)
|
1137
|
+
except ValueError:
|
1138
|
+
pass
|
1139
|
+
|
1029
1140
|
df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
|
1030
1141
|
if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
|
1031
1142
|
if debug:
|
@@ -1579,6 +1690,8 @@ def to_json(
|
|
1579
1690
|
orient: str = 'records',
|
1580
1691
|
date_format: str = 'iso',
|
1581
1692
|
date_unit: str = 'us',
|
1693
|
+
double_precision: int = 15,
|
1694
|
+
geometry_format: str = 'geojson',
|
1582
1695
|
**kwargs: Any
|
1583
1696
|
) -> str:
|
1584
1697
|
"""
|
@@ -1598,17 +1711,31 @@ def to_json(
|
|
1598
1711
|
date_unit: str, default 'us'
|
1599
1712
|
The precision of the timestamps.
|
1600
1713
|
|
1714
|
+
double_precision: int, default 15
|
1715
|
+
The number of decimal places to use when encoding floating point values (maximum 15).
|
1716
|
+
|
1717
|
+
geometry_format: str, default 'geojson'
|
1718
|
+
The serialization format for geometry data.
|
1719
|
+
Accepted values are `geojson`, `wkb_hex`, and `wkt`.
|
1720
|
+
|
1601
1721
|
Returns
|
1602
1722
|
-------
|
1603
1723
|
A JSON string.
|
1604
1724
|
"""
|
1725
|
+
import warnings
|
1726
|
+
import functools
|
1605
1727
|
from meerschaum.utils.packages import import_pandas
|
1606
|
-
from meerschaum.utils.dtypes import
|
1728
|
+
from meerschaum.utils.dtypes import (
|
1729
|
+
serialize_bytes,
|
1730
|
+
serialize_decimal,
|
1731
|
+
serialize_geometry,
|
1732
|
+
)
|
1607
1733
|
pd = import_pandas()
|
1608
1734
|
uuid_cols = get_uuid_cols(df)
|
1609
1735
|
bytes_cols = get_bytes_cols(df)
|
1610
1736
|
numeric_cols = get_numeric_cols(df)
|
1611
|
-
|
1737
|
+
geometry_cols = get_geometry_cols(df)
|
1738
|
+
if safe_copy and bool(uuid_cols or bytes_cols or geometry_cols or numeric_cols):
|
1612
1739
|
df = df.copy()
|
1613
1740
|
for col in uuid_cols:
|
1614
1741
|
df[col] = df[col].astype(str)
|
@@ -1616,9 +1743,19 @@ def to_json(
|
|
1616
1743
|
df[col] = df[col].apply(serialize_bytes)
|
1617
1744
|
for col in numeric_cols:
|
1618
1745
|
df[col] = df[col].apply(serialize_decimal)
|
1746
|
+
with warnings.catch_warnings():
|
1747
|
+
warnings.simplefilter("ignore")
|
1748
|
+
for col in geometry_cols:
|
1749
|
+
df[col] = df[col].apply(
|
1750
|
+
functools.partial(
|
1751
|
+
serialize_geometry,
|
1752
|
+
geometry_format=geometry_format,
|
1753
|
+
)
|
1754
|
+
)
|
1619
1755
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1620
1756
|
date_format=date_format,
|
1621
1757
|
date_unit=date_unit,
|
1758
|
+
double_precision=double_precision,
|
1622
1759
|
orient=orient,
|
1623
1760
|
**kwargs
|
1624
1761
|
)
|
@@ -7,12 +7,13 @@ Utility functions for working with data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
import traceback
|
10
|
+
import json
|
10
11
|
import uuid
|
11
12
|
from datetime import timezone, datetime
|
12
13
|
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
13
14
|
|
14
15
|
import meerschaum as mrsm
|
15
|
-
from meerschaum.utils.typing import Dict, Union, Any, Optional
|
16
|
+
from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
|
16
17
|
from meerschaum.utils.warnings import warn
|
17
18
|
|
18
19
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
@@ -27,10 +28,14 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
|
27
28
|
'bytea': 'bytes',
|
28
29
|
'guid': 'uuid',
|
29
30
|
'UUID': 'uuid',
|
31
|
+
'geom': 'geometry',
|
32
|
+
'geog': 'geography',
|
30
33
|
}
|
31
34
|
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
32
35
|
'json': 'object',
|
33
36
|
'numeric': 'object',
|
37
|
+
'geometry': 'object',
|
38
|
+
'geography': 'object',
|
34
39
|
'uuid': 'object',
|
35
40
|
'datetime': 'datetime64[ns, UTC]',
|
36
41
|
'bool': 'bool[pyarrow]',
|
@@ -60,6 +65,12 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
60
65
|
if dtype.startswith('numeric'):
|
61
66
|
return MRSM_PD_DTYPES['numeric']
|
62
67
|
|
68
|
+
if dtype.startswith('geometry'):
|
69
|
+
return MRSM_PD_DTYPES['geometry']
|
70
|
+
|
71
|
+
if dtype.startswith('geography'):
|
72
|
+
return MRSM_PD_DTYPES['geography']
|
73
|
+
|
63
74
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
64
75
|
### treat it as a SQL db type.
|
65
76
|
if dtype.split(' ')[0].isupper():
|
@@ -67,6 +78,7 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
67
78
|
return get_pd_type_from_db_type(dtype)
|
68
79
|
|
69
80
|
from meerschaum.utils.packages import attempt_import
|
81
|
+
_ = attempt_import('pyarrow', lazy=False)
|
70
82
|
pandas = attempt_import('pandas', lazy=False)
|
71
83
|
|
72
84
|
try:
|
@@ -147,6 +159,10 @@ def are_dtypes_equal(
|
|
147
159
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
148
160
|
return True
|
149
161
|
|
162
|
+
geometry_dtypes = ('geometry', 'object', 'geography')
|
163
|
+
if ldtype in geometry_dtypes and rdtype in geometry_dtypes:
|
164
|
+
return True
|
165
|
+
|
150
166
|
if ldtype.lower() == rdtype.lower():
|
151
167
|
return True
|
152
168
|
|
@@ -277,6 +293,70 @@ def attempt_cast_to_bytes(value: Any) -> Any:
|
|
277
293
|
return value
|
278
294
|
|
279
295
|
|
296
|
+
def attempt_cast_to_geometry(value: Any) -> Any:
    """
    Try to coerce `value` into a `shapely` (`geometry`) object.

    Parameters
    ----------
    value: Any
        The candidate geometry: an existing shapely object, a GeoJSON-like
        `dict` or `list`, a WKT string, or WKB data (bytes or hex string).

    Returns
    -------
    The parsed shapely object, or `value` unchanged if it is already a shapely
    object, is not recognized as geometry data, or fails to parse.
    """
    shapely, wkt_module, wkb_module = mrsm.attempt_import(
        'shapely',
        'shapely.wkt',
        'shapely.wkb',
        lazy=False,
    )

    ### Already a shapely object: nothing to do.
    already_geometry = 'shapely' in str(type(value))
    if already_geometry:
        return value

    ### Containers are serialized to JSON and parsed as GeoJSON.
    if isinstance(value, (dict, list)):
        try:
            return shapely.from_geojson(json.dumps(value))
        except Exception:
            return value

    ### `True` means WKT, `False` means WKB, `None` means neither.
    is_wkt = geometry_is_wkt(value)
    if is_wkt is None:
        return value

    try:
        if is_wkt:
            return wkt_module.loads(value)
        return wkb_module.loads(value)
    except Exception:
        return value
|
327
|
+
|
328
|
+
|
329
|
+
def geometry_is_wkt(value: Union[str, bytes]) -> Union[bool, None]:
    """
    Determine whether an input value should be treated as WKT or WKB geometry data.

    Parameters
    ----------
    value: Union[str, bytes]
        The input data to be parsed into geometry data.

    Returns
    -------
    A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
    Return `None` if `value` should be parsed as neither
    (including non-string input and empty strings).

    Examples
    --------
    >>> geometry_is_wkt('POINT (1 2)')
    True
    >>> geometry_is_wkt('POINT Z (1 2 3)')
    True
    >>> geometry_is_wkt('DEADBEEF')
    False
    >>> geometry_is_wkt('') is None
    True
    """
    import re
    if not isinstance(value, (str, bytes)):
        return None

    ### Raw bytes are always treated as WKB.
    if isinstance(value, bytes):
        return False

    ### Geometry keyword, optional dimension tag (Z / M / ZM),
    ### then either a parenthesized coordinate body or the `EMPTY` keyword.
    wkt_pattern = (
        r'^\s*(POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|MULTILINESTRING'
        r'|MULTIPOLYGON|GEOMETRYCOLLECTION)'
        r'(\s+(ZM|Z|M))?\s*(\(.*\)|EMPTY)\s*$'
    )
    ### NOTE: `re.DOTALL` lets the coordinate body span multiple lines.
    if re.match(wkt_pattern, value, re.IGNORECASE | re.DOTALL):
        return True

    ### NOTE: Require at least one character; `all()` over an empty string is `True`,
    ###       which would otherwise classify `''` as hex-encoded WKB.
    is_hex = (
        len(value) > 0
        and len(value) % 2 == 0
        and all(char in '0123456789ABCDEFabcdef' for char in value)
    )
    if is_hex:
        return False

    return None
|
358
|
+
|
359
|
+
|
280
360
|
def value_is_null(value: Any) -> bool:
|
281
361
|
"""
|
282
362
|
Determine if a value is a null-like string.
|
@@ -458,6 +538,47 @@ def serialize_bytes(data: bytes) -> str:
|
|
458
538
|
return base64.b64encode(data).decode('utf-8')
|
459
539
|
|
460
540
|
|
541
|
+
def serialize_geometry(
    geom: Any,
    geometry_format: str = 'wkb_hex',
    as_wkt: bool = False,
) -> Union[str, Dict[str, Any]]:
    """
    Serialize geometry data as a WKB hex string, a WKT string, or a GeoJSON dictionary.

    Parameters
    ----------
    geom: Any
        The potential geometry data to be serialized.

    geometry_format: str, default 'wkb_hex'
        The serialization format for geometry data.
        Accepted formats are `wkb_hex` (well-known binary hex string),
        `wkt` (well-known text), and `geojson`.

    as_wkt: bool, default False
        Not consulted by this function; pass `geometry_format='wkt'` to get WKT.

    Returns
    -------
    For `geojson`, a dictionary parsed from the GeoJSON document
    (or `None` if `geom` is `None`).
    Otherwise a string: `geom.wkb_hex` for `wkb_hex`, `geom.wkt` for any other format,
    or `str(geom)` if `geom` has no `wkb_hex` attribute.
    """
    if geometry_format == 'geojson':
        ### NOTE: A null geometry has no GeoJSON document to hand to `json.loads()`.
        if geom is None:
            return None

        ### Only the GeoJSON branch needs shapely itself, so import it here.
        shapely = mrsm.attempt_import('shapely', lazy=False)
        geojson_str = shapely.to_geojson(geom)
        return json.loads(geojson_str)

    if hasattr(geom, 'wkb_hex'):
        return geom.wkb_hex if geometry_format == 'wkb_hex' else geom.wkt

    ### Not a geometry object: fall back to its string representation.
    return str(geom)
|
572
|
+
|
573
|
+
|
574
|
+
def deserialize_geometry(geom_wkb: Union[str, bytes]):
    """
    Deserialize a WKB string into a shapely geometry object.

    Parameters
    ----------
    geom_wkb: Union[str, bytes]
        The well-known binary data to parse.

    Returns
    -------
    The object returned by `shapely.wkb.loads()`.
    """
    ### NOTE: The module name must be passed explicitly;
    ###       calling `attempt_import()` with no names does not import `shapely.wkb`.
    shapely_wkb = mrsm.attempt_import('shapely.wkb', lazy=False)
    return shapely_wkb.loads(geom_wkb)
|
580
|
+
|
581
|
+
|
461
582
|
def deserialize_bytes_string(data: str | None, force_hex: bool = False) -> bytes | None:
|
462
583
|
"""
|
463
584
|
Given a serialized ASCII string of bytes data, return the original bytes.
|
@@ -559,7 +680,96 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
|
559
680
|
if isinstance(x, Decimal):
|
560
681
|
return serialize_decimal(x)
|
561
682
|
|
683
|
+
if 'shapely' in str(type(x)):
|
684
|
+
return serialize_geometry(x)
|
685
|
+
|
562
686
|
if value_is_null(x):
|
563
687
|
return None
|
564
688
|
|
565
689
|
return str(x) if default_to_str else x
|
690
|
+
|
691
|
+
|
692
|
+
def get_geometry_type_srid(
    dtype: str = 'geometry',
    default_type: str = 'geometry',
    default_srid: int = 4326,
) -> Union[Tuple[str, int], Tuple[str, None]]:
    """
    Given the specified geometry `dtype`, return a tuple in the form (type, SRID).

    Parameters
    ----------
    dtype: str, default 'geometry'
        Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`).
        Parentheses are accepted in place of brackets (PostGIS syntax).
        You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:

        - `Point`
        - `LineString`
        - `LinearRing`
        - `Polygon`
        - `MultiPoint`
        - `MultiLineString`
        - `MultiPolygon`
        - `GeometryCollection`

    default_type: str, default 'geometry'
        The type to return when the modifier does not name a known geometry type.

    default_srid: int, default 4326
        The SRID to return when the modifier does not contain an integer.

    Returns
    -------
    A tuple in the form (type, SRID).
    Defaults to `(default_type, default_srid)`.

    Examples
    --------
    >>> from meerschaum.utils.dtypes import get_geometry_type_srid
    >>> get_geometry_type_srid()
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[]')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[Point, 0]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0, Point]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0]')
    ('geometry', 0)
    >>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
    ('MultiLineString', 4326)
    >>> get_geometry_type_srid('geography')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geography[POINT]')
    ('Point', 4326)
    """
    ### NOTE: PostGIS syntax must also be parsed.
    dtype = dtype.replace('(', '[').replace(')', ']')

    ### NOTE: `partition()` tolerates an empty bare dtype (e.g. `''` or `'[Point]'`);
    ###       splitting on an empty bare dtype raises `ValueError` (empty separator).
    _bare_dtype, _, raw_modifier = dtype.partition('[')
    modifier = raw_modifier.lstrip('[').rstrip(']')
    if not modifier:
        return default_type, default_srid

    from meerschaum.utils.misc import is_int
    shapely_geometry_base = mrsm.attempt_import('shapely.geometry.base')

    ### Map lowercased names to shapely's canonical capitalization.
    geometry_types = {
        typ.lower(): typ
        for typ in shapely_geometry_base.GEOMETRY_TYPES
    }

    ### Normalize each comma-separated part, dropping optional `srid=` / `type=` labels.
    parts = [
        part.lower().replace('srid=', '').replace('type=', '').strip()
        for part in modifier.split(',')
    ]

    ### The first integer part is the SRID; the first known type name is the type.
    ### Order within the modifier does not matter.
    srid = next(
        (int(part) for part in parts if is_int(part)),
        default_srid,
    )
    geometry_type = next(
        (geometry_types[part] for part in parts if part in geometry_types),
        default_type,
    )

    return geometry_type, srid
|