meerschaum 2.8.4__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/_chunks.py +67 -0
- meerschaum/api/dash/callbacks/__init__.py +5 -2
- meerschaum/api/dash/callbacks/custom.py +21 -8
- meerschaum/api/dash/callbacks/dashboard.py +26 -4
- meerschaum/api/dash/callbacks/settings/__init__.py +8 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +76 -0
- meerschaum/api/dash/components.py +136 -25
- meerschaum/api/dash/pages/__init__.py +1 -0
- meerschaum/api/dash/pages/dashboard.py +11 -9
- meerschaum/api/dash/pages/plugins.py +31 -27
- meerschaum/api/dash/pages/settings/__init__.py +8 -0
- meerschaum/api/dash/pages/settings/password_reset.py +63 -0
- meerschaum/api/dash/webterm.py +6 -3
- meerschaum/api/resources/static/css/dash.css +8 -1
- meerschaum/api/resources/templates/termpage.html +4 -0
- meerschaum/api/routes/_pipes.py +232 -79
- meerschaum/config/_default.py +4 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +12 -1
- meerschaum/connectors/api/_pipes.py +106 -45
- meerschaum/connectors/api/_plugins.py +51 -45
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/parse.py +1 -2
- meerschaum/connectors/sql/_SQLConnector.py +3 -0
- meerschaum/connectors/sql/_cli.py +1 -0
- meerschaum/connectors/sql/_create_engine.py +51 -4
- meerschaum/connectors/sql/_pipes.py +38 -6
- meerschaum/connectors/sql/_sql.py +35 -4
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -0
- meerschaum/connectors/valkey/_pipes.py +51 -39
- meerschaum/core/Pipe/__init__.py +1 -0
- meerschaum/core/Pipe/_data.py +1 -2
- meerschaum/core/Pipe/_sync.py +64 -4
- meerschaum/plugins/_Plugin.py +21 -5
- meerschaum/plugins/__init__.py +32 -8
- meerschaum/utils/dataframe.py +139 -2
- meerschaum/utils/dtypes/__init__.py +211 -1
- meerschaum/utils/dtypes/sql.py +296 -5
- meerschaum/utils/formatting/_shell.py +1 -4
- meerschaum/utils/misc.py +1 -1
- meerschaum/utils/packages/_packages.py +7 -1
- meerschaum/utils/sql.py +139 -11
- meerschaum/utils/venv/__init__.py +6 -1
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/METADATA +17 -3
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/RECORD +52 -52
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/WHEEL +1 -1
- meerschaum/_internal/gui/__init__.py +0 -43
- meerschaum/_internal/gui/app/__init__.py +0 -50
- meerschaum/_internal/gui/app/_windows.py +0 -74
- meerschaum/_internal/gui/app/actions.py +0 -30
- meerschaum/_internal/gui/app/pipes.py +0 -47
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/zip-safe +0 -0
meerschaum/plugins/__init__.py
CHANGED
@@ -91,8 +91,8 @@ def make_action(
|
|
91
91
|
|
92
92
|
|
93
93
|
def pre_sync_hook(
|
94
|
-
|
95
|
-
|
94
|
+
function: Callable[[Any], Any],
|
95
|
+
) -> Callable[[Any], Any]:
|
96
96
|
"""
|
97
97
|
Register a function as a sync hook to be executed right before sync.
|
98
98
|
|
@@ -166,10 +166,12 @@ def post_sync_hook(
|
|
166
166
|
|
167
167
|
_plugin_endpoints_to_pages = {}
|
168
168
|
def web_page(
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
169
|
+
page: Union[str, None, Callable[[Any], Any]] = None,
|
170
|
+
login_required: bool = True,
|
171
|
+
skip_navbar: bool = False,
|
172
|
+
page_group: Optional[str] = None,
|
173
|
+
**kwargs
|
174
|
+
) -> Any:
|
173
175
|
"""
|
174
176
|
Quickly add pages to the dash application.
|
175
177
|
|
@@ -187,7 +189,7 @@ def web_page(
|
|
187
189
|
page_str = None
|
188
190
|
|
189
191
|
def _decorator(_func: Callable[[Any], Any]) -> Callable[[Any], Any]:
|
190
|
-
nonlocal page_str
|
192
|
+
nonlocal page_str, page_group
|
191
193
|
|
192
194
|
@functools.wraps(_func)
|
193
195
|
def wrapper(*_args, **_kwargs):
|
@@ -197,9 +199,31 @@ def web_page(
|
|
197
199
|
page_str = _func.__name__
|
198
200
|
|
199
201
|
page_str = page_str.lstrip('/').rstrip('/').strip()
|
200
|
-
|
202
|
+
page_key = (
|
203
|
+
' '.join(
|
204
|
+
[
|
205
|
+
word.capitalize()
|
206
|
+
for word in (
|
207
|
+
page_str.replace('/dash', '').lstrip('/').rstrip('/').strip()
|
208
|
+
.replace('-', ' ').replace('_', ' ').split(' ')
|
209
|
+
)
|
210
|
+
]
|
211
|
+
)
|
212
|
+
)
|
213
|
+
|
214
|
+
package_name = _func.__globals__['__name__']
|
215
|
+
plugin_name = (
|
216
|
+
package_name.split('.')[1]
|
217
|
+
if package_name.startswith('plugins.') else None
|
218
|
+
)
|
219
|
+
page_group = page_group or plugin_name
|
220
|
+
if page_group not in _plugin_endpoints_to_pages:
|
221
|
+
_plugin_endpoints_to_pages[page_group] = {}
|
222
|
+
_plugin_endpoints_to_pages[page_group][page_str] = {
|
201
223
|
'function': _func,
|
202
224
|
'login_required': login_required,
|
225
|
+
'skip_navbar': skip_navbar,
|
226
|
+
'page_key': page_key,
|
203
227
|
}
|
204
228
|
return wrapper
|
205
229
|
|
meerschaum/utils/dataframe.py
CHANGED
@@ -153,6 +153,7 @@ def filter_unseen_df(
|
|
153
153
|
attempt_cast_to_numeric,
|
154
154
|
attempt_cast_to_uuid,
|
155
155
|
attempt_cast_to_bytes,
|
156
|
+
attempt_cast_to_geometry,
|
156
157
|
coerce_timezone,
|
157
158
|
serialize_decimal,
|
158
159
|
)
|
@@ -350,6 +351,10 @@ def filter_unseen_df(
|
|
350
351
|
new_bytes_cols = get_bytes_cols(new_df)
|
351
352
|
bytes_cols = set(new_bytes_cols + old_bytes_cols)
|
352
353
|
|
354
|
+
old_geometry_cols = get_geometry_cols(old_df)
|
355
|
+
new_geometry_cols = get_geometry_cols(new_df)
|
356
|
+
geometry_cols = set(new_geometry_cols + old_geometry_cols)
|
357
|
+
|
353
358
|
joined_df = merge(
|
354
359
|
new_df.infer_objects(copy=False).fillna(NA),
|
355
360
|
old_df.infer_objects(copy=False).fillna(NA),
|
@@ -400,6 +405,14 @@ def filter_unseen_df(
|
|
400
405
|
except Exception:
|
401
406
|
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
402
407
|
|
408
|
+
for geometry_col in geometry_cols:
|
409
|
+
if geometry_col not in delta_df.columns:
|
410
|
+
continue
|
411
|
+
try:
|
412
|
+
delta_df[geometry_col] = delta_df[geometry_col].apply(attempt_cast_to_geometry)
|
413
|
+
except Exception:
|
414
|
+
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
415
|
+
|
403
416
|
return delta_df
|
404
417
|
|
405
418
|
|
@@ -858,6 +871,76 @@ def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
|
|
858
871
|
]
|
859
872
|
|
860
873
|
|
874
|
+
def get_geometry_cols(
    df: 'pd.DataFrame',
    with_types_srids: bool = False,
) -> Union[List[str], Dict[str, Any]]:
    """
    Get the columns which contain shapely objects from a Pandas DataFrame.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain shapely geometry objects.
        Only the first valid (non-null) value of each column is inspected.

    with_types_srids: bool, default False
        If `True`, return a dictionary mapping columns to geometry types and SRIDs.

    Returns
    -------
    A list of columns to treat as `geometry`.
    If `with_types_srids`, return a dictionary mapping columns to tuples in the form (type, SRID).
    An empty list (or an empty dictionary if `with_types_srids`) is returned
    when `df` is `None`, empty, or contains no geometry columns.
    """
    ### NOTE: The "nothing found" value must match the requested return shape.
    if df is None:
        return {} if with_types_srids else []

    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)

    if df is None or len(df) == 0:
        return {} if with_types_srids else []

    cols_indices = {
        col: df[col].first_valid_index()
        for col in df.columns
    }
    ### A column is geometry if its first non-null value is a shapely object.
    geo_cols = [
        col
        for col, ix in cols_indices.items()
        if (
            ix is not None
            and
            'shapely' in str(type(df.loc[ix][col]))
        )
    ]
    if not with_types_srids:
        return geo_cols

    ### Avoid importing geopandas when there is nothing to inspect.
    if not geo_cols:
        return {}

    gpd = mrsm.attempt_import('geopandas', lazy=False)
    geo_cols_types_srids = {}
    for col in geo_cols:
        try:
            sample_geo_series = gpd.GeoSeries(df[col], crs=None)
            geometry_types = {geom.geom_type for geom in sample_geo_series}
            srid = (
                (
                    sample_geo_series.crs.sub_crs_list[0].to_epsg()
                    if sample_geo_series.crs.is_compound
                    else sample_geo_series.crs.to_epsg()
                )
                if sample_geo_series.crs
                else 0
            )
            ### Mixed geometry types fall back to the generic 'geometry'.
            geometry_type = list(geometry_types)[0] if len(geometry_types) == 1 else 'geometry'
        except Exception:
            ### Best-effort: any failure degrades to the generic type with SRID 0.
            srid = 0
            geometry_type = 'geometry'
        geo_cols_types_srids[col] = (geometry_type, srid)

    return geo_cols_types_srids
|
942
|
+
|
943
|
+
|
861
944
|
def enforce_dtypes(
|
862
945
|
df: 'pd.DataFrame',
|
863
946
|
dtypes: Dict[str, str],
|
@@ -911,6 +994,7 @@ def enforce_dtypes(
|
|
911
994
|
attempt_cast_to_numeric,
|
912
995
|
attempt_cast_to_uuid,
|
913
996
|
attempt_cast_to_bytes,
|
997
|
+
attempt_cast_to_geometry,
|
914
998
|
coerce_timezone as _coerce_timezone,
|
915
999
|
)
|
916
1000
|
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
@@ -937,6 +1021,11 @@ def enforce_dtypes(
|
|
937
1021
|
for col, typ in dtypes.items()
|
938
1022
|
if typ.startswith('numeric')
|
939
1023
|
]
|
1024
|
+
geometry_cols = [
|
1025
|
+
col
|
1026
|
+
for col, typ in dtypes.items()
|
1027
|
+
if typ.startswith('geometry') or typ.startswith('geography')
|
1028
|
+
]
|
940
1029
|
uuid_cols = [
|
941
1030
|
col
|
942
1031
|
for col, typ in dtypes.items()
|
@@ -1026,6 +1115,28 @@ def enforce_dtypes(
|
|
1026
1115
|
if col in df.columns:
|
1027
1116
|
df[col] = _coerce_timezone(df[col], strip_utc=strip_timezone)
|
1028
1117
|
|
1118
|
+
if geometry_cols:
|
1119
|
+
geopandas = mrsm.attempt_import('geopandas')
|
1120
|
+
if debug:
|
1121
|
+
dprint(f"Checking for geometry: {geometry_cols}")
|
1122
|
+
parsed_geom_cols = []
|
1123
|
+
for col in geometry_cols:
|
1124
|
+
try:
|
1125
|
+
df[col] = df[col].apply(attempt_cast_to_geometry)
|
1126
|
+
parsed_geom_cols.append(col)
|
1127
|
+
except Exception as e:
|
1128
|
+
if debug:
|
1129
|
+
dprint(f"Unable to parse column '{col}' as geometry:\n{e}")
|
1130
|
+
|
1131
|
+
if parsed_geom_cols:
|
1132
|
+
if debug:
|
1133
|
+
dprint(f"Converting to GeoDataFrame (geometry column: '{parsed_geom_cols[0]}')...")
|
1134
|
+
df = geopandas.GeoDataFrame(df, geometry=parsed_geom_cols[0])
|
1135
|
+
try:
|
1136
|
+
df.rename_geometry(parsed_geom_cols[0], inplace=True)
|
1137
|
+
except ValueError:
|
1138
|
+
pass
|
1139
|
+
|
1029
1140
|
df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
|
1030
1141
|
if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
|
1031
1142
|
if debug:
|
@@ -1579,6 +1690,8 @@ def to_json(
|
|
1579
1690
|
orient: str = 'records',
|
1580
1691
|
date_format: str = 'iso',
|
1581
1692
|
date_unit: str = 'us',
|
1693
|
+
double_precision: int = 15,
|
1694
|
+
geometry_format: str = 'geojson',
|
1582
1695
|
**kwargs: Any
|
1583
1696
|
) -> str:
|
1584
1697
|
"""
|
@@ -1598,17 +1711,31 @@ def to_json(
|
|
1598
1711
|
date_unit: str, default 'us'
|
1599
1712
|
The precision of the timestamps.
|
1600
1713
|
|
1714
|
+
double_precision: int, default 15
|
1715
|
+
The number of decimal places to use when encoding floating point values (maximum 15).
|
1716
|
+
|
1717
|
+
geometry_format: str, default 'geojson'
|
1718
|
+
The serialization format for geometry data.
|
1719
|
+
Accepted values are `geojson`, `wkb_hex`, and `wkt`.
|
1720
|
+
|
1601
1721
|
Returns
|
1602
1722
|
-------
|
1603
1723
|
A JSON string.
|
1604
1724
|
"""
|
1725
|
+
import warnings
|
1726
|
+
import functools
|
1605
1727
|
from meerschaum.utils.packages import import_pandas
|
1606
|
-
from meerschaum.utils.dtypes import
|
1728
|
+
from meerschaum.utils.dtypes import (
|
1729
|
+
serialize_bytes,
|
1730
|
+
serialize_decimal,
|
1731
|
+
serialize_geometry,
|
1732
|
+
)
|
1607
1733
|
pd = import_pandas()
|
1608
1734
|
uuid_cols = get_uuid_cols(df)
|
1609
1735
|
bytes_cols = get_bytes_cols(df)
|
1610
1736
|
numeric_cols = get_numeric_cols(df)
|
1611
|
-
|
1737
|
+
geometry_cols = get_geometry_cols(df)
|
1738
|
+
if safe_copy and bool(uuid_cols or bytes_cols or geometry_cols or numeric_cols):
|
1612
1739
|
df = df.copy()
|
1613
1740
|
for col in uuid_cols:
|
1614
1741
|
df[col] = df[col].astype(str)
|
@@ -1616,9 +1743,19 @@ def to_json(
|
|
1616
1743
|
df[col] = df[col].apply(serialize_bytes)
|
1617
1744
|
for col in numeric_cols:
|
1618
1745
|
df[col] = df[col].apply(serialize_decimal)
|
1746
|
+
with warnings.catch_warnings():
|
1747
|
+
warnings.simplefilter("ignore")
|
1748
|
+
for col in geometry_cols:
|
1749
|
+
df[col] = df[col].apply(
|
1750
|
+
functools.partial(
|
1751
|
+
serialize_geometry,
|
1752
|
+
geometry_format=geometry_format,
|
1753
|
+
)
|
1754
|
+
)
|
1619
1755
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1620
1756
|
date_format=date_format,
|
1621
1757
|
date_unit=date_unit,
|
1758
|
+
double_precision=double_precision,
|
1622
1759
|
orient=orient,
|
1623
1760
|
**kwargs
|
1624
1761
|
)
|
@@ -7,12 +7,13 @@ Utility functions for working with data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
import traceback
|
10
|
+
import json
|
10
11
|
import uuid
|
11
12
|
from datetime import timezone, datetime
|
12
13
|
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
13
14
|
|
14
15
|
import meerschaum as mrsm
|
15
|
-
from meerschaum.utils.typing import Dict, Union, Any, Optional
|
16
|
+
from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
|
16
17
|
from meerschaum.utils.warnings import warn
|
17
18
|
|
18
19
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
@@ -27,10 +28,14 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
|
27
28
|
'bytea': 'bytes',
|
28
29
|
'guid': 'uuid',
|
29
30
|
'UUID': 'uuid',
|
31
|
+
'geom': 'geometry',
|
32
|
+
'geog': 'geography',
|
30
33
|
}
|
31
34
|
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
32
35
|
'json': 'object',
|
33
36
|
'numeric': 'object',
|
37
|
+
'geometry': 'object',
|
38
|
+
'geography': 'object',
|
34
39
|
'uuid': 'object',
|
35
40
|
'datetime': 'datetime64[ns, UTC]',
|
36
41
|
'bool': 'bool[pyarrow]',
|
@@ -60,6 +65,12 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
60
65
|
if dtype.startswith('numeric'):
|
61
66
|
return MRSM_PD_DTYPES['numeric']
|
62
67
|
|
68
|
+
if dtype.startswith('geometry'):
|
69
|
+
return MRSM_PD_DTYPES['geometry']
|
70
|
+
|
71
|
+
if dtype.startswith('geography'):
|
72
|
+
return MRSM_PD_DTYPES['geography']
|
73
|
+
|
63
74
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
64
75
|
### treat it as a SQL db type.
|
65
76
|
if dtype.split(' ')[0].isupper():
|
@@ -67,6 +78,7 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
67
78
|
return get_pd_type_from_db_type(dtype)
|
68
79
|
|
69
80
|
from meerschaum.utils.packages import attempt_import
|
81
|
+
_ = attempt_import('pyarrow', lazy=False)
|
70
82
|
pandas = attempt_import('pandas', lazy=False)
|
71
83
|
|
72
84
|
try:
|
@@ -147,6 +159,10 @@ def are_dtypes_equal(
|
|
147
159
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
148
160
|
return True
|
149
161
|
|
162
|
+
geometry_dtypes = ('geometry', 'object', 'geography')
|
163
|
+
if ldtype in geometry_dtypes and rdtype in geometry_dtypes:
|
164
|
+
return True
|
165
|
+
|
150
166
|
if ldtype.lower() == rdtype.lower():
|
151
167
|
return True
|
152
168
|
|
@@ -277,6 +293,70 @@ def attempt_cast_to_bytes(value: Any) -> Any:
|
|
277
293
|
return value
|
278
294
|
|
279
295
|
|
296
|
+
def attempt_cast_to_geometry(value: Any) -> Any:
    """
    Try to turn `value` into a `shapely` geometry object.

    Shapely objects pass through untouched, dictionaries and lists are
    parsed as GeoJSON, and strings / bytes are parsed as WKT or WKB
    (as decided by `geometry_is_wkt()`).
    Whenever parsing is not possible, the input is returned unchanged.
    """
    shapely, shapely_wkt, shapely_wkb = mrsm.attempt_import(
        'shapely',
        'shapely.wkt',
        'shapely.wkb',
        lazy=False,
    )
    type_name = str(type(value))
    if 'shapely' in type_name:
        return value

    if isinstance(value, (dict, list)):
        ### GeoJSON-like structures are round-tripped through a JSON string.
        try:
            geojson_text = json.dumps(value)
            return shapely.from_geojson(geojson_text)
        except Exception:
            return value

    is_wkt = geometry_is_wkt(value)
    if is_wkt is None:
        ### Neither WKT nor WKB: leave the value alone.
        return value

    try:
        if is_wkt:
            return shapely_wkt.loads(value)
        return shapely_wkb.loads(value)
    except Exception:
        return value
|
327
|
+
|
328
|
+
|
329
|
+
def geometry_is_wkt(value: Union[str, bytes]) -> Union[bool, None]:
    """
    Determine whether an input value should be treated as WKT or WKB geometry data.

    Parameters
    ----------
    value: Union[str, bytes]
        The input data to be parsed into geometry data.

    Returns
    -------
    A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
    Return `None` if `value` should be parsed as neither
    (e.g. non-string input, blank strings, or arbitrary text).
    """
    import re
    if isinstance(value, bytes):
        return False

    if not isinstance(value, str):
        return None

    ### Blank strings would otherwise pass the (vacuous) hex check below.
    if not value.strip():
        return None

    ### Accept the tagged text forms which shapely itself emits:
    ### optional Z / M / ZM dimension tags, `EMPTY` geometries, and `LINEARRING`.
    wkt_pattern = (
        r'^\s*(POINT|LINESTRING|LINEARRING|POLYGON|MULTIPOINT|MULTILINESTRING'
        r'|MULTIPOLYGON|GEOMETRYCOLLECTION)'
        r'(\s+(ZM|Z|M))?\s*(\(.*\)|EMPTY)\s*$'
    )
    ### `re.DOTALL` allows coordinate lists which span multiple lines.
    if re.match(wkt_pattern, value, re.IGNORECASE | re.DOTALL):
        return True

    ### Hex-encoded WKB: an even number of hexadecimal digits.
    if len(value) % 2 == 0 and all(c in '0123456789ABCDEFabcdef' for c in value):
        return False

    return None
|
358
|
+
|
359
|
+
|
280
360
|
def value_is_null(value: Any) -> bool:
|
281
361
|
"""
|
282
362
|
Determine if a value is a null-like string.
|
@@ -458,6 +538,47 @@ def serialize_bytes(data: bytes) -> str:
|
|
458
538
|
return base64.b64encode(data).decode('utf-8')
|
459
539
|
|
460
540
|
|
541
|
+
def serialize_geometry(
    geom: Any,
    geometry_format: str = 'wkb_hex',
    as_wkt: bool = False,
) -> Union[str, Dict[str, Any], None]:
    """
    Serialize geometry data (by default as a hex-encoded well-known-binary string).

    Parameters
    ----------
    geom: Any
        The potential geometry data to be serialized.

    geometry_format: str, default 'wkb_hex'
        The serialization format for geometry data.
        Accepted formats are `wkb_hex` (well-known binary hex string),
        `wkt` (well-known text), and `geojson`.
        Any value other than `wkb_hex` or `geojson` produces WKT.

    as_wkt: bool, default False
        Accepted for backwards compatibility; this flag is not read.
        Use `geometry_format='wkt'` instead.

    Returns
    -------
    A string containing the geometry data
    (or a dictionary if `geometry_format` is `geojson`).
    Missing values (`None`, `NaN`, `pd.NA`, `NaT`) are returned as `None`.
    Objects without a `wkb_hex` attribute are returned as `str(geom)`.
    """
    ### Nulls are common in geometry columns; never serialize them as 'None' / 'nan'
    ### (and never hand them to `json.loads()`, which would raise).
    is_missing = (
        geom is None
        or (isinstance(geom, float) and geom != geom)
        or type(geom).__name__ in ('NAType', 'NaTType')
    )
    if is_missing:
        return None

    if geometry_format == 'geojson':
        ### shapely is only needed for the GeoJSON conversion.
        shapely = mrsm.attempt_import('shapely', lazy=False)
        geojson_str = shapely.to_geojson(geom)
        return json.loads(geojson_str)

    if hasattr(geom, 'wkb_hex'):
        return geom.wkb_hex if geometry_format == 'wkb_hex' else geom.wkt

    return str(geom)
|
572
|
+
|
573
|
+
|
574
|
+
def deserialize_geometry(geom_wkb: Union[str, bytes]):
    """
    Deserialize a WKB string into a shapely geometry object.

    Parameters
    ----------
    geom_wkb: Union[str, bytes]
        The well-known-binary data, passed as-is to `shapely.wkb.loads()`.

    Returns
    -------
    The geometry object returned by `shapely.wkb.loads()`.
    """
    ### NOTE: The package name must be given explicitly to `attempt_import()`.
    ### Import the `shapely.wkb` submodule directly (as `attempt_cast_to_geometry()` does).
    shapely_wkb = mrsm.attempt_import('shapely.wkb', lazy=False)
    return shapely_wkb.loads(geom_wkb)
|
580
|
+
|
581
|
+
|
461
582
|
def deserialize_bytes_string(data: str | None, force_hex: bool = False) -> bytes | None:
|
462
583
|
"""
|
463
584
|
Given a serialized ASCII string of bytes data, return the original bytes.
|
@@ -559,7 +680,96 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
|
559
680
|
if isinstance(x, Decimal):
|
560
681
|
return serialize_decimal(x)
|
561
682
|
|
683
|
+
if 'shapely' in str(type(x)):
|
684
|
+
return serialize_geometry(x)
|
685
|
+
|
562
686
|
if value_is_null(x):
|
563
687
|
return None
|
564
688
|
|
565
689
|
return str(x) if default_to_str else x
|
690
|
+
|
691
|
+
|
692
|
+
def get_geometry_type_srid(
    dtype: str = 'geometry',
    default_type: str = 'geometry',
    default_srid: int = 4326,
) -> Union[Tuple[str, int], Tuple[str, None]]:
    """
    Given the specified geometry `dtype`, return a tuple in the form (type, SRID).

    Parameters
    ----------
    dtype: str, default 'geometry'
        Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`).
        PostGIS-style parentheses (e.g. `geometry(Point, 4326)`) are also accepted.
        You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:

        - `Point`
        - `LineString`
        - `LinearRing`
        - `Polygon`
        - `MultiPoint`
        - `MultiLineString`
        - `MultiPolygon`
        - `GeometryCollection`

    default_type: str, default 'geometry'
        The geometry type to return when the modifier does not name one.

    default_srid: int, default 4326
        The SRID to return when the modifier does not contain one.

    Returns
    -------
    A tuple in the form (type, SRID).
    Defaults to `(default_type, default_srid)`.

    Examples
    --------
    >>> from meerschaum.utils.dtypes import get_geometry_type_srid
    >>> get_geometry_type_srid()
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[]')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[Point, 0]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0, Point]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0]')
    ('geometry', 0)
    >>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
    ('MultiLineString', 4326)
    >>> get_geometry_type_srid('geography')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geography[POINT]')
    ('Point', 4326)
    """
    if not dtype:
        return default_type, default_srid

    ### NOTE: PostGIS syntax must also be parsed.
    dtype = dtype.replace('(', '[').replace(')', ']')

    ### Everything after the first bracket is the modifier.
    ### (`str.partition()` also works when nothing precedes the bracket.)
    _, _, modifier = dtype.partition('[')
    modifier = modifier.lstrip('[').rstrip(']').strip()
    if not modifier:
        return default_type, default_srid

    ### Only import the parsing helpers once there is a modifier to parse.
    from meerschaum.utils.misc import is_int
    shapely_geometry_base = mrsm.attempt_import('shapely.geometry.base')
    geometry_types = {
        typ.lower(): typ
        for typ in shapely_geometry_base.GEOMETRY_TYPES
    }

    ### Parts may be given in any order and optionally prefixed (`srid=`, `type=`).
    parts = [
        part.lower().replace('srid=', '').replace('type=', '').strip()
        for part in modifier.split(',')
    ]

    ### The first integer part is the SRID; the first known type name is the type.
    srid = next(
        (int(part) for part in parts if is_int(part)),
        default_srid,
    )
    geometry_type = next(
        (geometry_types[part] for part in parts if part in geometry_types),
        default_type,
    )

    return geometry_type, srid
|