meerschaum 2.8.4__py3-none-any.whl → 2.9.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/_chunks.py +67 -0
- meerschaum/api/dash/callbacks/custom.py +23 -2
- meerschaum/api/dash/callbacks/dashboard.py +41 -3
- meerschaum/api/dash/components.py +27 -19
- meerschaum/api/dash/pages/dashboard.py +11 -9
- meerschaum/api/dash/pages/plugins.py +31 -27
- meerschaum/api/dash/webterm.py +6 -3
- meerschaum/api/resources/static/css/dash.css +1 -1
- meerschaum/api/resources/templates/termpage.html +4 -0
- meerschaum/api/routes/_pipes.py +191 -78
- meerschaum/config/_default.py +4 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_APIConnector.py +12 -1
- meerschaum/connectors/api/_pipes.py +27 -15
- meerschaum/connectors/api/_plugins.py +51 -45
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/parse.py +1 -2
- meerschaum/connectors/sql/_SQLConnector.py +3 -0
- meerschaum/connectors/sql/_cli.py +1 -0
- meerschaum/connectors/sql/_create_engine.py +51 -4
- meerschaum/connectors/sql/_pipes.py +13 -2
- meerschaum/connectors/sql/_sql.py +35 -4
- meerschaum/core/Pipe/_data.py +1 -2
- meerschaum/plugins/_Plugin.py +21 -5
- meerschaum/plugins/__init__.py +6 -4
- meerschaum/utils/dataframe.py +87 -2
- meerschaum/utils/dtypes/__init__.py +182 -1
- meerschaum/utils/dtypes/sql.py +114 -2
- meerschaum/utils/formatting/_shell.py +1 -4
- meerschaum/utils/packages/_packages.py +3 -0
- meerschaum/utils/sql.py +17 -5
- meerschaum/utils/venv/__init__.py +2 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/METADATA +10 -1
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/RECORD +40 -39
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/WHEEL +1 -1
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py
CHANGED
@@ -153,6 +153,7 @@ def filter_unseen_df(
|
|
153
153
|
attempt_cast_to_numeric,
|
154
154
|
attempt_cast_to_uuid,
|
155
155
|
attempt_cast_to_bytes,
|
156
|
+
attempt_cast_to_geometry,
|
156
157
|
coerce_timezone,
|
157
158
|
serialize_decimal,
|
158
159
|
)
|
@@ -350,6 +351,10 @@ def filter_unseen_df(
|
|
350
351
|
new_bytes_cols = get_bytes_cols(new_df)
|
351
352
|
bytes_cols = set(new_bytes_cols + old_bytes_cols)
|
352
353
|
|
354
|
+
old_geometry_cols = get_geometry_cols(old_df)
|
355
|
+
new_geometry_cols = get_geometry_cols(new_df)
|
356
|
+
geometry_cols = set(new_geometry_cols + old_geometry_cols)
|
357
|
+
|
353
358
|
joined_df = merge(
|
354
359
|
new_df.infer_objects(copy=False).fillna(NA),
|
355
360
|
old_df.infer_objects(copy=False).fillna(NA),
|
@@ -400,6 +405,14 @@ def filter_unseen_df(
|
|
400
405
|
except Exception:
|
401
406
|
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
402
407
|
|
408
|
+
for geometry_col in geometry_cols:
|
409
|
+
if geometry_col not in delta_df.columns:
|
410
|
+
continue
|
411
|
+
try:
|
412
|
+
delta_df[geometry_col] = delta_df[geometry_col].apply(attempt_cast_to_geometry)
|
413
|
+
except Exception:
|
414
|
+
warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
|
415
|
+
|
403
416
|
return delta_df
|
404
417
|
|
405
418
|
|
@@ -858,6 +871,44 @@ def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
|
|
858
871
|
]
|
859
872
|
|
860
873
|
|
874
|
+
def get_geometry_cols(df: 'pd.DataFrame') -> List[str]:
    """
    Get the columns which contain shapely objects from a Pandas DataFrame.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain shapely (geometry) objects.
        Dask DataFrames are first reduced via `get_first_valid_dask_partition()`.

    Returns
    -------
    A list of columns to treat as `geometry`.
    An empty list is returned if `df` is `None` or has no rows.
    """
    if df is None:
        return []

    is_dask = 'dask' in df.__module__
    if is_dask:
        df = get_first_valid_dask_partition(df)

    if len(df) == 0:
        return []

    geometry_cols = []
    for col in df.columns:
        values = df[col]

        ### NOTE: Duplicate column labels yield a DataFrame rather than a Series,
        ###       which cannot be inspected as a single column of values.
        if getattr(values, 'ndim', 1) != 1:
            continue

        not_null_mask = values.notna().to_numpy()
        if not not_null_mask.any():
            continue

        ### NOTE: Look up the first non-null value by position (not by label)
        ###       so that duplicate index labels still resolve to a single cell.
        first_value = values.iloc[int(not_null_mask.argmax())]
        if 'shapely' in str(type(first_value)):
            geometry_cols.append(col)

    return geometry_cols
|
910
|
+
|
911
|
+
|
861
912
|
def enforce_dtypes(
|
862
913
|
df: 'pd.DataFrame',
|
863
914
|
dtypes: Dict[str, str],
|
@@ -911,6 +962,7 @@ def enforce_dtypes(
|
|
911
962
|
attempt_cast_to_numeric,
|
912
963
|
attempt_cast_to_uuid,
|
913
964
|
attempt_cast_to_bytes,
|
965
|
+
attempt_cast_to_geometry,
|
914
966
|
coerce_timezone as _coerce_timezone,
|
915
967
|
)
|
916
968
|
from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
|
@@ -937,6 +989,11 @@ def enforce_dtypes(
|
|
937
989
|
for col, typ in dtypes.items()
|
938
990
|
if typ.startswith('numeric')
|
939
991
|
]
|
992
|
+
geometry_cols = [
|
993
|
+
col
|
994
|
+
for col, typ in dtypes.items()
|
995
|
+
if typ.startswith('geometry') or typ.startswith('geography')
|
996
|
+
]
|
940
997
|
uuid_cols = [
|
941
998
|
col
|
942
999
|
for col, typ in dtypes.items()
|
@@ -1026,6 +1083,24 @@ def enforce_dtypes(
|
|
1026
1083
|
if col in df.columns:
|
1027
1084
|
df[col] = _coerce_timezone(df[col], strip_utc=strip_timezone)
|
1028
1085
|
|
1086
|
+
if geometry_cols:
|
1087
|
+
geopandas = mrsm.attempt_import('geopandas')
|
1088
|
+
if debug:
|
1089
|
+
dprint(f"Checking for geometry: {geometry_cols}")
|
1090
|
+
parsed_geom_cols = []
|
1091
|
+
for col in geometry_cols:
|
1092
|
+
try:
|
1093
|
+
df[col] = df[col].apply(attempt_cast_to_geometry)
|
1094
|
+
parsed_geom_cols.append(col)
|
1095
|
+
except Exception as e:
|
1096
|
+
if debug:
|
1097
|
+
dprint(f"Unable to parse column '{col}' as geometry:\n{e}")
|
1098
|
+
|
1099
|
+
if parsed_geom_cols:
|
1100
|
+
if debug:
|
1101
|
+
dprint(f"Converting to GeoDataFrame (geometry column: '{parsed_geom_cols[0]}')...")
|
1102
|
+
df = geopandas.GeoDataFrame(df, geometry=parsed_geom_cols[0])
|
1103
|
+
|
1029
1104
|
df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
|
1030
1105
|
if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
|
1031
1106
|
if debug:
|
@@ -1602,13 +1677,19 @@ def to_json(
|
|
1602
1677
|
-------
|
1603
1678
|
A JSON string.
|
1604
1679
|
"""
|
1680
|
+
import warnings
|
1605
1681
|
from meerschaum.utils.packages import import_pandas
|
1606
|
-
from meerschaum.utils.dtypes import
|
1682
|
+
from meerschaum.utils.dtypes import (
|
1683
|
+
serialize_bytes,
|
1684
|
+
serialize_decimal,
|
1685
|
+
serialize_geometry,
|
1686
|
+
)
|
1607
1687
|
pd = import_pandas()
|
1608
1688
|
uuid_cols = get_uuid_cols(df)
|
1609
1689
|
bytes_cols = get_bytes_cols(df)
|
1610
1690
|
numeric_cols = get_numeric_cols(df)
|
1611
|
-
|
1691
|
+
geometry_cols = get_geometry_cols(df)
|
1692
|
+
if safe_copy and bool(uuid_cols or bytes_cols or geometry_cols or numeric_cols):
|
1612
1693
|
df = df.copy()
|
1613
1694
|
for col in uuid_cols:
|
1614
1695
|
df[col] = df[col].astype(str)
|
@@ -1616,6 +1697,10 @@ def to_json(
|
|
1616
1697
|
df[col] = df[col].apply(serialize_bytes)
|
1617
1698
|
for col in numeric_cols:
|
1618
1699
|
df[col] = df[col].apply(serialize_decimal)
|
1700
|
+
with warnings.catch_warnings():
|
1701
|
+
warnings.simplefilter("ignore")
|
1702
|
+
for col in geometry_cols:
|
1703
|
+
df[col] = df[col].apply(serialize_geometry)
|
1619
1704
|
return df.infer_objects(copy=False).fillna(pd.NA).to_json(
|
1620
1705
|
date_format=date_format,
|
1621
1706
|
date_unit=date_unit,
|
@@ -12,7 +12,7 @@ from datetime import timezone, datetime
|
|
12
12
|
from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
|
13
13
|
|
14
14
|
import meerschaum as mrsm
|
15
|
-
from meerschaum.utils.typing import Dict, Union, Any, Optional
|
15
|
+
from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
|
16
16
|
from meerschaum.utils.warnings import warn
|
17
17
|
|
18
18
|
MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
@@ -27,10 +27,13 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
|
|
27
27
|
'bytea': 'bytes',
|
28
28
|
'guid': 'uuid',
|
29
29
|
'UUID': 'uuid',
|
30
|
+
'geom': 'geometry',
|
30
31
|
}
|
31
32
|
MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
|
32
33
|
'json': 'object',
|
33
34
|
'numeric': 'object',
|
35
|
+
'geometry': 'object',
|
36
|
+
'geography': 'object',
|
34
37
|
'uuid': 'object',
|
35
38
|
'datetime': 'datetime64[ns, UTC]',
|
36
39
|
'bool': 'bool[pyarrow]',
|
@@ -60,6 +63,12 @@ def to_pandas_dtype(dtype: str) -> str:
|
|
60
63
|
if dtype.startswith('numeric'):
|
61
64
|
return MRSM_PD_DTYPES['numeric']
|
62
65
|
|
66
|
+
if dtype.startswith('geometry'):
|
67
|
+
return MRSM_PD_DTYPES['geometry']
|
68
|
+
|
69
|
+
if dtype.startswith('geography'):
|
70
|
+
return MRSM_PD_DTYPES['geography']
|
71
|
+
|
63
72
|
### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
|
64
73
|
### treat it as a SQL db type.
|
65
74
|
if dtype.split(' ')[0].isupper():
|
@@ -147,6 +156,10 @@ def are_dtypes_equal(
|
|
147
156
|
if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
|
148
157
|
return True
|
149
158
|
|
159
|
+
geometry_dtypes = ('geometry', 'object', 'geography')
|
160
|
+
if ldtype in geometry_dtypes and rdtype in geometry_dtypes:
|
161
|
+
return True
|
162
|
+
|
150
163
|
if ldtype.lower() == rdtype.lower():
|
151
164
|
return True
|
152
165
|
|
@@ -277,6 +290,56 @@ def attempt_cast_to_bytes(value: Any) -> Any:
|
|
277
290
|
return value
|
278
291
|
|
279
292
|
|
293
|
+
def attempt_cast_to_geometry(value: Any) -> Any:
    """
    Given a value, attempt to coerce it into a `shapely` (`geometry`) object.

    Parameters
    ----------
    value: Any
        The value to parse. Strings are treated as WKT or hex-encoded WKB
        and `bytes` are treated as WKB, as decided by `geometry_is_wkt()`.

    Returns
    -------
    The parsed `shapely` object if `value` could be parsed,
    otherwise `value` is returned unchanged.
    """
    if 'shapely' in str(type(value)):
        return value

    ### NOTE: Nulls and other non-text values can be neither WKT nor WKB,
    ###       so pass them through instead of raising during format detection.
    if not isinstance(value, (str, bytes)):
        return value

    value_is_wkt = geometry_is_wkt(value)
    if value_is_wkt is None:
        return value

    ### NOTE: Only import `shapely` once there is something to parse.
    shapely = mrsm.attempt_import('shapely', lazy=False)
    try:
        if value_is_wkt:
            return shapely.wkt.loads(value)
        ### NOTE: WKB passed as a string is hex-encoded.
        return shapely.wkb.loads(value, hex=isinstance(value, str))
    except Exception:
        return value
|
313
|
+
|
314
|
+
|
315
|
+
def geometry_is_wkt(value: Union[str, bytes]) -> Union[bool, None]:
    """
    Determine whether an input value should be treated as WKT or WKB geometry data.

    Parameters
    ----------
    value: Union[str, bytes]
        The input data to be parsed into geometry data.

    Returns
    -------
    A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
    Return `None` if `value` should be parsed as neither
    (e.g. nulls, non-text values, empty strings, or arbitrary text).
    """
    import re
    if isinstance(value, bytes):
        return False

    ### NOTE: Non-text values (e.g. `None`, `nan`) and empty strings are neither format.
    if not isinstance(value, str) or not value:
        return None

    wkt_pattern = r'^\s*(POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)\s*\(.*\)\s*$'
    if re.match(wkt_pattern, value, re.IGNORECASE):
        return True

    ### NOTE: An even-length string of only hex digits is assumed to be hex-encoded WKB.
    if all(c in '0123456789ABCDEFabcdef' for c in value) and len(value) % 2 == 0:
        return False

    return None
|
341
|
+
|
342
|
+
|
280
343
|
def value_is_null(value: Any) -> bool:
|
281
344
|
"""
|
282
345
|
Determine if a value is a null-like string.
|
@@ -458,6 +521,37 @@ def serialize_bytes(data: bytes) -> str:
|
|
458
521
|
return base64.b64encode(data).decode('utf-8')
|
459
522
|
|
460
523
|
|
524
|
+
def serialize_geometry(geom: Any, as_wkt: bool = False) -> Optional[str]:
    """
    Serialize geometry data as a hex-encoded well-known-binary string
    (or as well-known text if `as_wkt` is `True`).

    Parameters
    ----------
    geom: Any
        The potential geometry data to be serialized.

    as_wkt: bool, default False
        If `True`, serialize geometry data as well-known text (WKT)
        instead of well-known binary (WKB).

    Returns
    -------
    A string containing the geometry data.
    Values without a `wkb_hex` attribute are returned as `str(geom)`,
    and nulls (`None`, `nan`) are returned as `None`.
    """
    ### NOTE: Keep nulls as nulls rather than the strings 'None' or 'nan'
    ###       so that downstream null handling still recognizes them.
    if geom is None or (isinstance(geom, float) and geom != geom):
        return None

    if hasattr(geom, 'wkb_hex'):
        return geom.wkt if as_wkt else geom.wkb_hex

    return str(geom)
|
545
|
+
|
546
|
+
|
547
|
+
def deserialize_geometry(geom_wkb: Union[str, bytes]):
    """
    Deserialize a WKB string into a shapely geometry object.

    Parameters
    ----------
    geom_wkb: Union[str, bytes]
        The well-known binary data, either as raw `bytes` or as a hex-encoded string.

    Returns
    -------
    The geometry object returned by `shapely.wkb.loads()`.
    """
    ### NOTE: The package name must be given, otherwise nothing is imported.
    shapely = mrsm.attempt_import('shapely', lazy=False)
    ### NOTE: WKB passed as a string is hex-encoded.
    return shapely.wkb.loads(geom_wkb, hex=isinstance(geom_wkb, str))
|
553
|
+
|
554
|
+
|
461
555
|
def deserialize_bytes_string(data: str | None, force_hex: bool = False) -> bytes | None:
|
462
556
|
"""
|
463
557
|
Given a serialized ASCII string of bytes data, return the original bytes.
|
@@ -559,7 +653,94 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
|
|
559
653
|
if isinstance(x, Decimal):
|
560
654
|
return serialize_decimal(x)
|
561
655
|
|
656
|
+
if 'shapely' in str(type(x)):
|
657
|
+
return serialize_geometry(x)
|
658
|
+
|
562
659
|
if value_is_null(x):
|
563
660
|
return None
|
564
661
|
|
565
662
|
return str(x) if default_to_str else x
|
663
|
+
|
664
|
+
|
665
|
+
def get_geometry_type_srid(
    dtype: str = 'geometry',
    default_type: str = 'geometry',
    default_srid: int = 4326,
) -> Union[Tuple[str, int], Tuple[str, None]]:
    """
    Given the specified geometry `dtype`, return a tuple in the form (type, SRID).

    Parameters
    ----------
    dtype: str, default 'geometry'
        Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`).
        You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:

        - `Point`
        - `LineString`
        - `LinearRing`
        - `Polygon`
        - `MultiPoint`
        - `MultiLineString`
        - `MultiPolygon`
        - `GeometryCollection`

    default_type: str, default 'geometry'
        The geometry type to return if the modifier does not name a known type.

    default_srid: int, default 4326
        The SRID to return if the modifier does not contain an integer.

    Returns
    -------
    A tuple in the form (type, SRID).
    Defaults to `(default_type, default_srid)`.

    Examples
    --------
    >>> from meerschaum.utils.dtypes import get_geometry_type_srid
    >>> get_geometry_type_srid()
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[]')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[Point, 0]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0, Point]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0]')
    ('geometry', 0)
    >>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
    ('MultiLineString', 4326)
    >>> get_geometry_type_srid('geography')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geography[POINT]')
    ('Point', 4326)
    """
    ### NOTE: Everything after the first '[' is the modifier.
    ###       `partition()` also tolerates a dtype which begins with '['.
    _, _, raw_modifier = dtype.partition('[')
    modifier = raw_modifier.lstrip('[').rstrip(']')
    if not modifier:
        return default_type, default_srid

    from meerschaum.utils.misc import is_int
    shapely_geometry_base = mrsm.attempt_import('shapely.geometry.base')

    ### Map lowercase names to shapely's canonical casing (e.g. 'multipoint' -> 'MultiPoint').
    geometry_types = {
        typ.lower(): typ
        for typ in shapely_geometry_base.GEOMETRY_TYPES
    }

    ### Accept both positional (`Point, 4326`) and keyword (`type=Point, srid=4326`) parts.
    parts = [
        part.lower().replace('srid=', '').replace('type=', '').strip()
        for part in modifier.split(',')
    ]

    ### The first integer part is the SRID, and the first known type name is the type.
    srid = next((int(part) for part in parts if is_int(part)), default_srid)
    geometry_type = next(
        (geometry_types[part] for part in parts if part in geometry_types),
        default_type,
    )

    return geometry_type, srid
|