meerschaum 2.8.4__py3-none-any.whl → 2.9.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. meerschaum/api/_chunks.py +67 -0
  2. meerschaum/api/dash/callbacks/custom.py +23 -2
  3. meerschaum/api/dash/callbacks/dashboard.py +41 -3
  4. meerschaum/api/dash/components.py +27 -19
  5. meerschaum/api/dash/pages/dashboard.py +11 -9
  6. meerschaum/api/dash/pages/plugins.py +31 -27
  7. meerschaum/api/dash/webterm.py +6 -3
  8. meerschaum/api/resources/static/css/dash.css +1 -1
  9. meerschaum/api/resources/templates/termpage.html +4 -0
  10. meerschaum/api/routes/_pipes.py +191 -78
  11. meerschaum/config/_default.py +4 -0
  12. meerschaum/config/_version.py +1 -1
  13. meerschaum/connectors/api/_APIConnector.py +12 -1
  14. meerschaum/connectors/api/_pipes.py +27 -15
  15. meerschaum/connectors/api/_plugins.py +51 -45
  16. meerschaum/connectors/api/_request.py +1 -1
  17. meerschaum/connectors/parse.py +1 -2
  18. meerschaum/connectors/sql/_SQLConnector.py +3 -0
  19. meerschaum/connectors/sql/_cli.py +1 -0
  20. meerschaum/connectors/sql/_create_engine.py +51 -4
  21. meerschaum/connectors/sql/_pipes.py +13 -2
  22. meerschaum/connectors/sql/_sql.py +35 -4
  23. meerschaum/core/Pipe/_data.py +1 -2
  24. meerschaum/plugins/_Plugin.py +21 -5
  25. meerschaum/plugins/__init__.py +6 -4
  26. meerschaum/utils/dataframe.py +87 -2
  27. meerschaum/utils/dtypes/__init__.py +182 -1
  28. meerschaum/utils/dtypes/sql.py +114 -2
  29. meerschaum/utils/formatting/_shell.py +1 -4
  30. meerschaum/utils/packages/_packages.py +3 -0
  31. meerschaum/utils/sql.py +17 -5
  32. meerschaum/utils/venv/__init__.py +2 -0
  33. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/METADATA +10 -1
  34. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/RECORD +40 -39
  35. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/WHEEL +1 -1
  36. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/LICENSE +0 -0
  37. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/NOTICE +0 -0
  38. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/entry_points.txt +0 -0
  39. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/top_level.txt +0 -0
  40. {meerschaum-2.8.4.dist-info → meerschaum-2.9.0rc1.dist-info}/zip-safe +0 -0
@@ -153,6 +153,7 @@ def filter_unseen_df(
153
153
  attempt_cast_to_numeric,
154
154
  attempt_cast_to_uuid,
155
155
  attempt_cast_to_bytes,
156
+ attempt_cast_to_geometry,
156
157
  coerce_timezone,
157
158
  serialize_decimal,
158
159
  )
@@ -350,6 +351,10 @@ def filter_unseen_df(
350
351
  new_bytes_cols = get_bytes_cols(new_df)
351
352
  bytes_cols = set(new_bytes_cols + old_bytes_cols)
352
353
 
354
+ old_geometry_cols = get_geometry_cols(old_df)
355
+ new_geometry_cols = get_geometry_cols(new_df)
356
+ geometry_cols = set(new_geometry_cols + old_geometry_cols)
357
+
353
358
  joined_df = merge(
354
359
  new_df.infer_objects(copy=False).fillna(NA),
355
360
  old_df.infer_objects(copy=False).fillna(NA),
@@ -400,6 +405,14 @@ def filter_unseen_df(
400
405
  except Exception:
401
406
  warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
402
407
 
408
+ for geometry_col in geometry_cols:
409
+ if geometry_col not in delta_df.columns:
410
+ continue
411
+ try:
412
+ delta_df[geometry_col] = delta_df[geometry_col].apply(attempt_cast_to_geometry)
413
+ except Exception:
414
+ warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
415
+
403
416
  return delta_df
404
417
 
405
418
 
@@ -858,6 +871,44 @@ def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
858
871
  ]
859
872
 
860
873
 
874
def get_geometry_cols(df: 'pd.DataFrame') -> List[str]:
    """
    Find the columns of a Pandas DataFrame which hold `shapely` objects.

    Parameters
    ----------
    df: pd.DataFrame
        The DataFrame which may contain `shapely` geometry objects.
        A dask DataFrame is first passed through `get_first_valid_dask_partition()`.

    Returns
    -------
    A list of columns to treat as `geometry`.
    A column qualifies when the value at its first valid index is a `shapely` object.
    An empty list is returned if `df` is `None` or has no rows.
    """
    if df is None:
        return []

    if 'dask' in df.__module__:
        df = get_first_valid_dask_partition(df)

    if len(df) == 0:
        return []

    ### Record the first valid index per column before inspecting any values.
    first_valid_indices = {}
    for col in df.columns:
        first_valid_indices[col] = df[col].first_valid_index()

    geometry_cols = []
    for col, first_ix in first_valid_indices.items():
        ### Entirely null columns have nothing to inspect.
        if first_ix is None:
            continue
        if 'shapely' in str(type(df.loc[first_ix][col])):
            geometry_cols.append(col)

    return geometry_cols
910
+
911
+
861
912
  def enforce_dtypes(
862
913
  df: 'pd.DataFrame',
863
914
  dtypes: Dict[str, str],
@@ -911,6 +962,7 @@ def enforce_dtypes(
911
962
  attempt_cast_to_numeric,
912
963
  attempt_cast_to_uuid,
913
964
  attempt_cast_to_bytes,
965
+ attempt_cast_to_geometry,
914
966
  coerce_timezone as _coerce_timezone,
915
967
  )
916
968
  from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
@@ -937,6 +989,11 @@ def enforce_dtypes(
937
989
  for col, typ in dtypes.items()
938
990
  if typ.startswith('numeric')
939
991
  ]
992
+ geometry_cols = [
993
+ col
994
+ for col, typ in dtypes.items()
995
+ if typ.startswith('geometry') or typ.startswith('geography')
996
+ ]
940
997
  uuid_cols = [
941
998
  col
942
999
  for col, typ in dtypes.items()
@@ -1026,6 +1083,24 @@ def enforce_dtypes(
1026
1083
  if col in df.columns:
1027
1084
  df[col] = _coerce_timezone(df[col], strip_utc=strip_timezone)
1028
1085
 
1086
+ if geometry_cols:
1087
+ geopandas = mrsm.attempt_import('geopandas')
1088
+ if debug:
1089
+ dprint(f"Checking for geometry: {geometry_cols}")
1090
+ parsed_geom_cols = []
1091
+ for col in geometry_cols:
1092
+ try:
1093
+ df[col] = df[col].apply(attempt_cast_to_geometry)
1094
+ parsed_geom_cols.append(col)
1095
+ except Exception as e:
1096
+ if debug:
1097
+ dprint(f"Unable to parse column '{col}' as geometry:\n{e}")
1098
+
1099
+ if parsed_geom_cols:
1100
+ if debug:
1101
+ dprint(f"Converting to GeoDataFrame (geometry column: '{parsed_geom_cols[0]}')...")
1102
+ df = geopandas.GeoDataFrame(df, geometry=parsed_geom_cols[0])
1103
+
1029
1104
  df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
1030
1105
  if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
1031
1106
  if debug:
@@ -1602,13 +1677,19 @@ def to_json(
1602
1677
  -------
1603
1678
  A JSON string.
1604
1679
  """
1680
+ import warnings
1605
1681
  from meerschaum.utils.packages import import_pandas
1606
- from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
1682
+ from meerschaum.utils.dtypes import (
1683
+ serialize_bytes,
1684
+ serialize_decimal,
1685
+ serialize_geometry,
1686
+ )
1607
1687
  pd = import_pandas()
1608
1688
  uuid_cols = get_uuid_cols(df)
1609
1689
  bytes_cols = get_bytes_cols(df)
1610
1690
  numeric_cols = get_numeric_cols(df)
1611
- if safe_copy and bool(uuid_cols or bytes_cols):
1691
+ geometry_cols = get_geometry_cols(df)
1692
+ if safe_copy and bool(uuid_cols or bytes_cols or geometry_cols or numeric_cols):
1612
1693
  df = df.copy()
1613
1694
  for col in uuid_cols:
1614
1695
  df[col] = df[col].astype(str)
@@ -1616,6 +1697,10 @@ def to_json(
1616
1697
  df[col] = df[col].apply(serialize_bytes)
1617
1698
  for col in numeric_cols:
1618
1699
  df[col] = df[col].apply(serialize_decimal)
1700
+ with warnings.catch_warnings():
1701
+ warnings.simplefilter("ignore")
1702
+ for col in geometry_cols:
1703
+ df[col] = df[col].apply(serialize_geometry)
1619
1704
  return df.infer_objects(copy=False).fillna(pd.NA).to_json(
1620
1705
  date_format=date_format,
1621
1706
  date_unit=date_unit,
@@ -12,7 +12,7 @@ from datetime import timezone, datetime
12
12
  from decimal import Decimal, Context, InvalidOperation, ROUND_HALF_UP
13
13
 
14
14
  import meerschaum as mrsm
15
- from meerschaum.utils.typing import Dict, Union, Any, Optional
15
+ from meerschaum.utils.typing import Dict, Union, Any, Optional, Tuple
16
16
  from meerschaum.utils.warnings import warn
17
17
 
18
18
  MRSM_ALIAS_DTYPES: Dict[str, str] = {
@@ -27,10 +27,13 @@ MRSM_ALIAS_DTYPES: Dict[str, str] = {
27
27
  'bytea': 'bytes',
28
28
  'guid': 'uuid',
29
29
  'UUID': 'uuid',
30
+ 'geom': 'geometry',
30
31
  }
31
32
  MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
32
33
  'json': 'object',
33
34
  'numeric': 'object',
35
+ 'geometry': 'object',
36
+ 'geography': 'object',
34
37
  'uuid': 'object',
35
38
  'datetime': 'datetime64[ns, UTC]',
36
39
  'bool': 'bool[pyarrow]',
@@ -60,6 +63,12 @@ def to_pandas_dtype(dtype: str) -> str:
60
63
  if dtype.startswith('numeric'):
61
64
  return MRSM_PD_DTYPES['numeric']
62
65
 
66
+ if dtype.startswith('geometry'):
67
+ return MRSM_PD_DTYPES['geometry']
68
+
69
+ if dtype.startswith('geography'):
70
+ return MRSM_PD_DTYPES['geography']
71
+
63
72
  ### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
64
73
  ### treat it as a SQL db type.
65
74
  if dtype.split(' ')[0].isupper():
@@ -147,6 +156,10 @@ def are_dtypes_equal(
147
156
  if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
148
157
  return True
149
158
 
159
+ geometry_dtypes = ('geometry', 'object', 'geography')
160
+ if ldtype in geometry_dtypes and rdtype in geometry_dtypes:
161
+ return True
162
+
150
163
  if ldtype.lower() == rdtype.lower():
151
164
  return True
152
165
 
@@ -277,6 +290,56 @@ def attempt_cast_to_bytes(value: Any) -> Any:
277
290
  return value
278
291
 
279
292
 
293
def attempt_cast_to_geometry(value: Any) -> Any:
    """
    Try to parse a value into a `shapely` (`geometry`) object.

    The value is returned unchanged when it is already a `shapely` object,
    when `geometry_is_wkt()` reports it as neither WKT nor WKB,
    or when parsing raises an exception.
    """
    shapely = mrsm.attempt_import('shapely', lazy=False)
    if 'shapely' in str(type(value)):
        return value

    is_wkt = geometry_is_wkt(value)
    if is_wkt is None:
        return value

    ### Keep the attribute lookups inside the `try` so any failure falls back to `value`.
    try:
        if is_wkt:
            return shapely.wkt.loads(value)
        return shapely.wkb.loads(value)
    except Exception:
        return value
313
+
314
+
315
def geometry_is_wkt(value: Union[str, bytes]) -> Union[bool, None]:
    """
    Determine whether an input value should be treated as WKT or WKB geometry data.

    Parameters
    ----------
    value: Union[str, bytes]
        The input data to be parsed into geometry data.
        Values of any other type (e.g. `None`, numbers) are accepted and yield `None`.

    Returns
    -------
    A `bool` (`True` if `value` is WKT and `False` if it should be treated as WKB).
    Return `None` if `value` should be parsed as neither
    (including empty strings and non-string, non-bytes values).
    """
    import re
    if isinstance(value, bytes):
        return False

    ### Nulls and other non-string values would raise a `TypeError` in `re.match()`,
    ### and an empty string would otherwise pass the hex check below.
    if not isinstance(value, str) or not value:
        return None

    wkt_pattern = r'^\s*(POINT|LINESTRING|POLYGON|MULTIPOINT|MULTILINESTRING|MULTIPOLYGON|GEOMETRYCOLLECTION)\s*\(.*\)\s*$'
    if re.match(wkt_pattern, value, re.IGNORECASE):
        return True

    ### An even-length string of hex digits is treated as hex-encoded WKB.
    if len(value) % 2 == 0 and all(c in '0123456789ABCDEFabcdef' for c in value):
        return False

    return None
341
+
342
+
280
343
  def value_is_null(value: Any) -> bool:
281
344
  """
282
345
  Determine if a value is a null-like string.
@@ -458,6 +521,37 @@ def serialize_bytes(data: bytes) -> str:
458
521
  return base64.b64encode(data).decode('utf-8')
459
522
 
460
523
 
524
def serialize_geometry(geom: Any, as_wkt: bool = False) -> str:
    """
    Serialize geometry data as a string
    (hex-encoded well-known binary by default, or well-known text on request).

    Parameters
    ----------
    geom: Any
        The potential geometry data to be serialized.
        Objects without a `wkb_hex` attribute are passed through `str()`.

    as_wkt: bool, default False
        If `True`, return the `wkt` attribute (well-known text)
        instead of the `wkb_hex` attribute (hex-encoded well-known binary).

    Returns
    -------
    A string containing the geometry data.
    """
    ### Anything lacking `wkb_hex` is not treated as geometry.
    if not hasattr(geom, 'wkb_hex'):
        return str(geom)

    if as_wkt:
        return geom.wkt

    return geom.wkb_hex
545
+
546
+
547
def deserialize_geometry(geom_wkb: Union[str, bytes]):
    """
    Deserialize a WKB string into a shapely geometry object.

    Parameters
    ----------
    geom_wkb: Union[str, bytes]
        The well-known binary data, passed directly to `shapely.wkb.loads()`.

    Returns
    -------
    The object returned by `shapely.wkb.loads()`.
    """
    ### The package name must be passed explicitly, as in `attempt_cast_to_geometry()`.
    shapely = mrsm.attempt_import('shapely', lazy=False)
    return shapely.wkb.loads(geom_wkb)
553
+
554
+
461
555
  def deserialize_bytes_string(data: str | None, force_hex: bool = False) -> bytes | None:
462
556
  """
463
557
  Given a serialized ASCII string of bytes data, return the original bytes.
@@ -559,7 +653,94 @@ def json_serialize_value(x: Any, default_to_str: bool = True) -> str:
559
653
  if isinstance(x, Decimal):
560
654
  return serialize_decimal(x)
561
655
 
656
+ if 'shapely' in str(type(x)):
657
+ return serialize_geometry(x)
658
+
562
659
  if value_is_null(x):
563
660
  return None
564
661
 
565
662
  return str(x) if default_to_str else x
663
+
664
+
665
def get_geometry_type_srid(
    dtype: str = 'geometry',
    default_type: str = 'geometry',
    default_srid: int = 4326,
) -> Union[Tuple[str, int], Tuple[str, None]]:
    """
    Given the specified geometry `dtype`, return a tuple in the form (type, SRID).

    Parameters
    ----------
    dtype: str, default 'geometry'
        Optionally provide a specific `geometry` syntax (e.g. `geometry[MultiLineString, 4326]`).
        You may specify a supported `shapely` geometry type and an SRID in the dtype modifier:

        - `Point`
        - `LineString`
        - `LinearRing`
        - `Polygon`
        - `MultiPoint`
        - `MultiLineString`
        - `MultiPolygon`
        - `GeometryCollection`

        The type and SRID may appear in either order,
        and the prefixes `type=` and `srid=` are ignored.
        An empty `dtype` or an empty modifier yields the defaults.

    default_type: str, default 'geometry'
        The type to return when the modifier does not name a known geometry type.

    default_srid: int, default 4326
        The SRID to return when the modifier does not contain an integer.

    Returns
    -------
    A tuple in the form (type, SRID).
    Defaults to `(default_type, default_srid)`.

    Examples
    --------
    >>> from meerschaum.utils.dtypes import get_geometry_type_srid
    >>> get_geometry_type_srid()
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[]')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geometry[Point, 0]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0, Point]')
    ('Point', 0)
    >>> get_geometry_type_srid('geometry[0]')
    ('geometry', 0)
    >>> get_geometry_type_srid('geometry[MULTILINESTRING, 4326]')
    ('MultiLineString', 4326)
    >>> get_geometry_type_srid('geography')
    ('geometry', 4326)
    >>> get_geometry_type_srid('geography[POINT]')
    ('Point', 4326)
    """
    if not dtype:
        return default_type, default_srid

    bare_dtype = dtype.split('[', maxsplit=1)[0]
    ### Slice off the prefix instead of calling `dtype.split(bare_dtype)`,
    ### which raises a `ValueError` (empty separator) when `dtype` begins with '['.
    modifier = dtype[len(bare_dtype):].lstrip('[').rstrip(']')
    if not modifier:
        return default_type, default_srid

    ### Only import the helpers once there is a modifier to parse.
    from meerschaum.utils.misc import is_int
    shapely_geometry_base = mrsm.attempt_import('shapely.geometry.base')
    geometry_types = {
        typ.lower(): typ
        for typ in shapely_geometry_base.GEOMETRY_TYPES
    }

    parts = [
        part.lower().replace('srid=', '').replace('type=', '').strip()
        for part in modifier.split(',')
    ]

    ### The first integer part is the SRID; the first known type name is the type.
    srid = next(
        (int(part) for part in parts if is_int(part)),
        default_srid,
    )
    geometry_type = next(
        (geometry_types[part] for part in parts if part in geometry_types),
        default_type,
    )

    return geometry_type, srid