meerschaum 2.8.4__py3-none-any.whl → 2.9.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/_chunks.py +67 -0
- meerschaum/api/dash/callbacks/__init__.py +5 -2
- meerschaum/api/dash/callbacks/custom.py +21 -8
- meerschaum/api/dash/callbacks/dashboard.py +26 -4
- meerschaum/api/dash/callbacks/settings/__init__.py +8 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +76 -0
- meerschaum/api/dash/components.py +136 -25
- meerschaum/api/dash/pages/__init__.py +1 -0
- meerschaum/api/dash/pages/dashboard.py +11 -9
- meerschaum/api/dash/pages/plugins.py +31 -27
- meerschaum/api/dash/pages/settings/__init__.py +8 -0
- meerschaum/api/dash/pages/settings/password_reset.py +63 -0
- meerschaum/api/dash/webterm.py +6 -3
- meerschaum/api/resources/static/css/dash.css +8 -1
- meerschaum/api/resources/templates/termpage.html +4 -0
- meerschaum/api/routes/_pipes.py +232 -79
- meerschaum/config/_default.py +4 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +12 -1
- meerschaum/connectors/api/_pipes.py +106 -45
- meerschaum/connectors/api/_plugins.py +51 -45
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/parse.py +1 -2
- meerschaum/connectors/sql/_SQLConnector.py +3 -0
- meerschaum/connectors/sql/_cli.py +1 -0
- meerschaum/connectors/sql/_create_engine.py +51 -4
- meerschaum/connectors/sql/_pipes.py +38 -6
- meerschaum/connectors/sql/_sql.py +35 -4
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -0
- meerschaum/connectors/valkey/_pipes.py +51 -39
- meerschaum/core/Pipe/__init__.py +1 -0
- meerschaum/core/Pipe/_data.py +1 -2
- meerschaum/core/Pipe/_sync.py +64 -4
- meerschaum/plugins/_Plugin.py +21 -5
- meerschaum/plugins/__init__.py +32 -8
- meerschaum/utils/dataframe.py +139 -2
- meerschaum/utils/dtypes/__init__.py +211 -1
- meerschaum/utils/dtypes/sql.py +296 -5
- meerschaum/utils/formatting/_shell.py +1 -4
- meerschaum/utils/misc.py +1 -1
- meerschaum/utils/packages/_packages.py +7 -1
- meerschaum/utils/sql.py +139 -11
- meerschaum/utils/venv/__init__.py +6 -1
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/METADATA +17 -3
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/RECORD +52 -52
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/WHEEL +1 -1
- meerschaum/_internal/gui/__init__.py +0 -43
- meerschaum/_internal/gui/app/__init__.py +0 -50
- meerschaum/_internal/gui/app/_windows.py +0 -74
- meerschaum/_internal/gui/app/actions.py +0 -30
- meerschaum/_internal/gui/app/pipes.py +0 -47
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.8.4.dist-info → meerschaum-2.9.0.dist-info}/zip-safe +0 -0
@@ -731,7 +731,7 @@ def get_create_index_queries(
|
|
731
731
|
) + f"{primary_key_name})"
|
732
732
|
),
|
733
733
|
])
|
734
|
-
elif self.flavor in ('citus', 'postgresql', 'duckdb'):
|
734
|
+
elif self.flavor in ('citus', 'postgresql', 'duckdb', 'postgis'):
|
735
735
|
primary_queries.extend([
|
736
736
|
(
|
737
737
|
f"ALTER TABLE {_pipe_name}\n"
|
@@ -1052,6 +1052,7 @@ def get_pipe_data(
|
|
1052
1052
|
attempt_cast_to_numeric,
|
1053
1053
|
attempt_cast_to_uuid,
|
1054
1054
|
attempt_cast_to_bytes,
|
1055
|
+
attempt_cast_to_geometry,
|
1055
1056
|
are_dtypes_equal,
|
1056
1057
|
)
|
1057
1058
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
@@ -1138,6 +1139,11 @@ def get_pipe_data(
|
|
1138
1139
|
for col, typ in pipe.dtypes.items()
|
1139
1140
|
if typ == 'bytes' and col in dtypes
|
1140
1141
|
]
|
1142
|
+
geometry_columns = [
|
1143
|
+
col
|
1144
|
+
for col, typ in pipe.dtypes.items()
|
1145
|
+
if typ.startswith('geometry') and col in dtypes
|
1146
|
+
]
|
1141
1147
|
|
1142
1148
|
kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
|
1143
1149
|
|
@@ -1162,6 +1168,11 @@ def get_pipe_data(
|
|
1162
1168
|
continue
|
1163
1169
|
df[col] = df[col].apply(attempt_cast_to_bytes)
|
1164
1170
|
|
1171
|
+
for col in geometry_columns:
|
1172
|
+
if col not in df.columns:
|
1173
|
+
continue
|
1174
|
+
df[col] = df[col].apply(attempt_cast_to_geometry)
|
1175
|
+
|
1165
1176
|
if self.flavor == 'sqlite':
|
1166
1177
|
ignore_dt_cols = [
|
1167
1178
|
col
|
@@ -1511,7 +1522,7 @@ def get_pipe_attributes(
|
|
1511
1522
|
if isinstance(parameters, str) and parameters[0] == '{':
|
1512
1523
|
parameters = json.loads(parameters)
|
1513
1524
|
attributes['parameters'] = parameters
|
1514
|
-
except Exception
|
1525
|
+
except Exception:
|
1515
1526
|
attributes['parameters'] = {}
|
1516
1527
|
|
1517
1528
|
return attributes
|
@@ -1533,7 +1544,11 @@ def create_pipe_table_from_df(
|
|
1533
1544
|
get_datetime_cols,
|
1534
1545
|
get_bytes_cols,
|
1535
1546
|
)
|
1536
|
-
from meerschaum.utils.sql import
|
1547
|
+
from meerschaum.utils.sql import (
|
1548
|
+
get_create_table_queries,
|
1549
|
+
sql_item_name,
|
1550
|
+
get_create_schema_if_not_exists_queries,
|
1551
|
+
)
|
1537
1552
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1538
1553
|
primary_key = pipe.columns.get('primary', None)
|
1539
1554
|
primary_key_typ = (
|
@@ -1590,15 +1605,21 @@ def create_pipe_table_from_df(
|
|
1590
1605
|
if autoincrement:
|
1591
1606
|
_ = new_dtypes.pop(primary_key, None)
|
1592
1607
|
|
1608
|
+
schema = self.get_pipe_schema(pipe)
|
1593
1609
|
create_table_queries = get_create_table_queries(
|
1594
1610
|
new_dtypes,
|
1595
1611
|
pipe.target,
|
1596
1612
|
self.flavor,
|
1597
|
-
schema=
|
1613
|
+
schema=schema,
|
1598
1614
|
primary_key=primary_key,
|
1599
1615
|
primary_key_db_type=primary_key_db_type,
|
1600
1616
|
datetime_column=dt_col,
|
1601
1617
|
)
|
1618
|
+
if schema:
|
1619
|
+
create_table_queries = (
|
1620
|
+
get_create_schema_if_not_exists_queries(schema, self.flavor)
|
1621
|
+
+ create_table_queries
|
1622
|
+
)
|
1602
1623
|
success = all(
|
1603
1624
|
self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
|
1604
1625
|
)
|
@@ -2074,6 +2095,7 @@ def sync_pipe_inplace(
|
|
2074
2095
|
get_update_queries,
|
2075
2096
|
get_null_replacement,
|
2076
2097
|
get_create_table_queries,
|
2098
|
+
get_create_schema_if_not_exists_queries,
|
2077
2099
|
get_table_cols_types,
|
2078
2100
|
session_execute,
|
2079
2101
|
dateadd_str,
|
@@ -2153,18 +2175,28 @@ def sync_pipe_inplace(
|
|
2153
2175
|
warn(drop_stale_msg)
|
2154
2176
|
return drop_stale_success, drop_stale_msg
|
2155
2177
|
|
2156
|
-
sqlalchemy, sqlalchemy_orm = mrsm.attempt_import(
|
2178
|
+
sqlalchemy, sqlalchemy_orm = mrsm.attempt_import(
|
2179
|
+
'sqlalchemy',
|
2180
|
+
'sqlalchemy.orm',
|
2181
|
+
)
|
2157
2182
|
if not pipe.exists(debug=debug):
|
2183
|
+
schema = self.get_pipe_schema(pipe)
|
2158
2184
|
create_pipe_queries = get_create_table_queries(
|
2159
2185
|
metadef,
|
2160
2186
|
pipe.target,
|
2161
2187
|
self.flavor,
|
2162
|
-
schema=
|
2188
|
+
schema=schema,
|
2163
2189
|
primary_key=primary_key,
|
2164
2190
|
primary_key_db_type=primary_key_db_type,
|
2165
2191
|
autoincrement=autoincrement,
|
2166
2192
|
datetime_column=dt_col,
|
2167
2193
|
)
|
2194
|
+
if schema:
|
2195
|
+
create_pipe_queries = (
|
2196
|
+
get_create_schema_if_not_exists_queries(schema, self.flavor)
|
2197
|
+
+ create_pipe_queries
|
2198
|
+
)
|
2199
|
+
|
2168
2200
|
results = self.exec_queries(create_pipe_queries, debug=debug)
|
2169
2201
|
if not all(results):
|
2170
2202
|
_ = clean_up_temp_tables()
|
@@ -17,7 +17,7 @@ from meerschaum.utils.debug import dprint
|
|
17
17
|
from meerschaum.utils.warnings import warn
|
18
18
|
|
19
19
|
### database flavors that can use bulk insert
|
20
|
-
_bulk_flavors = {'postgresql', 'timescaledb', 'citus', 'mssql'}
|
20
|
+
_bulk_flavors = {'postgresql', 'postgis', 'timescaledb', 'citus', 'mssql'}
|
21
21
|
### flavors that do not support chunks
|
22
22
|
_disallow_chunks_flavors = ['duckdb']
|
23
23
|
_max_chunks_flavors = {'sqlite': 1000}
|
@@ -798,6 +798,7 @@ def to_sql(
|
|
798
798
|
get_numeric_cols,
|
799
799
|
get_uuid_cols,
|
800
800
|
get_bytes_cols,
|
801
|
+
get_geometry_cols,
|
801
802
|
)
|
802
803
|
from meerschaum.utils.dtypes import (
|
803
804
|
are_dtypes_equal,
|
@@ -805,7 +806,9 @@ def to_sql(
|
|
805
806
|
encode_bytes_for_bytea,
|
806
807
|
serialize_bytes,
|
807
808
|
serialize_decimal,
|
809
|
+
serialize_geometry,
|
808
810
|
json_serialize_value,
|
811
|
+
get_geometry_type_srid,
|
809
812
|
)
|
810
813
|
from meerschaum.utils.dtypes.sql import (
|
811
814
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
|
@@ -822,6 +825,7 @@ def to_sql(
|
|
822
825
|
|
823
826
|
bytes_cols = get_bytes_cols(df)
|
824
827
|
numeric_cols = get_numeric_cols(df)
|
828
|
+
geometry_cols = get_geometry_cols(df)
|
825
829
|
### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite).
|
826
830
|
numeric_cols_dtypes = {
|
827
831
|
col: typ
|
@@ -830,7 +834,6 @@ def to_sql(
|
|
830
834
|
col in df.columns
|
831
835
|
and 'numeric' in str(typ).lower()
|
832
836
|
)
|
833
|
-
|
834
837
|
}
|
835
838
|
numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
|
836
839
|
numeric_cols_precisions_scales = {
|
@@ -841,6 +844,22 @@ def to_sql(
|
|
841
844
|
)
|
842
845
|
for col, typ in numeric_cols_dtypes.items()
|
843
846
|
}
|
847
|
+
geometry_cols_dtypes = {
|
848
|
+
col: typ
|
849
|
+
for col, typ in kw.get('dtype', {}).items()
|
850
|
+
if (
|
851
|
+
col in df.columns
|
852
|
+
and 'geometry' in str(typ).lower() or 'geography' in str(typ).lower()
|
853
|
+
)
|
854
|
+
}
|
855
|
+
geometry_cols.extend([col for col in geometry_cols_dtypes if col not in geometry_cols])
|
856
|
+
geometry_cols_types_srids = {
|
857
|
+
col: (typ.geometry_type, typ.srid)
|
858
|
+
if hasattr(typ, 'srid')
|
859
|
+
else get_geometry_type_srid()
|
860
|
+
for col, typ in geometry_cols_dtypes.items()
|
861
|
+
}
|
862
|
+
|
844
863
|
cols_pd_types = {
|
845
864
|
col: get_pd_type_from_db_type(str(typ))
|
846
865
|
for col, typ in kw.get('dtype', {}).items()
|
@@ -856,8 +875,9 @@ def to_sql(
|
|
856
875
|
}
|
857
876
|
|
858
877
|
enable_bulk_insert = mrsm.get_config(
|
859
|
-
'system', 'connectors', 'sql', 'bulk_insert'
|
860
|
-
|
878
|
+
'system', 'connectors', 'sql', 'bulk_insert', self.flavor,
|
879
|
+
warn=False,
|
880
|
+
) or False
|
861
881
|
stats = {'target': name}
|
862
882
|
### resort to defaults if None
|
863
883
|
copied = False
|
@@ -901,6 +921,17 @@ def to_sql(
|
|
901
921
|
)
|
902
922
|
)
|
903
923
|
|
924
|
+
for col in geometry_cols:
|
925
|
+
geometry_type, srid = geometry_cols_types_srids.get(col, get_geometry_type_srid())
|
926
|
+
with warnings.catch_warnings():
|
927
|
+
warnings.simplefilter("ignore")
|
928
|
+
df[col] = df[col].apply(
|
929
|
+
functools.partial(
|
930
|
+
serialize_geometry,
|
931
|
+
as_wkt=(self.flavor == 'mssql')
|
932
|
+
)
|
933
|
+
)
|
934
|
+
|
904
935
|
stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
|
905
936
|
|
906
937
|
default_chunksize = self._sys_config.get('chunksize', None)
|
@@ -10,8 +10,9 @@ from datetime import datetime, timezone
|
|
10
10
|
|
11
11
|
import meerschaum as mrsm
|
12
12
|
from meerschaum.utils.typing import SuccessTuple, Any, Union, Optional, Dict, List, Tuple
|
13
|
-
from meerschaum.utils.misc import
|
14
|
-
from meerschaum.utils.
|
13
|
+
from meerschaum.utils.misc import string_to_dict
|
14
|
+
from meerschaum.utils.dtypes import json_serialize_value
|
15
|
+
from meerschaum.utils.warnings import warn, dprint
|
15
16
|
from meerschaum.config.static import STATIC_CONFIG
|
16
17
|
|
17
18
|
PIPES_TABLE: str = 'mrsm_pipes'
|
@@ -46,25 +47,15 @@ def serialize_document(doc: Dict[str, Any]) -> str:
|
|
46
47
|
-------
|
47
48
|
A serialized string for the document.
|
48
49
|
"""
|
49
|
-
from meerschaum.utils.dtypes import serialize_bytes
|
50
50
|
return json.dumps(
|
51
51
|
doc,
|
52
|
-
default=
|
53
|
-
lambda x: (
|
54
|
-
json_serialize_datetime(x)
|
55
|
-
if hasattr(x, 'tzinfo')
|
56
|
-
else (
|
57
|
-
serialize_bytes(x)
|
58
|
-
if isinstance(x, bytes)
|
59
|
-
else str(x)
|
60
|
-
)
|
61
|
-
)
|
62
|
-
),
|
52
|
+
default=json_serialize_value,
|
63
53
|
separators=(',', ':'),
|
64
54
|
sort_keys=True,
|
65
55
|
)
|
66
56
|
|
67
57
|
|
58
|
+
@staticmethod
|
68
59
|
def get_document_key(
|
69
60
|
doc: Dict[str, Any],
|
70
61
|
indices: List[str],
|
@@ -91,25 +82,39 @@ def get_document_key(
|
|
91
82
|
from meerschaum.utils.dtypes import coerce_timezone
|
92
83
|
index_vals = {
|
93
84
|
key: (
|
94
|
-
str(val)
|
85
|
+
str(val).replace(':', COLON)
|
95
86
|
if not isinstance(val, datetime)
|
96
87
|
else str(int(coerce_timezone(val).replace(tzinfo=timezone.utc).timestamp()))
|
97
88
|
)
|
98
89
|
for key, val in doc.items()
|
99
|
-
if key in indices
|
100
|
-
}
|
101
|
-
indices_str = (
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
90
|
+
if ((key in indices) if indices else True)
|
91
|
+
}
|
92
|
+
indices_str = (
|
93
|
+
(
|
94
|
+
(
|
95
|
+
(
|
96
|
+
table_name
|
97
|
+
+ ':'
|
98
|
+
+ ('indices:' if True else '')
|
99
|
+
)
|
100
|
+
)
|
101
|
+
if table_name
|
102
|
+
else ''
|
103
|
+
) + ','.join(
|
104
|
+
sorted(
|
105
|
+
[
|
106
|
+
f'{key}{COLON}{val}'
|
107
|
+
for key, val in index_vals.items()
|
108
|
+
]
|
109
|
+
)
|
107
110
|
)
|
108
|
-
)
|
111
|
+
)
|
109
112
|
return indices_str
|
110
113
|
|
111
114
|
|
115
|
+
@classmethod
|
112
116
|
def get_table_quoted_doc_key(
|
117
|
+
cls,
|
113
118
|
table_name: str,
|
114
119
|
doc: Dict[str, Any],
|
115
120
|
indices: List[str],
|
@@ -120,7 +125,7 @@ def get_table_quoted_doc_key(
|
|
120
125
|
"""
|
121
126
|
return json.dumps(
|
122
127
|
{
|
123
|
-
get_document_key(doc, indices, table_name): serialize_document(doc),
|
128
|
+
cls.get_document_key(doc, indices, table_name): serialize_document(doc),
|
124
129
|
**(
|
125
130
|
{datetime_column: doc.get(datetime_column, 0)}
|
126
131
|
if datetime_column
|
@@ -129,7 +134,7 @@ def get_table_quoted_doc_key(
|
|
129
134
|
},
|
130
135
|
sort_keys=True,
|
131
136
|
separators=(',', ':'),
|
132
|
-
default=
|
137
|
+
default=json_serialize_value,
|
133
138
|
)
|
134
139
|
|
135
140
|
|
@@ -377,7 +382,7 @@ def delete_pipe(
|
|
377
382
|
doc = docs[0]
|
378
383
|
doc_str = json.dumps(
|
379
384
|
doc,
|
380
|
-
default=
|
385
|
+
default=json_serialize_value,
|
381
386
|
separators=(',', ':'),
|
382
387
|
sort_keys=True,
|
383
388
|
)
|
@@ -443,11 +448,16 @@ def get_pipe_data(
|
|
443
448
|
debug=debug,
|
444
449
|
)
|
445
450
|
]
|
451
|
+
print(f"{ix_docs=}")
|
446
452
|
try:
|
447
453
|
docs_strings = [
|
448
|
-
self.get(
|
449
|
-
|
450
|
-
|
454
|
+
self.get(
|
455
|
+
self.get_document_key(
|
456
|
+
doc,
|
457
|
+
indices,
|
458
|
+
table_name,
|
459
|
+
)
|
460
|
+
)
|
451
461
|
for doc in ix_docs
|
452
462
|
]
|
453
463
|
except Exception as e:
|
@@ -535,7 +545,7 @@ def sync_pipe(
|
|
535
545
|
def _serialize_indices_docs(_docs):
|
536
546
|
return [
|
537
547
|
{
|
538
|
-
'ix': get_document_key(doc, indices),
|
548
|
+
'ix': self.get_document_key(doc, indices),
|
539
549
|
**(
|
540
550
|
{
|
541
551
|
dt_col: doc.get(dt_col, 0)
|
@@ -594,7 +604,7 @@ def sync_pipe(
|
|
594
604
|
unseen_docs = unseen_df.to_dict(orient='records') if unseen_df is not None else []
|
595
605
|
unseen_indices_docs = _serialize_indices_docs(unseen_docs)
|
596
606
|
unseen_ix_vals = {
|
597
|
-
get_document_key(doc, indices, table_name): serialize_document(doc)
|
607
|
+
self.get_document_key(doc, indices, table_name): serialize_document(doc)
|
598
608
|
for doc in unseen_docs
|
599
609
|
}
|
600
610
|
for key, val in unseen_ix_vals.items():
|
@@ -615,7 +625,7 @@ def sync_pipe(
|
|
615
625
|
|
616
626
|
update_docs = update_df.to_dict(orient='records') if update_df is not None else []
|
617
627
|
update_ix_docs = {
|
618
|
-
get_document_key(doc, indices, table_name): doc
|
628
|
+
self.get_document_key(doc, indices, table_name): doc
|
619
629
|
for doc in update_docs
|
620
630
|
}
|
621
631
|
existing_docs_data = {
|
@@ -633,7 +643,7 @@ def sync_pipe(
|
|
633
643
|
if key not in existing_docs
|
634
644
|
}
|
635
645
|
new_ix_vals = {
|
636
|
-
get_document_key(doc, indices, table_name): serialize_document(doc)
|
646
|
+
self.get_document_key(doc, indices, table_name): serialize_document(doc)
|
637
647
|
for doc in new_update_docs.values()
|
638
648
|
}
|
639
649
|
for key, val in new_ix_vals.items():
|
@@ -743,8 +753,8 @@ def clear_pipe(
|
|
743
753
|
table_name = self.quote_table(pipe.target)
|
744
754
|
indices = [col for col in pipe.columns.values() if col]
|
745
755
|
for doc in docs:
|
746
|
-
set_doc_key = get_document_key(doc, indices)
|
747
|
-
table_doc_key = get_document_key(doc, indices, table_name)
|
756
|
+
set_doc_key = self.get_document_key(doc, indices)
|
757
|
+
table_doc_key = self.get_document_key(doc, indices, table_name)
|
748
758
|
try:
|
749
759
|
if dt_col:
|
750
760
|
self.client.zrem(table_name, set_doc_key)
|
@@ -826,13 +836,15 @@ def get_pipe_rowcount(
|
|
826
836
|
return 0
|
827
837
|
|
828
838
|
try:
|
829
|
-
if begin is None and end is None and params
|
839
|
+
if begin is None and end is None and not params:
|
830
840
|
return (
|
831
841
|
self.client.zcard(table_name)
|
832
842
|
if dt_col
|
833
|
-
else self.client.
|
843
|
+
else self.client.scard(table_name)
|
834
844
|
)
|
835
|
-
except Exception:
|
845
|
+
except Exception as e:
|
846
|
+
if debug:
|
847
|
+
dprint(f"Failed to get rowcount for {pipe}:\n{e}")
|
836
848
|
return None
|
837
849
|
|
838
850
|
df = pipe.get_data(begin=begin, end=end, params=params, debug=debug)
|
meerschaum/core/Pipe/__init__.py
CHANGED
meerschaum/core/Pipe/_data.py
CHANGED
@@ -88,9 +88,8 @@ def get_data(
|
|
88
88
|
limit: Optional[int], default None
|
89
89
|
If provided, cap the dataframe to this many rows.
|
90
90
|
|
91
|
-
fresh: bool, default
|
91
|
+
fresh: bool, default False
|
92
92
|
If `True`, skip local cache and directly query the instance connector.
|
93
|
-
Defaults to `True`.
|
94
93
|
|
95
94
|
debug: bool, default False
|
96
95
|
Verbosity toggle.
|
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -158,6 +158,7 @@ def sync(
|
|
158
158
|
'error_callback': error_callback,
|
159
159
|
'sync_chunks': sync_chunks,
|
160
160
|
'chunksize': chunksize,
|
161
|
+
'safe_copy': True,
|
161
162
|
})
|
162
163
|
|
163
164
|
### NOTE: Invalidate `_exists` cache before and after syncing.
|
@@ -268,6 +269,7 @@ def sync(
|
|
268
269
|
**kw
|
269
270
|
)
|
270
271
|
)
|
272
|
+
kw['safe_copy'] = False
|
271
273
|
except Exception as e:
|
272
274
|
get_console().print_exception(
|
273
275
|
suppress=[
|
@@ -402,6 +404,7 @@ def sync(
|
|
402
404
|
self._persist_new_numeric_columns(df, debug=debug)
|
403
405
|
self._persist_new_uuid_columns(df, debug=debug)
|
404
406
|
self._persist_new_bytes_columns(df, debug=debug)
|
407
|
+
self._persist_new_geometry_columns(df, debug=debug)
|
405
408
|
|
406
409
|
if debug:
|
407
410
|
dprint(
|
@@ -1009,7 +1012,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
1009
1012
|
|
1010
1013
|
self._attributes_sync_time = None
|
1011
1014
|
dtypes = self.parameters.get('dtypes', {})
|
1012
|
-
dtypes.update({col: 'numeric' for col in
|
1015
|
+
dtypes.update({col: 'numeric' for col in new_numeric_cols})
|
1013
1016
|
self.parameters['dtypes'] = dtypes
|
1014
1017
|
if not self.temporary:
|
1015
1018
|
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
@@ -1034,7 +1037,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
1034
1037
|
|
1035
1038
|
self._attributes_sync_time = None
|
1036
1039
|
dtypes = self.parameters.get('dtypes', {})
|
1037
|
-
dtypes.update({col: 'uuid' for col in
|
1040
|
+
dtypes.update({col: 'uuid' for col in new_uuid_cols})
|
1038
1041
|
self.parameters['dtypes'] = dtypes
|
1039
1042
|
if not self.temporary:
|
1040
1043
|
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
@@ -1059,7 +1062,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
1059
1062
|
|
1060
1063
|
self._attributes_sync_time = None
|
1061
1064
|
dtypes = self.parameters.get('dtypes', {})
|
1062
|
-
dtypes.update({col: 'json' for col in
|
1065
|
+
dtypes.update({col: 'json' for col in new_json_cols})
|
1063
1066
|
self.parameters['dtypes'] = dtypes
|
1064
1067
|
|
1065
1068
|
if not self.temporary:
|
@@ -1085,7 +1088,64 @@ def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
|
|
1085
1088
|
|
1086
1089
|
self._attributes_sync_time = None
|
1087
1090
|
dtypes = self.parameters.get('dtypes', {})
|
1088
|
-
dtypes.update({col: 'bytes' for col in
|
1091
|
+
dtypes.update({col: 'bytes' for col in new_bytes_cols})
|
1092
|
+
self.parameters['dtypes'] = dtypes
|
1093
|
+
|
1094
|
+
if not self.temporary:
|
1095
|
+
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1096
|
+
if not edit_success:
|
1097
|
+
warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
|
1098
|
+
|
1099
|
+
return edit_success, edit_msg
|
1100
|
+
|
1101
|
+
return True, "Success"
|
1102
|
+
|
1103
|
+
|
1104
|
+
def _persist_new_geometry_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1105
|
+
"""
|
1106
|
+
Check for new `geometry` columns and update the parameters.
|
1107
|
+
"""
|
1108
|
+
from meerschaum.utils.dataframe import get_geometry_cols
|
1109
|
+
geometry_cols_types_srids = get_geometry_cols(df, with_types_srids=True)
|
1110
|
+
existing_geometry_cols = [
|
1111
|
+
col
|
1112
|
+
for col, typ in self.dtypes.items()
|
1113
|
+
if typ.startswith('geometry') or typ.startswith('geography')
|
1114
|
+
]
|
1115
|
+
new_geometry_cols = [
|
1116
|
+
col
|
1117
|
+
for col in geometry_cols_types_srids
|
1118
|
+
if col not in existing_geometry_cols
|
1119
|
+
]
|
1120
|
+
if not new_geometry_cols:
|
1121
|
+
return True, "Success"
|
1122
|
+
|
1123
|
+
self._attributes_sync_time = None
|
1124
|
+
dtypes = self.parameters.get('dtypes', {})
|
1125
|
+
|
1126
|
+
new_cols_types = {}
|
1127
|
+
for col, (geometry_type, srid) in geometry_cols_types_srids.items():
|
1128
|
+
if col not in new_geometry_cols:
|
1129
|
+
continue
|
1130
|
+
|
1131
|
+
new_dtype = "geometry"
|
1132
|
+
modifier = ""
|
1133
|
+
if not srid and geometry_type.lower() == 'geometry':
|
1134
|
+
new_cols_types[col] = new_dtype
|
1135
|
+
continue
|
1136
|
+
|
1137
|
+
modifier = "["
|
1138
|
+
if geometry_type.lower() != 'geometry':
|
1139
|
+
modifier += f"{geometry_type}"
|
1140
|
+
|
1141
|
+
if srid:
|
1142
|
+
if modifier != '[':
|
1143
|
+
modifier += ", "
|
1144
|
+
modifier += f"{srid}"
|
1145
|
+
modifier += "]"
|
1146
|
+
new_cols_types[col] = f"{new_dtype}{modifier}"
|
1147
|
+
|
1148
|
+
dtypes.update(new_cols_types)
|
1089
1149
|
self.parameters['dtypes'] = dtypes
|
1090
1150
|
|
1091
1151
|
if not self.temporary:
|
meerschaum/plugins/_Plugin.py
CHANGED
@@ -450,7 +450,7 @@ class Plugin:
|
|
450
450
|
success, msg = False, (
|
451
451
|
f"Failed to run post-install setup for plugin '{self}'." + '\n' +
|
452
452
|
f"Check `setup()` in '{self.__file__}' for more information " +
|
453
|
-
|
453
|
+
"(no error message provided)."
|
454
454
|
)
|
455
455
|
else:
|
456
456
|
success, msg = True, success_msg
|
@@ -458,7 +458,7 @@ class Plugin:
|
|
458
458
|
success = True
|
459
459
|
msg = (
|
460
460
|
f"Post-install for plugin '{self}' returned None. " +
|
461
|
-
|
461
|
+
"Assuming plugin successfully installed."
|
462
462
|
)
|
463
463
|
warn(msg)
|
464
464
|
else:
|
@@ -469,7 +469,7 @@ class Plugin:
|
|
469
469
|
)
|
470
470
|
|
471
471
|
_ongoing_installations.remove(self.full_name)
|
472
|
-
|
472
|
+
_ = self.module
|
473
473
|
return success, msg
|
474
474
|
|
475
475
|
|
@@ -716,13 +716,14 @@ class Plugin:
|
|
716
716
|
return required
|
717
717
|
|
718
718
|
|
719
|
-
def get_required_plugins(self, debug: bool=False) -> List[
|
719
|
+
def get_required_plugins(self, debug: bool=False) -> List[mrsm.plugins.Plugin]:
|
720
720
|
"""
|
721
721
|
Return a list of required Plugin objects.
|
722
722
|
"""
|
723
723
|
from meerschaum.utils.warnings import warn
|
724
724
|
from meerschaum.config import get_config
|
725
725
|
from meerschaum.config.static import STATIC_CONFIG
|
726
|
+
from meerschaum.connectors.parse import is_valid_connector_keys
|
726
727
|
plugins = []
|
727
728
|
_deps = self.get_dependencies(debug=debug)
|
728
729
|
sep = STATIC_CONFIG['plugins']['repo_separator']
|
@@ -731,11 +732,13 @@ class Plugin:
|
|
731
732
|
if _d.startswith('plugin:') and len(_d) > len('plugin:')
|
732
733
|
]
|
733
734
|
default_repo_keys = get_config('meerschaum', 'default_repository')
|
735
|
+
skipped_repo_keys = set()
|
736
|
+
|
734
737
|
for _plugin_name in plugin_names:
|
735
738
|
if sep in _plugin_name:
|
736
739
|
try:
|
737
740
|
_plugin_name, _repo_keys = _plugin_name.split(sep)
|
738
|
-
except Exception
|
741
|
+
except Exception:
|
739
742
|
_repo_keys = default_repo_keys
|
740
743
|
warn(
|
741
744
|
f"Invalid repo keys for required plugin '{_plugin_name}'.\n "
|
@@ -744,7 +747,20 @@ class Plugin:
|
|
744
747
|
)
|
745
748
|
else:
|
746
749
|
_repo_keys = default_repo_keys
|
750
|
+
|
751
|
+
if _repo_keys in skipped_repo_keys:
|
752
|
+
continue
|
753
|
+
|
754
|
+
if not is_valid_connector_keys(_repo_keys):
|
755
|
+
warn(
|
756
|
+
f"Invalid connector '{_repo_keys}'.\n"
|
757
|
+
f" Skipping required plugins from repository '{_repo_keys}'",
|
758
|
+
stack=False,
|
759
|
+
)
|
760
|
+
continue
|
761
|
+
|
747
762
|
plugins.append(Plugin(_plugin_name, repo=_repo_keys))
|
763
|
+
|
748
764
|
return plugins
|
749
765
|
|
750
766
|
|