meerschaum 2.9.5__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/__init__.py +5 -2
- meerschaum/_internal/__init__.py +1 -0
- meerschaum/_internal/arguments/_parse_arguments.py +4 -4
- meerschaum/_internal/arguments/_parser.py +19 -2
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/entry.py +6 -6
- meerschaum/_internal/shell/Shell.py +1 -1
- meerschaum/_internal/static.py +356 -0
- meerschaum/actions/api.py +12 -2
- meerschaum/actions/bootstrap.py +7 -7
- meerschaum/actions/edit.py +142 -18
- meerschaum/actions/register.py +137 -6
- meerschaum/actions/show.py +117 -29
- meerschaum/actions/stop.py +4 -1
- meerschaum/actions/sync.py +1 -1
- meerschaum/actions/tag.py +9 -8
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +11 -3
- meerschaum/api/_events.py +39 -2
- meerschaum/api/_oauth2.py +118 -8
- meerschaum/api/_tokens.py +102 -0
- meerschaum/api/dash/__init__.py +0 -3
- meerschaum/api/dash/callbacks/custom.py +2 -2
- meerschaum/api/dash/callbacks/dashboard.py +103 -19
- meerschaum/api/dash/callbacks/plugins.py +0 -1
- meerschaum/api/dash/callbacks/register.py +1 -1
- meerschaum/api/dash/callbacks/settings/__init__.py +1 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
- meerschaum/api/dash/callbacks/settings/tokens.py +388 -0
- meerschaum/api/dash/components.py +30 -8
- meerschaum/api/dash/keys.py +19 -93
- meerschaum/api/dash/pages/dashboard.py +1 -20
- meerschaum/api/dash/pages/settings/__init__.py +1 -0
- meerschaum/api/dash/pages/settings/password_reset.py +1 -1
- meerschaum/api/dash/pages/settings/tokens.py +55 -0
- meerschaum/api/dash/pipes.py +94 -59
- meerschaum/api/dash/sessions.py +12 -0
- meerschaum/api/dash/tokens.py +606 -0
- meerschaum/api/dash/websockets.py +1 -1
- meerschaum/api/dash/webterm.py +4 -0
- meerschaum/api/models/__init__.py +23 -3
- meerschaum/api/models/_actions.py +22 -0
- meerschaum/api/models/_pipes.py +85 -7
- meerschaum/api/models/_tokens.py +81 -0
- meerschaum/api/resources/templates/termpage.html +12 -0
- meerschaum/api/routes/__init__.py +1 -0
- meerschaum/api/routes/_actions.py +3 -4
- meerschaum/api/routes/_connectors.py +3 -7
- meerschaum/api/routes/_jobs.py +14 -35
- meerschaum/api/routes/_login.py +49 -12
- meerschaum/api/routes/_misc.py +5 -10
- meerschaum/api/routes/_pipes.py +173 -140
- meerschaum/api/routes/_plugins.py +38 -28
- meerschaum/api/routes/_tokens.py +236 -0
- meerschaum/api/routes/_users.py +47 -35
- meerschaum/api/routes/_version.py +3 -3
- meerschaum/config/__init__.py +43 -20
- meerschaum/config/_default.py +43 -6
- meerschaum/config/_edit.py +28 -24
- meerschaum/config/_environment.py +1 -1
- meerschaum/config/_patch.py +6 -6
- meerschaum/config/_paths.py +5 -1
- meerschaum/config/_read_config.py +65 -34
- meerschaum/config/_sync.py +6 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +31 -11
- meerschaum/config/static.py +18 -0
- meerschaum/connectors/_Connector.py +10 -4
- meerschaum/connectors/__init__.py +4 -20
- meerschaum/connectors/api/_APIConnector.py +34 -6
- meerschaum/connectors/api/_actions.py +2 -2
- meerschaum/connectors/api/_jobs.py +1 -1
- meerschaum/connectors/api/_login.py +33 -7
- meerschaum/connectors/api/_misc.py +2 -2
- meerschaum/connectors/api/_pipes.py +16 -31
- meerschaum/connectors/api/_plugins.py +2 -2
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/api/_tokens.py +146 -0
- meerschaum/connectors/api/_users.py +70 -58
- meerschaum/connectors/instance/_InstanceConnector.py +83 -0
- meerschaum/connectors/instance/__init__.py +10 -0
- meerschaum/connectors/instance/_pipes.py +442 -0
- meerschaum/connectors/instance/_plugins.py +151 -0
- meerschaum/connectors/instance/_tokens.py +296 -0
- meerschaum/connectors/instance/_users.py +181 -0
- meerschaum/connectors/parse.py +4 -1
- meerschaum/connectors/sql/_SQLConnector.py +8 -5
- meerschaum/connectors/sql/_cli.py +12 -11
- meerschaum/connectors/sql/_create_engine.py +9 -168
- meerschaum/connectors/sql/_fetch.py +2 -18
- meerschaum/connectors/sql/_pipes.py +156 -190
- meerschaum/connectors/sql/_plugins.py +29 -0
- meerschaum/connectors/sql/_sql.py +46 -21
- meerschaum/connectors/sql/_users.py +29 -2
- meerschaum/connectors/sql/tables/__init__.py +1 -1
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -4
- meerschaum/connectors/valkey/_pipes.py +53 -26
- meerschaum/connectors/valkey/_plugins.py +2 -26
- meerschaum/core/Pipe/__init__.py +59 -19
- meerschaum/core/Pipe/_attributes.py +412 -90
- meerschaum/core/Pipe/_bootstrap.py +54 -24
- meerschaum/core/Pipe/_data.py +96 -18
- meerschaum/core/Pipe/_dtypes.py +48 -18
- meerschaum/core/Pipe/_edit.py +14 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_show.py +5 -5
- meerschaum/core/Pipe/_sync.py +118 -193
- meerschaum/core/Pipe/_verify.py +4 -4
- meerschaum/{plugins → core/Plugin}/_Plugin.py +9 -11
- meerschaum/core/Plugin/__init__.py +1 -1
- meerschaum/core/Token/_Token.py +220 -0
- meerschaum/core/Token/__init__.py +12 -0
- meerschaum/core/User/_User.py +34 -8
- meerschaum/core/User/__init__.py +9 -1
- meerschaum/core/__init__.py +1 -0
- meerschaum/jobs/_Job.py +3 -2
- meerschaum/jobs/__init__.py +3 -2
- meerschaum/jobs/systemd.py +1 -1
- meerschaum/models/__init__.py +35 -0
- meerschaum/models/pipes.py +247 -0
- meerschaum/models/tokens.py +38 -0
- meerschaum/models/users.py +26 -0
- meerschaum/plugins/__init__.py +22 -7
- meerschaum/plugins/bootstrap.py +2 -1
- meerschaum/utils/_get_pipes.py +68 -27
- meerschaum/utils/daemon/Daemon.py +2 -1
- meerschaum/utils/daemon/__init__.py +30 -2
- meerschaum/utils/dataframe.py +473 -81
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +473 -34
- meerschaum/utils/dtypes/sql.py +368 -28
- meerschaum/utils/formatting/__init__.py +1 -1
- meerschaum/utils/formatting/_pipes.py +5 -4
- meerschaum/utils/formatting/_shell.py +11 -9
- meerschaum/utils/misc.py +246 -148
- meerschaum/utils/packages/__init__.py +10 -27
- meerschaum/utils/packages/_packages.py +41 -34
- meerschaum/utils/pipes.py +181 -0
- meerschaum/utils/process.py +1 -1
- meerschaum/utils/prompt.py +3 -1
- meerschaum/utils/schedule.py +2 -1
- meerschaum/utils/sql.py +121 -44
- meerschaum/utils/typing.py +1 -4
- meerschaum/utils/venv/_Venv.py +2 -2
- meerschaum/utils/venv/__init__.py +5 -7
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/METADATA +92 -96
- meerschaum-3.0.0rc2.dist-info/RECORD +283 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/WHEEL +1 -1
- meerschaum-3.0.0rc2.dist-info/licenses/NOTICE +2 -0
- meerschaum/api/models/_interfaces.py +0 -15
- meerschaum/api/models/_locations.py +0 -15
- meerschaum/api/models/_metrics.py +0 -15
- meerschaum/config/static/__init__.py +0 -186
- meerschaum-2.9.5.dist-info/RECORD +0 -263
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/zip-safe +0 -0
@@ -25,7 +25,6 @@ def register_pipe(
|
|
25
25
|
Register a new pipe.
|
26
26
|
A pipe's attributes must be set before registering.
|
27
27
|
"""
|
28
|
-
from meerschaum.utils.debug import dprint
|
29
28
|
from meerschaum.utils.packages import attempt_import
|
30
29
|
from meerschaum.utils.sql import json_flavors
|
31
30
|
|
@@ -45,7 +44,7 @@ def register_pipe(
|
|
45
44
|
### (which shouldn't be able to be registered anyway but that's an issue for later).
|
46
45
|
parameters = None
|
47
46
|
try:
|
48
|
-
parameters = pipe.
|
47
|
+
parameters = pipe.get_parameters(apply_symlinks=False)
|
49
48
|
except Exception as e:
|
50
49
|
if debug:
|
51
50
|
dprint(str(e))
|
@@ -76,7 +75,7 @@ def register_pipe(
|
|
76
75
|
|
77
76
|
def edit_pipe(
|
78
77
|
self,
|
79
|
-
pipe
|
78
|
+
pipe: mrsm.Pipe,
|
80
79
|
patch: bool = False,
|
81
80
|
debug: bool = False,
|
82
81
|
**kw : Any
|
@@ -108,10 +107,10 @@ def edit_pipe(
|
|
108
107
|
original_parameters = Pipe(
|
109
108
|
pipe.connector_keys, pipe.metric_key, pipe.location_key,
|
110
109
|
mrsm_instance=pipe.instance_keys
|
111
|
-
).
|
110
|
+
).get_parameters(apply_symlinks=False)
|
112
111
|
parameters = apply_patch_to_config(
|
113
112
|
original_parameters,
|
114
|
-
pipe.parameters
|
113
|
+
pipe._attributes['parameters']
|
115
114
|
)
|
116
115
|
|
117
116
|
### ensure pipes table exists
|
@@ -170,11 +169,10 @@ def fetch_pipes_keys(
|
|
170
169
|
debug: bool, default False
|
171
170
|
Verbosity toggle.
|
172
171
|
"""
|
173
|
-
from meerschaum.utils.debug import dprint
|
174
172
|
from meerschaum.utils.packages import attempt_import
|
175
173
|
from meerschaum.utils.misc import separate_negation_values
|
176
174
|
from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
|
177
|
-
from meerschaum.
|
175
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
178
176
|
import json
|
179
177
|
from copy import deepcopy
|
180
178
|
sqlalchemy, sqlalchemy_sql_functions = attempt_import(
|
@@ -338,7 +336,6 @@ def create_indices(
|
|
338
336
|
"""
|
339
337
|
Create a pipe's indices.
|
340
338
|
"""
|
341
|
-
from meerschaum.utils.debug import dprint
|
342
339
|
if debug:
|
343
340
|
dprint(f"Creating indices for {pipe}...")
|
344
341
|
|
@@ -392,7 +389,6 @@ def drop_indices(
|
|
392
389
|
"""
|
393
390
|
Drop a pipe's indices.
|
394
391
|
"""
|
395
|
-
from meerschaum.utils.debug import dprint
|
396
392
|
if debug:
|
397
393
|
dprint(f"Dropping indices for {pipe}...")
|
398
394
|
|
@@ -603,7 +599,10 @@ def get_create_index_queries(
|
|
603
599
|
### create datetime index
|
604
600
|
dt_query = None
|
605
601
|
if _datetime is not None:
|
606
|
-
if
|
602
|
+
if (
|
603
|
+
self.flavor in ('timescaledb', 'timescaledb-ha')
|
604
|
+
and pipe.parameters.get('hypertable', True)
|
605
|
+
):
|
607
606
|
_id_count = (
|
608
607
|
get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
|
609
608
|
if (_id is not None and _create_space_partition) else None
|
@@ -719,7 +718,7 @@ def get_create_index_queries(
|
|
719
718
|
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
720
719
|
)
|
721
720
|
])
|
722
|
-
elif self.flavor
|
721
|
+
elif self.flavor in ('timescaledb', 'timescaledb-ha'):
|
723
722
|
primary_queries.extend([
|
724
723
|
(
|
725
724
|
f"ALTER TABLE {_pipe_name}\n"
|
@@ -758,7 +757,7 @@ def get_create_index_queries(
|
|
758
757
|
|
759
758
|
### create id index
|
760
759
|
if _id_name is not None:
|
761
|
-
if self.flavor
|
760
|
+
if self.flavor in ('timescaledb', 'timescaledb-ha'):
|
762
761
|
### Already created indices via create_hypertable.
|
763
762
|
id_query = (
|
764
763
|
None if (_id is not None and _create_space_partition)
|
@@ -797,7 +796,7 @@ def get_create_index_queries(
|
|
797
796
|
|
798
797
|
cols_names_str = ", ".join(cols_names)
|
799
798
|
index_query_params_clause = f" ({cols_names_str})"
|
800
|
-
if self.flavor
|
799
|
+
if self.flavor in ('postgis', 'timescaledb-ha'):
|
801
800
|
for col in cols:
|
802
801
|
col_typ = existing_cols_pd_types.get(cols[0], 'object')
|
803
802
|
if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'):
|
@@ -1005,6 +1004,8 @@ def get_pipe_data(
|
|
1005
1004
|
limit: Optional[int] = None,
|
1006
1005
|
begin_add_minutes: int = 0,
|
1007
1006
|
end_add_minutes: int = 0,
|
1007
|
+
chunksize: Optional[int] = -1,
|
1008
|
+
as_iterator: bool = False,
|
1008
1009
|
debug: bool = False,
|
1009
1010
|
**kw: Any
|
1010
1011
|
) -> Union[pd.DataFrame, None]:
|
@@ -1041,14 +1042,17 @@ def get_pipe_data(
|
|
1041
1042
|
If specified, limit the number of rows retrieved to this value.
|
1042
1043
|
|
1043
1044
|
begin_add_minutes: int, default 0
|
1044
|
-
The number of minutes to add to the `begin` datetime (i.e. `DATEADD
|
1045
|
+
The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
|
1045
1046
|
|
1046
1047
|
end_add_minutes: int, default 0
|
1047
|
-
The number of minutes to add to the `end` datetime (i.e. `DATEADD
|
1048
|
+
The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
|
1048
1049
|
|
1049
1050
|
chunksize: Optional[int], default -1
|
1050
1051
|
The size of dataframe chunks to load into memory.
|
1051
1052
|
|
1053
|
+
as_iterator: bool, default False
|
1054
|
+
If `True`, return the chunks iterator directly.
|
1055
|
+
|
1052
1056
|
debug: bool, default False
|
1053
1057
|
Verbosity toggle.
|
1054
1058
|
|
@@ -1057,43 +1061,58 @@ def get_pipe_data(
|
|
1057
1061
|
A `pd.DataFrame` of the pipe's data.
|
1058
1062
|
|
1059
1063
|
"""
|
1060
|
-
import
|
1061
|
-
from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
|
1064
|
+
import functools
|
1062
1065
|
from meerschaum.utils.packages import import_pandas
|
1063
|
-
from meerschaum.utils.dtypes import
|
1064
|
-
attempt_cast_to_numeric,
|
1065
|
-
attempt_cast_to_uuid,
|
1066
|
-
attempt_cast_to_bytes,
|
1067
|
-
attempt_cast_to_geometry,
|
1068
|
-
are_dtypes_equal,
|
1069
|
-
)
|
1066
|
+
from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
|
1070
1067
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
1071
1068
|
pd = import_pandas()
|
1072
1069
|
is_dask = 'dask' in pd.__name__
|
1073
1070
|
|
1074
1071
|
cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
|
1072
|
+
pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {}
|
1073
|
+
|
1074
|
+
remote_pandas_types = {
|
1075
|
+
col: to_pandas_dtype(get_pd_type_from_db_type(typ))
|
1076
|
+
for col, typ in cols_types.items()
|
1077
|
+
}
|
1078
|
+
remote_dt_cols_types = {
|
1079
|
+
col: typ
|
1080
|
+
for col, typ in remote_pandas_types.items()
|
1081
|
+
if are_dtypes_equal(typ, 'datetime')
|
1082
|
+
}
|
1083
|
+
remote_dt_tz_aware_cols_types = {
|
1084
|
+
col: typ
|
1085
|
+
for col, typ in remote_dt_cols_types.items()
|
1086
|
+
if ',' in typ or typ == 'datetime'
|
1087
|
+
}
|
1088
|
+
remote_dt_tz_naive_cols_types = {
|
1089
|
+
col: typ
|
1090
|
+
for col, typ in remote_dt_cols_types.items()
|
1091
|
+
if col not in remote_dt_tz_aware_cols_types
|
1092
|
+
}
|
1093
|
+
|
1094
|
+
configured_pandas_types = {
|
1095
|
+
col: to_pandas_dtype(typ)
|
1096
|
+
for col, typ in pipe_dtypes.items()
|
1097
|
+
}
|
1098
|
+
configured_lower_precision_dt_cols_types = {
|
1099
|
+
col: typ
|
1100
|
+
for col, typ in pipe_dtypes.items()
|
1101
|
+
if (
|
1102
|
+
are_dtypes_equal('datetime', typ)
|
1103
|
+
and '[' in typ
|
1104
|
+
and 'ns' not in typ
|
1105
|
+
)
|
1106
|
+
|
1107
|
+
}
|
1108
|
+
|
1075
1109
|
dtypes = {
|
1076
|
-
**
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
**
|
1081
|
-
col: get_pd_type_from_db_type(typ)
|
1082
|
-
for col, typ in cols_types.items()
|
1083
|
-
}
|
1110
|
+
**remote_pandas_types,
|
1111
|
+
**configured_pandas_types,
|
1112
|
+
**remote_dt_tz_aware_cols_types,
|
1113
|
+
**remote_dt_tz_naive_cols_types,
|
1114
|
+
**configured_lower_precision_dt_cols_types
|
1084
1115
|
} if pipe.enforce else {}
|
1085
|
-
if dtypes:
|
1086
|
-
if self.flavor == 'sqlite':
|
1087
|
-
if not pipe.columns.get('datetime', None):
|
1088
|
-
_dt = pipe.guess_datetime()
|
1089
|
-
else:
|
1090
|
-
_dt = pipe.get_columns('datetime')
|
1091
|
-
|
1092
|
-
if _dt:
|
1093
|
-
dt_type = dtypes.get(_dt, 'object').lower()
|
1094
|
-
if 'datetime' not in dt_type:
|
1095
|
-
if 'int' not in dt_type:
|
1096
|
-
dtypes[_dt] = 'datetime64[ns, UTC]'
|
1097
1116
|
|
1098
1117
|
existing_cols = cols_types.keys()
|
1099
1118
|
select_columns = (
|
@@ -1110,13 +1129,20 @@ def get_pipe_data(
|
|
1110
1129
|
and col not in (omit_columns or [])
|
1111
1130
|
]
|
1112
1131
|
) if pipe.enforce else select_columns
|
1132
|
+
|
1113
1133
|
if select_columns:
|
1114
1134
|
dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
|
1135
|
+
|
1115
1136
|
dtypes = {
|
1116
|
-
col:
|
1137
|
+
col: typ
|
1117
1138
|
for col, typ in dtypes.items()
|
1118
|
-
if col in select_columns and col not in (omit_columns or [])
|
1139
|
+
if col in (select_columns or [col]) and col not in (omit_columns or [])
|
1119
1140
|
} if pipe.enforce else {}
|
1141
|
+
|
1142
|
+
if debug:
|
1143
|
+
dprint(f"[{self}] `read()` dtypes:")
|
1144
|
+
mrsm.pprint(dtypes)
|
1145
|
+
|
1120
1146
|
query = self.get_pipe_data_query(
|
1121
1147
|
pipe,
|
1122
1148
|
select_columns=select_columns,
|
@@ -1132,91 +1158,25 @@ def get_pipe_data(
|
|
1132
1158
|
**kw
|
1133
1159
|
)
|
1134
1160
|
|
1161
|
+
read_kwargs = {}
|
1135
1162
|
if is_dask:
|
1136
1163
|
index_col = pipe.columns.get('datetime', None)
|
1137
|
-
|
1164
|
+
read_kwargs['index_col'] = index_col
|
1138
1165
|
|
1139
|
-
|
1140
|
-
col
|
1141
|
-
for col, typ in pipe.dtypes.items()
|
1142
|
-
if typ.startswith('numeric') and col in dtypes
|
1143
|
-
]
|
1144
|
-
uuid_columns = [
|
1145
|
-
col
|
1146
|
-
for col, typ in pipe.dtypes.items()
|
1147
|
-
if typ == 'uuid' and col in dtypes
|
1148
|
-
]
|
1149
|
-
bytes_columns = [
|
1150
|
-
col
|
1151
|
-
for col, typ in pipe.dtypes.items()
|
1152
|
-
if typ == 'bytes' and col in dtypes
|
1153
|
-
]
|
1154
|
-
geometry_columns = [
|
1155
|
-
col
|
1156
|
-
for col, typ in pipe.dtypes.items()
|
1157
|
-
if typ.startswith('geometry') and col in dtypes
|
1158
|
-
]
|
1159
|
-
|
1160
|
-
kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
|
1161
|
-
|
1162
|
-
df = self.read(
|
1166
|
+
chunks = self.read(
|
1163
1167
|
query,
|
1168
|
+
chunksize=chunksize,
|
1169
|
+
as_iterator=True,
|
1170
|
+
coerce_float=False,
|
1164
1171
|
dtype=dtypes,
|
1165
1172
|
debug=debug,
|
1166
|
-
**
|
1173
|
+
**read_kwargs
|
1167
1174
|
)
|
1168
|
-
for col in numeric_columns:
|
1169
|
-
if col not in df.columns:
|
1170
|
-
continue
|
1171
|
-
df[col] = df[col].apply(attempt_cast_to_numeric)
|
1172
1175
|
|
1173
|
-
|
1174
|
-
|
1175
|
-
continue
|
1176
|
-
df[col] = df[col].apply(attempt_cast_to_uuid)
|
1177
|
-
|
1178
|
-
for col in bytes_columns:
|
1179
|
-
if col not in df.columns:
|
1180
|
-
continue
|
1181
|
-
df[col] = df[col].apply(attempt_cast_to_bytes)
|
1176
|
+
if as_iterator:
|
1177
|
+
return chunks
|
1182
1178
|
|
1183
|
-
|
1184
|
-
if col not in df.columns:
|
1185
|
-
continue
|
1186
|
-
df[col] = df[col].apply(attempt_cast_to_geometry)
|
1187
|
-
|
1188
|
-
if self.flavor == 'sqlite':
|
1189
|
-
ignore_dt_cols = [
|
1190
|
-
col
|
1191
|
-
for col, dtype in pipe.dtypes.items()
|
1192
|
-
if not are_dtypes_equal(str(dtype), 'datetime')
|
1193
|
-
]
|
1194
|
-
### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
|
1195
|
-
df = (
|
1196
|
-
parse_df_datetimes(
|
1197
|
-
df,
|
1198
|
-
ignore_cols=ignore_dt_cols,
|
1199
|
-
chunksize=kw.get('chunksize', None),
|
1200
|
-
strip_timezone=(pipe.tzinfo is None),
|
1201
|
-
debug=debug,
|
1202
|
-
) if isinstance(df, pd.DataFrame) else (
|
1203
|
-
[
|
1204
|
-
parse_df_datetimes(
|
1205
|
-
c,
|
1206
|
-
ignore_cols=ignore_dt_cols,
|
1207
|
-
chunksize=kw.get('chunksize', None),
|
1208
|
-
strip_timezone=(pipe.tzinfo is None),
|
1209
|
-
debug=debug,
|
1210
|
-
)
|
1211
|
-
for c in df
|
1212
|
-
]
|
1213
|
-
)
|
1214
|
-
)
|
1215
|
-
for col, typ in dtypes.items():
|
1216
|
-
if typ != 'json':
|
1217
|
-
continue
|
1218
|
-
df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
|
1219
|
-
return df
|
1179
|
+
return pd.concat(chunks)
|
1220
1180
|
|
1221
1181
|
|
1222
1182
|
def get_pipe_data_query(
|
@@ -1419,7 +1379,7 @@ def get_pipe_data_query(
|
|
1419
1379
|
if k in existing_cols or skip_existing_cols_check
|
1420
1380
|
}
|
1421
1381
|
if valid_params:
|
1422
|
-
where += build_where(valid_params, self).replace(
|
1382
|
+
where += ' ' + build_where(valid_params, self).lstrip().replace(
|
1423
1383
|
'WHERE', (' AND' if is_dt_bound else " ")
|
1424
1384
|
)
|
1425
1385
|
|
@@ -1549,13 +1509,7 @@ def create_pipe_table_from_df(
|
|
1549
1509
|
"""
|
1550
1510
|
Create a pipe's table from its configured dtypes and an incoming dataframe.
|
1551
1511
|
"""
|
1552
|
-
from meerschaum.utils.dataframe import
|
1553
|
-
get_json_cols,
|
1554
|
-
get_numeric_cols,
|
1555
|
-
get_uuid_cols,
|
1556
|
-
get_datetime_cols,
|
1557
|
-
get_bytes_cols,
|
1558
|
-
)
|
1512
|
+
from meerschaum.utils.dataframe import get_special_cols
|
1559
1513
|
from meerschaum.utils.sql import (
|
1560
1514
|
get_create_table_queries,
|
1561
1515
|
sql_item_name,
|
@@ -1584,30 +1538,7 @@ def create_pipe_table_from_df(
|
|
1584
1538
|
for col_ix, col in pipe.columns.items()
|
1585
1539
|
if col and col_ix != 'primary'
|
1586
1540
|
},
|
1587
|
-
**
|
1588
|
-
col: 'uuid'
|
1589
|
-
for col in get_uuid_cols(df)
|
1590
|
-
},
|
1591
|
-
**{
|
1592
|
-
col: 'json'
|
1593
|
-
for col in get_json_cols(df)
|
1594
|
-
},
|
1595
|
-
**{
|
1596
|
-
col: 'numeric'
|
1597
|
-
for col in get_numeric_cols(df)
|
1598
|
-
},
|
1599
|
-
**{
|
1600
|
-
col: 'bytes'
|
1601
|
-
for col in get_bytes_cols(df)
|
1602
|
-
},
|
1603
|
-
**{
|
1604
|
-
col: 'datetime64[ns, UTC]'
|
1605
|
-
for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
|
1606
|
-
},
|
1607
|
-
**{
|
1608
|
-
col: 'datetime64[ns]'
|
1609
|
-
for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
|
1610
|
-
},
|
1541
|
+
**get_special_cols(df),
|
1611
1542
|
**pipe.dtypes
|
1612
1543
|
}
|
1613
1544
|
autoincrement = (
|
@@ -1648,8 +1579,8 @@ def sync_pipe(
|
|
1648
1579
|
self,
|
1649
1580
|
pipe: mrsm.Pipe,
|
1650
1581
|
df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
|
1651
|
-
begin:
|
1652
|
-
end:
|
1582
|
+
begin: Union[datetime, int, None] = None,
|
1583
|
+
end: Union[datetime, int, None] = None,
|
1653
1584
|
chunksize: Optional[int] = -1,
|
1654
1585
|
check_existing: bool = True,
|
1655
1586
|
blocking: bool = True,
|
@@ -1669,11 +1600,11 @@ def sync_pipe(
|
|
1669
1600
|
An optional DataFrame or equivalent to sync into the pipe.
|
1670
1601
|
Defaults to `None`.
|
1671
1602
|
|
1672
|
-
begin:
|
1603
|
+
begin: Union[datetime, int, None], default None
|
1673
1604
|
Optionally specify the earliest datetime to search for data.
|
1674
1605
|
Defaults to `None`.
|
1675
1606
|
|
1676
|
-
end:
|
1607
|
+
end: Union[datetime, int, None], default None
|
1677
1608
|
Optionally specify the latest datetime to search for data.
|
1678
1609
|
Defaults to `None`.
|
1679
1610
|
|
@@ -1759,18 +1690,16 @@ def sync_pipe(
|
|
1759
1690
|
_ = pipe.__dict__.pop('_columns_types', None)
|
1760
1691
|
if not self.exec_queries(alter_cols_queries, debug=debug):
|
1761
1692
|
warn(f"Failed to alter columns for {pipe}.")
|
1762
|
-
else:
|
1763
|
-
_ = pipe.infer_dtypes(persist=True)
|
1764
1693
|
|
1765
1694
|
### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
|
1766
1695
|
### so infer bools and persist them to `dtypes`.
|
1767
1696
|
if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
|
1768
|
-
pipe_dtypes = pipe.
|
1697
|
+
pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
|
1769
1698
|
new_bool_cols = {
|
1770
1699
|
col: 'bool[pyarrow]'
|
1771
1700
|
for col, typ in df.dtypes.items()
|
1772
1701
|
if col not in pipe_dtypes
|
1773
|
-
|
1702
|
+
and are_dtypes_equal(str(typ), 'bool')
|
1774
1703
|
}
|
1775
1704
|
pipe_dtypes.update(new_bool_cols)
|
1776
1705
|
pipe.dtypes = pipe_dtypes
|
@@ -1833,10 +1762,12 @@ def sync_pipe(
|
|
1833
1762
|
)
|
1834
1763
|
)
|
1835
1764
|
if autoincrement and autoincrement not in pipe.parameters:
|
1836
|
-
|
1837
|
-
|
1838
|
-
|
1839
|
-
|
1765
|
+
update_success, update_msg = pipe.update_parameters(
|
1766
|
+
{'autoincrement': autoincrement},
|
1767
|
+
debug=debug,
|
1768
|
+
)
|
1769
|
+
if not update_success:
|
1770
|
+
return update_success, update_msg
|
1840
1771
|
|
1841
1772
|
def _check_pk(_df_to_clear):
|
1842
1773
|
if _df_to_clear is None:
|
@@ -1969,7 +1900,11 @@ def sync_pipe(
|
|
1969
1900
|
if col and col in existing_cols
|
1970
1901
|
] if not primary_key or self.flavor == 'oracle' else (
|
1971
1902
|
[dt_col, primary_key]
|
1972
|
-
if
|
1903
|
+
if (
|
1904
|
+
self.flavor in ('timescaledb', 'timescaledb-ha')
|
1905
|
+
and dt_col
|
1906
|
+
and dt_col in update_df.columns
|
1907
|
+
)
|
1973
1908
|
else [primary_key]
|
1974
1909
|
)
|
1975
1910
|
update_queries = get_update_queries(
|
@@ -2779,7 +2714,6 @@ def pipe_exists(
|
|
2779
2714
|
debug=debug,
|
2780
2715
|
)
|
2781
2716
|
if debug:
|
2782
|
-
from meerschaum.utils.debug import dprint
|
2783
2717
|
dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
|
2784
2718
|
return exists
|
2785
2719
|
|
@@ -2833,7 +2767,6 @@ def get_pipe_rowcount(
|
|
2833
2767
|
error(msg)
|
2834
2768
|
return None
|
2835
2769
|
|
2836
|
-
|
2837
2770
|
flavor = self.flavor if not remote else pipe.connector.flavor
|
2838
2771
|
conn = self if not remote else pipe.connector
|
2839
2772
|
_pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
|
@@ -3117,11 +3050,17 @@ def get_pipe_columns_types(
|
|
3117
3050
|
debug=debug,
|
3118
3051
|
)
|
3119
3052
|
|
3053
|
+
if debug:
|
3054
|
+
dprint(f"Fetching columns_types for {pipe} with via SQLAlchemy table.")
|
3055
|
+
|
3120
3056
|
table_columns = {}
|
3121
3057
|
try:
|
3122
3058
|
pipe_table = self.get_pipe_table(pipe, debug=debug)
|
3123
3059
|
if pipe_table is None:
|
3124
3060
|
return {}
|
3061
|
+
if debug:
|
3062
|
+
dprint(f"Found columns:")
|
3063
|
+
mrsm.pprint(dict(pipe_table.columns))
|
3125
3064
|
for col in pipe_table.columns:
|
3126
3065
|
table_columns[str(col.name)] = str(col.type)
|
3127
3066
|
except Exception as e:
|
@@ -3313,10 +3252,9 @@ def get_alter_columns_queries(
|
|
3313
3252
|
-------
|
3314
3253
|
A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
|
3315
3254
|
"""
|
3316
|
-
if not pipe.exists(debug=debug):
|
3255
|
+
if not pipe.exists(debug=debug) or pipe.static:
|
3317
3256
|
return []
|
3318
|
-
|
3319
|
-
return
|
3257
|
+
|
3320
3258
|
from meerschaum.utils.sql import (
|
3321
3259
|
sql_item_name,
|
3322
3260
|
get_table_cols_types,
|
@@ -3362,7 +3300,8 @@ def get_alter_columns_queries(
|
|
3362
3300
|
debug=debug,
|
3363
3301
|
).items()
|
3364
3302
|
}
|
3365
|
-
|
3303
|
+
pipe_dtypes = pipe.dtypes
|
3304
|
+
pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
|
3366
3305
|
pd_db_df_aliases = {
|
3367
3306
|
'int': 'bool',
|
3368
3307
|
'float': 'bool',
|
@@ -3370,7 +3309,10 @@ def get_alter_columns_queries(
|
|
3370
3309
|
'guid': 'object',
|
3371
3310
|
}
|
3372
3311
|
if self.flavor == 'oracle':
|
3373
|
-
pd_db_df_aliases
|
3312
|
+
pd_db_df_aliases.update({
|
3313
|
+
'int': 'numeric',
|
3314
|
+
'date': 'datetime',
|
3315
|
+
})
|
3374
3316
|
|
3375
3317
|
altered_cols = {
|
3376
3318
|
col: (db_cols_types.get(col, 'object'), typ)
|
@@ -3379,6 +3321,10 @@ def get_alter_columns_queries(
|
|
3379
3321
|
and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
|
3380
3322
|
}
|
3381
3323
|
|
3324
|
+
if debug and altered_cols:
|
3325
|
+
dprint(f"Columns to be altered:")
|
3326
|
+
mrsm.pprint(altered_cols)
|
3327
|
+
|
3382
3328
|
### NOTE: Sometimes bools are coerced into ints or floats.
|
3383
3329
|
altered_cols_to_ignore = set()
|
3384
3330
|
for col, (db_typ, df_typ) in altered_cols.items():
|
@@ -3405,21 +3351,29 @@ def get_alter_columns_queries(
|
|
3405
3351
|
if db_is_bool_compatible and df_is_bool_compatible:
|
3406
3352
|
altered_cols_to_ignore.add(bool_col)
|
3407
3353
|
|
3354
|
+
if debug and altered_cols_to_ignore:
|
3355
|
+
dprint(f"Ignoring the following altered columns (false positives).")
|
3356
|
+
mrsm.pprint(altered_cols_to_ignore)
|
3357
|
+
|
3408
3358
|
for col in altered_cols_to_ignore:
|
3409
3359
|
_ = altered_cols.pop(col, None)
|
3360
|
+
|
3410
3361
|
if not altered_cols:
|
3411
3362
|
return []
|
3412
3363
|
|
3413
3364
|
if numeric_cols:
|
3414
|
-
pipe.
|
3415
|
-
|
3416
|
-
|
3417
|
-
|
3418
|
-
|
3419
|
-
|
3420
|
-
|
3365
|
+
explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
|
3366
|
+
explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
|
3367
|
+
pipe.dtypes = explicit_pipe_dtypes
|
3368
|
+
if not pipe.temporary:
|
3369
|
+
edit_success, edit_msg = pipe.edit(debug=debug)
|
3370
|
+
if not edit_success:
|
3371
|
+
warn(
|
3372
|
+
f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
|
3373
|
+
+ f"{edit_msg}"
|
3374
|
+
)
|
3421
3375
|
else:
|
3422
|
-
numeric_cols.extend([col for col, typ in
|
3376
|
+
numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')])
|
3423
3377
|
|
3424
3378
|
numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
|
3425
3379
|
text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
|
@@ -3627,20 +3581,18 @@ def get_to_sql_dtype(
|
|
3627
3581
|
>>> get_to_sql_dtype(pipe, df)
|
3628
3582
|
{'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
|
3629
3583
|
"""
|
3630
|
-
from meerschaum.utils.dataframe import
|
3584
|
+
from meerschaum.utils.dataframe import get_special_cols
|
3631
3585
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
3632
3586
|
df_dtypes = {
|
3633
3587
|
col: str(typ)
|
3634
3588
|
for col, typ in df.dtypes.items()
|
3635
3589
|
}
|
3636
|
-
|
3637
|
-
|
3638
|
-
|
3639
|
-
df_dtypes.update({col: 'json' for col in json_cols})
|
3640
|
-
df_dtypes.update({col: 'numeric' for col in numeric_cols})
|
3641
|
-
df_dtypes.update({col: 'uuid' for col in uuid_cols})
|
3590
|
+
special_cols = get_special_cols(df)
|
3591
|
+
df_dtypes.update(special_cols)
|
3592
|
+
|
3642
3593
|
if update_dtypes:
|
3643
3594
|
df_dtypes.update(pipe.dtypes)
|
3595
|
+
|
3644
3596
|
return {
|
3645
3597
|
col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
|
3646
3598
|
for col, typ in df_dtypes.items()
|
@@ -3881,13 +3833,15 @@ def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
|
|
3881
3833
|
-------
|
3882
3834
|
A schema string or `None` if nothing is configured.
|
3883
3835
|
"""
|
3836
|
+
if self.flavor == 'sqlite':
|
3837
|
+
return self.schema
|
3884
3838
|
return pipe.parameters.get('schema', self.schema)
|
3885
3839
|
|
3886
3840
|
|
3887
3841
|
@staticmethod
|
3888
3842
|
def get_temporary_target(
|
3889
3843
|
target: str,
|
3890
|
-
transact_id: Optional[str
|
3844
|
+
transact_id: Optional[str] = None,
|
3891
3845
|
label: Optional[str] = None,
|
3892
3846
|
separator: Optional[str] = None,
|
3893
3847
|
) -> str:
|
@@ -3909,3 +3863,15 @@ def get_temporary_target(
|
|
3909
3863
|
+ transact_id
|
3910
3864
|
+ ((separator + label) if label else '')
|
3911
3865
|
)
|
3866
|
+
|
3867
|
+
|
3868
|
+
def _enforce_pipe_dtypes_chunks_hook(
|
3869
|
+
pipe: mrsm.Pipe,
|
3870
|
+
chunk_df: 'pd.DataFrame',
|
3871
|
+
debug: bool = False,
|
3872
|
+
**kwargs
|
3873
|
+
) -> 'pd.DataFrame':
|
3874
|
+
"""
|
3875
|
+
Enforce a pipe's dtypes on each chunk.
|
3876
|
+
"""
|
3877
|
+
return pipe.enforce_dtypes(chunk_df, debug=debug)
|
@@ -13,6 +13,35 @@ import json
|
|
13
13
|
import meerschaum as mrsm
|
14
14
|
from meerschaum.utils.typing import Optional, Any, List, SuccessTuple, Dict
|
15
15
|
|
16
|
+
|
17
|
+
def get_plugins_pipe(self) -> mrsm.Pipe:
|
18
|
+
"""
|
19
|
+
Return the internal metadata plugins pipe.
|
20
|
+
"""
|
21
|
+
users_pipe = self.get_users_pipe()
|
22
|
+
user_id_dtype = users_pipe.dtypes.get('user_id', 'int')
|
23
|
+
return mrsm.Pipe(
|
24
|
+
'mrsm', 'plugins',
|
25
|
+
instance=self,
|
26
|
+
temporary=True,
|
27
|
+
static=True,
|
28
|
+
null_indices=False,
|
29
|
+
columns={
|
30
|
+
'primary': 'plugin_id',
|
31
|
+
'user_id': 'user_id',
|
32
|
+
},
|
33
|
+
dtypes={
|
34
|
+
'plugin_name': 'string',
|
35
|
+
'user_id': user_id_dtype,
|
36
|
+
'attributes': 'json',
|
37
|
+
'version': 'string',
|
38
|
+
},
|
39
|
+
indices={
|
40
|
+
'unique': 'plugin_name',
|
41
|
+
},
|
42
|
+
)
|
43
|
+
|
44
|
+
|
16
45
|
def register_plugin(
|
17
46
|
self,
|
18
47
|
plugin: 'mrsm.core.Plugin',
|