meerschaum 2.9.5__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (158) hide show
  1. meerschaum/__init__.py +5 -2
  2. meerschaum/_internal/__init__.py +1 -0
  3. meerschaum/_internal/arguments/_parse_arguments.py +4 -4
  4. meerschaum/_internal/arguments/_parser.py +19 -2
  5. meerschaum/_internal/docs/index.py +49 -2
  6. meerschaum/_internal/entry.py +6 -6
  7. meerschaum/_internal/shell/Shell.py +1 -1
  8. meerschaum/_internal/static.py +356 -0
  9. meerschaum/actions/api.py +12 -2
  10. meerschaum/actions/bootstrap.py +7 -7
  11. meerschaum/actions/edit.py +142 -18
  12. meerschaum/actions/register.py +137 -6
  13. meerschaum/actions/show.py +117 -29
  14. meerschaum/actions/stop.py +4 -1
  15. meerschaum/actions/sync.py +1 -1
  16. meerschaum/actions/tag.py +9 -8
  17. meerschaum/actions/verify.py +5 -8
  18. meerschaum/api/__init__.py +11 -3
  19. meerschaum/api/_events.py +39 -2
  20. meerschaum/api/_oauth2.py +118 -8
  21. meerschaum/api/_tokens.py +102 -0
  22. meerschaum/api/dash/__init__.py +0 -3
  23. meerschaum/api/dash/callbacks/custom.py +2 -2
  24. meerschaum/api/dash/callbacks/dashboard.py +103 -19
  25. meerschaum/api/dash/callbacks/plugins.py +0 -1
  26. meerschaum/api/dash/callbacks/register.py +1 -1
  27. meerschaum/api/dash/callbacks/settings/__init__.py +1 -0
  28. meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
  29. meerschaum/api/dash/callbacks/settings/tokens.py +388 -0
  30. meerschaum/api/dash/components.py +30 -8
  31. meerschaum/api/dash/keys.py +19 -93
  32. meerschaum/api/dash/pages/dashboard.py +1 -20
  33. meerschaum/api/dash/pages/settings/__init__.py +1 -0
  34. meerschaum/api/dash/pages/settings/password_reset.py +1 -1
  35. meerschaum/api/dash/pages/settings/tokens.py +55 -0
  36. meerschaum/api/dash/pipes.py +94 -59
  37. meerschaum/api/dash/sessions.py +12 -0
  38. meerschaum/api/dash/tokens.py +606 -0
  39. meerschaum/api/dash/websockets.py +1 -1
  40. meerschaum/api/dash/webterm.py +4 -0
  41. meerschaum/api/models/__init__.py +23 -3
  42. meerschaum/api/models/_actions.py +22 -0
  43. meerschaum/api/models/_pipes.py +85 -7
  44. meerschaum/api/models/_tokens.py +81 -0
  45. meerschaum/api/resources/templates/termpage.html +12 -0
  46. meerschaum/api/routes/__init__.py +1 -0
  47. meerschaum/api/routes/_actions.py +3 -4
  48. meerschaum/api/routes/_connectors.py +3 -7
  49. meerschaum/api/routes/_jobs.py +14 -35
  50. meerschaum/api/routes/_login.py +49 -12
  51. meerschaum/api/routes/_misc.py +5 -10
  52. meerschaum/api/routes/_pipes.py +173 -140
  53. meerschaum/api/routes/_plugins.py +38 -28
  54. meerschaum/api/routes/_tokens.py +236 -0
  55. meerschaum/api/routes/_users.py +47 -35
  56. meerschaum/api/routes/_version.py +3 -3
  57. meerschaum/config/__init__.py +43 -20
  58. meerschaum/config/_default.py +43 -6
  59. meerschaum/config/_edit.py +28 -24
  60. meerschaum/config/_environment.py +1 -1
  61. meerschaum/config/_patch.py +6 -6
  62. meerschaum/config/_paths.py +5 -1
  63. meerschaum/config/_read_config.py +65 -34
  64. meerschaum/config/_sync.py +6 -3
  65. meerschaum/config/_version.py +1 -1
  66. meerschaum/config/stack/__init__.py +31 -11
  67. meerschaum/config/static.py +18 -0
  68. meerschaum/connectors/_Connector.py +10 -4
  69. meerschaum/connectors/__init__.py +4 -20
  70. meerschaum/connectors/api/_APIConnector.py +34 -6
  71. meerschaum/connectors/api/_actions.py +2 -2
  72. meerschaum/connectors/api/_jobs.py +1 -1
  73. meerschaum/connectors/api/_login.py +33 -7
  74. meerschaum/connectors/api/_misc.py +2 -2
  75. meerschaum/connectors/api/_pipes.py +16 -31
  76. meerschaum/connectors/api/_plugins.py +2 -2
  77. meerschaum/connectors/api/_request.py +1 -1
  78. meerschaum/connectors/api/_tokens.py +146 -0
  79. meerschaum/connectors/api/_users.py +70 -58
  80. meerschaum/connectors/instance/_InstanceConnector.py +83 -0
  81. meerschaum/connectors/instance/__init__.py +10 -0
  82. meerschaum/connectors/instance/_pipes.py +442 -0
  83. meerschaum/connectors/instance/_plugins.py +151 -0
  84. meerschaum/connectors/instance/_tokens.py +296 -0
  85. meerschaum/connectors/instance/_users.py +181 -0
  86. meerschaum/connectors/parse.py +4 -1
  87. meerschaum/connectors/sql/_SQLConnector.py +8 -5
  88. meerschaum/connectors/sql/_cli.py +12 -11
  89. meerschaum/connectors/sql/_create_engine.py +9 -168
  90. meerschaum/connectors/sql/_fetch.py +2 -18
  91. meerschaum/connectors/sql/_pipes.py +156 -190
  92. meerschaum/connectors/sql/_plugins.py +29 -0
  93. meerschaum/connectors/sql/_sql.py +46 -21
  94. meerschaum/connectors/sql/_users.py +29 -2
  95. meerschaum/connectors/sql/tables/__init__.py +1 -1
  96. meerschaum/connectors/valkey/_ValkeyConnector.py +2 -4
  97. meerschaum/connectors/valkey/_pipes.py +53 -26
  98. meerschaum/connectors/valkey/_plugins.py +2 -26
  99. meerschaum/core/Pipe/__init__.py +59 -19
  100. meerschaum/core/Pipe/_attributes.py +412 -90
  101. meerschaum/core/Pipe/_bootstrap.py +54 -24
  102. meerschaum/core/Pipe/_data.py +96 -18
  103. meerschaum/core/Pipe/_dtypes.py +48 -18
  104. meerschaum/core/Pipe/_edit.py +14 -4
  105. meerschaum/core/Pipe/_fetch.py +1 -1
  106. meerschaum/core/Pipe/_show.py +5 -5
  107. meerschaum/core/Pipe/_sync.py +118 -193
  108. meerschaum/core/Pipe/_verify.py +4 -4
  109. meerschaum/{plugins → core/Plugin}/_Plugin.py +9 -11
  110. meerschaum/core/Plugin/__init__.py +1 -1
  111. meerschaum/core/Token/_Token.py +220 -0
  112. meerschaum/core/Token/__init__.py +12 -0
  113. meerschaum/core/User/_User.py +34 -8
  114. meerschaum/core/User/__init__.py +9 -1
  115. meerschaum/core/__init__.py +1 -0
  116. meerschaum/jobs/_Job.py +3 -2
  117. meerschaum/jobs/__init__.py +3 -2
  118. meerschaum/jobs/systemd.py +1 -1
  119. meerschaum/models/__init__.py +35 -0
  120. meerschaum/models/pipes.py +247 -0
  121. meerschaum/models/tokens.py +38 -0
  122. meerschaum/models/users.py +26 -0
  123. meerschaum/plugins/__init__.py +22 -7
  124. meerschaum/plugins/bootstrap.py +2 -1
  125. meerschaum/utils/_get_pipes.py +68 -27
  126. meerschaum/utils/daemon/Daemon.py +2 -1
  127. meerschaum/utils/daemon/__init__.py +30 -2
  128. meerschaum/utils/dataframe.py +473 -81
  129. meerschaum/utils/debug.py +15 -15
  130. meerschaum/utils/dtypes/__init__.py +473 -34
  131. meerschaum/utils/dtypes/sql.py +368 -28
  132. meerschaum/utils/formatting/__init__.py +1 -1
  133. meerschaum/utils/formatting/_pipes.py +5 -4
  134. meerschaum/utils/formatting/_shell.py +11 -9
  135. meerschaum/utils/misc.py +246 -148
  136. meerschaum/utils/packages/__init__.py +10 -27
  137. meerschaum/utils/packages/_packages.py +41 -34
  138. meerschaum/utils/pipes.py +181 -0
  139. meerschaum/utils/process.py +1 -1
  140. meerschaum/utils/prompt.py +3 -1
  141. meerschaum/utils/schedule.py +2 -1
  142. meerschaum/utils/sql.py +121 -44
  143. meerschaum/utils/typing.py +1 -4
  144. meerschaum/utils/venv/_Venv.py +2 -2
  145. meerschaum/utils/venv/__init__.py +5 -7
  146. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/METADATA +92 -96
  147. meerschaum-3.0.0rc2.dist-info/RECORD +283 -0
  148. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/WHEEL +1 -1
  149. meerschaum-3.0.0rc2.dist-info/licenses/NOTICE +2 -0
  150. meerschaum/api/models/_interfaces.py +0 -15
  151. meerschaum/api/models/_locations.py +0 -15
  152. meerschaum/api/models/_metrics.py +0 -15
  153. meerschaum/config/static/__init__.py +0 -186
  154. meerschaum-2.9.5.dist-info/RECORD +0 -263
  155. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/entry_points.txt +0 -0
  156. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/licenses/LICENSE +0 -0
  157. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/top_level.txt +0 -0
  158. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/zip-safe +0 -0
@@ -25,7 +25,6 @@ def register_pipe(
25
25
  Register a new pipe.
26
26
  A pipe's attributes must be set before registering.
27
27
  """
28
- from meerschaum.utils.debug import dprint
29
28
  from meerschaum.utils.packages import attempt_import
30
29
  from meerschaum.utils.sql import json_flavors
31
30
 
@@ -45,7 +44,7 @@ def register_pipe(
45
44
  ### (which shouldn't be able to be registered anyway but that's an issue for later).
46
45
  parameters = None
47
46
  try:
48
- parameters = pipe.parameters
47
+ parameters = pipe.get_parameters(apply_symlinks=False)
49
48
  except Exception as e:
50
49
  if debug:
51
50
  dprint(str(e))
@@ -76,7 +75,7 @@ def register_pipe(
76
75
 
77
76
  def edit_pipe(
78
77
  self,
79
- pipe : mrsm.Pipe = None,
78
+ pipe: mrsm.Pipe,
80
79
  patch: bool = False,
81
80
  debug: bool = False,
82
81
  **kw : Any
@@ -108,10 +107,10 @@ def edit_pipe(
108
107
  original_parameters = Pipe(
109
108
  pipe.connector_keys, pipe.metric_key, pipe.location_key,
110
109
  mrsm_instance=pipe.instance_keys
111
- ).parameters
110
+ ).get_parameters(apply_symlinks=False)
112
111
  parameters = apply_patch_to_config(
113
112
  original_parameters,
114
- pipe.parameters
113
+ pipe._attributes['parameters']
115
114
  )
116
115
 
117
116
  ### ensure pipes table exists
@@ -170,11 +169,10 @@ def fetch_pipes_keys(
170
169
  debug: bool, default False
171
170
  Verbosity toggle.
172
171
  """
173
- from meerschaum.utils.debug import dprint
174
172
  from meerschaum.utils.packages import attempt_import
175
173
  from meerschaum.utils.misc import separate_negation_values
176
174
  from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
177
- from meerschaum.config.static import STATIC_CONFIG
175
+ from meerschaum._internal.static import STATIC_CONFIG
178
176
  import json
179
177
  from copy import deepcopy
180
178
  sqlalchemy, sqlalchemy_sql_functions = attempt_import(
@@ -338,7 +336,6 @@ def create_indices(
338
336
  """
339
337
  Create a pipe's indices.
340
338
  """
341
- from meerschaum.utils.debug import dprint
342
339
  if debug:
343
340
  dprint(f"Creating indices for {pipe}...")
344
341
 
@@ -392,7 +389,6 @@ def drop_indices(
392
389
  """
393
390
  Drop a pipe's indices.
394
391
  """
395
- from meerschaum.utils.debug import dprint
396
392
  if debug:
397
393
  dprint(f"Dropping indices for {pipe}...")
398
394
 
@@ -603,7 +599,10 @@ def get_create_index_queries(
603
599
  ### create datetime index
604
600
  dt_query = None
605
601
  if _datetime is not None:
606
- if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
602
+ if (
603
+ self.flavor in ('timescaledb', 'timescaledb-ha')
604
+ and pipe.parameters.get('hypertable', True)
605
+ ):
607
606
  _id_count = (
608
607
  get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
609
608
  if (_id is not None and _create_space_partition) else None
@@ -719,7 +718,7 @@ def get_create_index_queries(
719
718
  f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
720
719
  )
721
720
  ])
722
- elif self.flavor == 'timescaledb':
721
+ elif self.flavor in ('timescaledb', 'timescaledb-ha'):
723
722
  primary_queries.extend([
724
723
  (
725
724
  f"ALTER TABLE {_pipe_name}\n"
@@ -758,7 +757,7 @@ def get_create_index_queries(
758
757
 
759
758
  ### create id index
760
759
  if _id_name is not None:
761
- if self.flavor == 'timescaledb':
760
+ if self.flavor in ('timescaledb', 'timescaledb-ha'):
762
761
  ### Already created indices via create_hypertable.
763
762
  id_query = (
764
763
  None if (_id is not None and _create_space_partition)
@@ -797,7 +796,7 @@ def get_create_index_queries(
797
796
 
798
797
  cols_names_str = ", ".join(cols_names)
799
798
  index_query_params_clause = f" ({cols_names_str})"
800
- if self.flavor == 'postgis':
799
+ if self.flavor in ('postgis', 'timescaledb-ha'):
801
800
  for col in cols:
802
801
  col_typ = existing_cols_pd_types.get(cols[0], 'object')
803
802
  if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'):
@@ -1005,6 +1004,8 @@ def get_pipe_data(
1005
1004
  limit: Optional[int] = None,
1006
1005
  begin_add_minutes: int = 0,
1007
1006
  end_add_minutes: int = 0,
1007
+ chunksize: Optional[int] = -1,
1008
+ as_iterator: bool = False,
1008
1009
  debug: bool = False,
1009
1010
  **kw: Any
1010
1011
  ) -> Union[pd.DataFrame, None]:
@@ -1041,14 +1042,17 @@ def get_pipe_data(
1041
1042
  If specified, limit the number of rows retrieved to this value.
1042
1043
 
1043
1044
  begin_add_minutes: int, default 0
1044
- The number of minutes to add to the `begin` datetime (i.e. `DATEADD`.
1045
+ The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1045
1046
 
1046
1047
  end_add_minutes: int, default 0
1047
- The number of minutes to add to the `end` datetime (i.e. `DATEADD`.
1048
+ The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1048
1049
 
1049
1050
  chunksize: Optional[int], default -1
1050
1051
  The size of dataframe chunks to load into memory.
1051
1052
 
1053
+ as_iterator: bool, default False
1054
+ If `True`, return the chunks iterator directly.
1055
+
1052
1056
  debug: bool, default False
1053
1057
  Verbosity toggle.
1054
1058
 
@@ -1057,43 +1061,58 @@ def get_pipe_data(
1057
1061
  A `pd.DataFrame` of the pipe's data.
1058
1062
 
1059
1063
  """
1060
- import json
1061
- from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
1064
+ import functools
1062
1065
  from meerschaum.utils.packages import import_pandas
1063
- from meerschaum.utils.dtypes import (
1064
- attempt_cast_to_numeric,
1065
- attempt_cast_to_uuid,
1066
- attempt_cast_to_bytes,
1067
- attempt_cast_to_geometry,
1068
- are_dtypes_equal,
1069
- )
1066
+ from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
1070
1067
  from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
1071
1068
  pd = import_pandas()
1072
1069
  is_dask = 'dask' in pd.__name__
1073
1070
 
1074
1071
  cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
1072
+ pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {}
1073
+
1074
+ remote_pandas_types = {
1075
+ col: to_pandas_dtype(get_pd_type_from_db_type(typ))
1076
+ for col, typ in cols_types.items()
1077
+ }
1078
+ remote_dt_cols_types = {
1079
+ col: typ
1080
+ for col, typ in remote_pandas_types.items()
1081
+ if are_dtypes_equal(typ, 'datetime')
1082
+ }
1083
+ remote_dt_tz_aware_cols_types = {
1084
+ col: typ
1085
+ for col, typ in remote_dt_cols_types.items()
1086
+ if ',' in typ or typ == 'datetime'
1087
+ }
1088
+ remote_dt_tz_naive_cols_types = {
1089
+ col: typ
1090
+ for col, typ in remote_dt_cols_types.items()
1091
+ if col not in remote_dt_tz_aware_cols_types
1092
+ }
1093
+
1094
+ configured_pandas_types = {
1095
+ col: to_pandas_dtype(typ)
1096
+ for col, typ in pipe_dtypes.items()
1097
+ }
1098
+ configured_lower_precision_dt_cols_types = {
1099
+ col: typ
1100
+ for col, typ in pipe_dtypes.items()
1101
+ if (
1102
+ are_dtypes_equal('datetime', typ)
1103
+ and '[' in typ
1104
+ and 'ns' not in typ
1105
+ )
1106
+
1107
+ }
1108
+
1075
1109
  dtypes = {
1076
- **{
1077
- p_col: to_pandas_dtype(p_typ)
1078
- for p_col, p_typ in pipe.dtypes.items()
1079
- },
1080
- **{
1081
- col: get_pd_type_from_db_type(typ)
1082
- for col, typ in cols_types.items()
1083
- }
1110
+ **remote_pandas_types,
1111
+ **configured_pandas_types,
1112
+ **remote_dt_tz_aware_cols_types,
1113
+ **remote_dt_tz_naive_cols_types,
1114
+ **configured_lower_precision_dt_cols_types
1084
1115
  } if pipe.enforce else {}
1085
- if dtypes:
1086
- if self.flavor == 'sqlite':
1087
- if not pipe.columns.get('datetime', None):
1088
- _dt = pipe.guess_datetime()
1089
- else:
1090
- _dt = pipe.get_columns('datetime')
1091
-
1092
- if _dt:
1093
- dt_type = dtypes.get(_dt, 'object').lower()
1094
- if 'datetime' not in dt_type:
1095
- if 'int' not in dt_type:
1096
- dtypes[_dt] = 'datetime64[ns, UTC]'
1097
1116
 
1098
1117
  existing_cols = cols_types.keys()
1099
1118
  select_columns = (
@@ -1110,13 +1129,20 @@ def get_pipe_data(
1110
1129
  and col not in (omit_columns or [])
1111
1130
  ]
1112
1131
  ) if pipe.enforce else select_columns
1132
+
1113
1133
  if select_columns:
1114
1134
  dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
1135
+
1115
1136
  dtypes = {
1116
- col: to_pandas_dtype(typ)
1137
+ col: typ
1117
1138
  for col, typ in dtypes.items()
1118
- if col in select_columns and col not in (omit_columns or [])
1139
+ if col in (select_columns or [col]) and col not in (omit_columns or [])
1119
1140
  } if pipe.enforce else {}
1141
+
1142
+ if debug:
1143
+ dprint(f"[{self}] `read()` dtypes:")
1144
+ mrsm.pprint(dtypes)
1145
+
1120
1146
  query = self.get_pipe_data_query(
1121
1147
  pipe,
1122
1148
  select_columns=select_columns,
@@ -1132,91 +1158,25 @@ def get_pipe_data(
1132
1158
  **kw
1133
1159
  )
1134
1160
 
1161
+ read_kwargs = {}
1135
1162
  if is_dask:
1136
1163
  index_col = pipe.columns.get('datetime', None)
1137
- kw['index_col'] = index_col
1164
+ read_kwargs['index_col'] = index_col
1138
1165
 
1139
- numeric_columns = [
1140
- col
1141
- for col, typ in pipe.dtypes.items()
1142
- if typ.startswith('numeric') and col in dtypes
1143
- ]
1144
- uuid_columns = [
1145
- col
1146
- for col, typ in pipe.dtypes.items()
1147
- if typ == 'uuid' and col in dtypes
1148
- ]
1149
- bytes_columns = [
1150
- col
1151
- for col, typ in pipe.dtypes.items()
1152
- if typ == 'bytes' and col in dtypes
1153
- ]
1154
- geometry_columns = [
1155
- col
1156
- for col, typ in pipe.dtypes.items()
1157
- if typ.startswith('geometry') and col in dtypes
1158
- ]
1159
-
1160
- kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
1161
-
1162
- df = self.read(
1166
+ chunks = self.read(
1163
1167
  query,
1168
+ chunksize=chunksize,
1169
+ as_iterator=True,
1170
+ coerce_float=False,
1164
1171
  dtype=dtypes,
1165
1172
  debug=debug,
1166
- **kw
1173
+ **read_kwargs
1167
1174
  )
1168
- for col in numeric_columns:
1169
- if col not in df.columns:
1170
- continue
1171
- df[col] = df[col].apply(attempt_cast_to_numeric)
1172
1175
 
1173
- for col in uuid_columns:
1174
- if col not in df.columns:
1175
- continue
1176
- df[col] = df[col].apply(attempt_cast_to_uuid)
1177
-
1178
- for col in bytes_columns:
1179
- if col not in df.columns:
1180
- continue
1181
- df[col] = df[col].apply(attempt_cast_to_bytes)
1176
+ if as_iterator:
1177
+ return chunks
1182
1178
 
1183
- for col in geometry_columns:
1184
- if col not in df.columns:
1185
- continue
1186
- df[col] = df[col].apply(attempt_cast_to_geometry)
1187
-
1188
- if self.flavor == 'sqlite':
1189
- ignore_dt_cols = [
1190
- col
1191
- for col, dtype in pipe.dtypes.items()
1192
- if not are_dtypes_equal(str(dtype), 'datetime')
1193
- ]
1194
- ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
1195
- df = (
1196
- parse_df_datetimes(
1197
- df,
1198
- ignore_cols=ignore_dt_cols,
1199
- chunksize=kw.get('chunksize', None),
1200
- strip_timezone=(pipe.tzinfo is None),
1201
- debug=debug,
1202
- ) if isinstance(df, pd.DataFrame) else (
1203
- [
1204
- parse_df_datetimes(
1205
- c,
1206
- ignore_cols=ignore_dt_cols,
1207
- chunksize=kw.get('chunksize', None),
1208
- strip_timezone=(pipe.tzinfo is None),
1209
- debug=debug,
1210
- )
1211
- for c in df
1212
- ]
1213
- )
1214
- )
1215
- for col, typ in dtypes.items():
1216
- if typ != 'json':
1217
- continue
1218
- df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
1219
- return df
1179
+ return pd.concat(chunks)
1220
1180
 
1221
1181
 
1222
1182
  def get_pipe_data_query(
@@ -1419,7 +1379,7 @@ def get_pipe_data_query(
1419
1379
  if k in existing_cols or skip_existing_cols_check
1420
1380
  }
1421
1381
  if valid_params:
1422
- where += build_where(valid_params, self).replace(
1382
+ where += ' ' + build_where(valid_params, self).lstrip().replace(
1423
1383
  'WHERE', (' AND' if is_dt_bound else " ")
1424
1384
  )
1425
1385
 
@@ -1549,13 +1509,7 @@ def create_pipe_table_from_df(
1549
1509
  """
1550
1510
  Create a pipe's table from its configured dtypes and an incoming dataframe.
1551
1511
  """
1552
- from meerschaum.utils.dataframe import (
1553
- get_json_cols,
1554
- get_numeric_cols,
1555
- get_uuid_cols,
1556
- get_datetime_cols,
1557
- get_bytes_cols,
1558
- )
1512
+ from meerschaum.utils.dataframe import get_special_cols
1559
1513
  from meerschaum.utils.sql import (
1560
1514
  get_create_table_queries,
1561
1515
  sql_item_name,
@@ -1584,30 +1538,7 @@ def create_pipe_table_from_df(
1584
1538
  for col_ix, col in pipe.columns.items()
1585
1539
  if col and col_ix != 'primary'
1586
1540
  },
1587
- **{
1588
- col: 'uuid'
1589
- for col in get_uuid_cols(df)
1590
- },
1591
- **{
1592
- col: 'json'
1593
- for col in get_json_cols(df)
1594
- },
1595
- **{
1596
- col: 'numeric'
1597
- for col in get_numeric_cols(df)
1598
- },
1599
- **{
1600
- col: 'bytes'
1601
- for col in get_bytes_cols(df)
1602
- },
1603
- **{
1604
- col: 'datetime64[ns, UTC]'
1605
- for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
1606
- },
1607
- **{
1608
- col: 'datetime64[ns]'
1609
- for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
1610
- },
1541
+ **get_special_cols(df),
1611
1542
  **pipe.dtypes
1612
1543
  }
1613
1544
  autoincrement = (
@@ -1648,8 +1579,8 @@ def sync_pipe(
1648
1579
  self,
1649
1580
  pipe: mrsm.Pipe,
1650
1581
  df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
1651
- begin: Optional[datetime] = None,
1652
- end: Optional[datetime] = None,
1582
+ begin: Union[datetime, int, None] = None,
1583
+ end: Union[datetime, int, None] = None,
1653
1584
  chunksize: Optional[int] = -1,
1654
1585
  check_existing: bool = True,
1655
1586
  blocking: bool = True,
@@ -1669,11 +1600,11 @@ def sync_pipe(
1669
1600
  An optional DataFrame or equivalent to sync into the pipe.
1670
1601
  Defaults to `None`.
1671
1602
 
1672
- begin: Optional[datetime], default None
1603
+ begin: Union[datetime, int, None], default None
1673
1604
  Optionally specify the earliest datetime to search for data.
1674
1605
  Defaults to `None`.
1675
1606
 
1676
- end: Optional[datetime], default None
1607
+ end: Union[datetime, int, None], default None
1677
1608
  Optionally specify the latest datetime to search for data.
1678
1609
  Defaults to `None`.
1679
1610
 
@@ -1759,18 +1690,16 @@ def sync_pipe(
1759
1690
  _ = pipe.__dict__.pop('_columns_types', None)
1760
1691
  if not self.exec_queries(alter_cols_queries, debug=debug):
1761
1692
  warn(f"Failed to alter columns for {pipe}.")
1762
- else:
1763
- _ = pipe.infer_dtypes(persist=True)
1764
1693
 
1765
1694
  ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
1766
1695
  ### so infer bools and persist them to `dtypes`.
1767
1696
  if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
1768
- pipe_dtypes = pipe.dtypes
1697
+ pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
1769
1698
  new_bool_cols = {
1770
1699
  col: 'bool[pyarrow]'
1771
1700
  for col, typ in df.dtypes.items()
1772
1701
  if col not in pipe_dtypes
1773
- and are_dtypes_equal(str(typ), 'bool')
1702
+ and are_dtypes_equal(str(typ), 'bool')
1774
1703
  }
1775
1704
  pipe_dtypes.update(new_bool_cols)
1776
1705
  pipe.dtypes = pipe_dtypes
@@ -1833,10 +1762,12 @@ def sync_pipe(
1833
1762
  )
1834
1763
  )
1835
1764
  if autoincrement and autoincrement not in pipe.parameters:
1836
- pipe.parameters['autoincrement'] = autoincrement
1837
- edit_success, edit_msg = pipe.edit(debug=debug)
1838
- if not edit_success:
1839
- return edit_success, edit_msg
1765
+ update_success, update_msg = pipe.update_parameters(
1766
+ {'autoincrement': autoincrement},
1767
+ debug=debug,
1768
+ )
1769
+ if not update_success:
1770
+ return update_success, update_msg
1840
1771
 
1841
1772
  def _check_pk(_df_to_clear):
1842
1773
  if _df_to_clear is None:
@@ -1969,7 +1900,11 @@ def sync_pipe(
1969
1900
  if col and col in existing_cols
1970
1901
  ] if not primary_key or self.flavor == 'oracle' else (
1971
1902
  [dt_col, primary_key]
1972
- if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
1903
+ if (
1904
+ self.flavor in ('timescaledb', 'timescaledb-ha')
1905
+ and dt_col
1906
+ and dt_col in update_df.columns
1907
+ )
1973
1908
  else [primary_key]
1974
1909
  )
1975
1910
  update_queries = get_update_queries(
@@ -2779,7 +2714,6 @@ def pipe_exists(
2779
2714
  debug=debug,
2780
2715
  )
2781
2716
  if debug:
2782
- from meerschaum.utils.debug import dprint
2783
2717
  dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
2784
2718
  return exists
2785
2719
 
@@ -2833,7 +2767,6 @@ def get_pipe_rowcount(
2833
2767
  error(msg)
2834
2768
  return None
2835
2769
 
2836
-
2837
2770
  flavor = self.flavor if not remote else pipe.connector.flavor
2838
2771
  conn = self if not remote else pipe.connector
2839
2772
  _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
@@ -3117,11 +3050,17 @@ def get_pipe_columns_types(
3117
3050
  debug=debug,
3118
3051
  )
3119
3052
 
3053
+ if debug:
3054
+ dprint(f"Fetching columns_types for {pipe} with via SQLAlchemy table.")
3055
+
3120
3056
  table_columns = {}
3121
3057
  try:
3122
3058
  pipe_table = self.get_pipe_table(pipe, debug=debug)
3123
3059
  if pipe_table is None:
3124
3060
  return {}
3061
+ if debug:
3062
+ dprint(f"Found columns:")
3063
+ mrsm.pprint(dict(pipe_table.columns))
3125
3064
  for col in pipe_table.columns:
3126
3065
  table_columns[str(col.name)] = str(col.type)
3127
3066
  except Exception as e:
@@ -3313,10 +3252,9 @@ def get_alter_columns_queries(
3313
3252
  -------
3314
3253
  A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
3315
3254
  """
3316
- if not pipe.exists(debug=debug):
3255
+ if not pipe.exists(debug=debug) or pipe.static:
3317
3256
  return []
3318
- if pipe.static:
3319
- return
3257
+
3320
3258
  from meerschaum.utils.sql import (
3321
3259
  sql_item_name,
3322
3260
  get_table_cols_types,
@@ -3362,7 +3300,8 @@ def get_alter_columns_queries(
3362
3300
  debug=debug,
3363
3301
  ).items()
3364
3302
  }
3365
- pipe_bool_cols = [col for col, typ in pipe.dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3303
+ pipe_dtypes = pipe.dtypes
3304
+ pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3366
3305
  pd_db_df_aliases = {
3367
3306
  'int': 'bool',
3368
3307
  'float': 'bool',
@@ -3370,7 +3309,10 @@ def get_alter_columns_queries(
3370
3309
  'guid': 'object',
3371
3310
  }
3372
3311
  if self.flavor == 'oracle':
3373
- pd_db_df_aliases['int'] = 'numeric'
3312
+ pd_db_df_aliases.update({
3313
+ 'int': 'numeric',
3314
+ 'date': 'datetime',
3315
+ })
3374
3316
 
3375
3317
  altered_cols = {
3376
3318
  col: (db_cols_types.get(col, 'object'), typ)
@@ -3379,6 +3321,10 @@ def get_alter_columns_queries(
3379
3321
  and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
3380
3322
  }
3381
3323
 
3324
+ if debug and altered_cols:
3325
+ dprint(f"Columns to be altered:")
3326
+ mrsm.pprint(altered_cols)
3327
+
3382
3328
  ### NOTE: Sometimes bools are coerced into ints or floats.
3383
3329
  altered_cols_to_ignore = set()
3384
3330
  for col, (db_typ, df_typ) in altered_cols.items():
@@ -3405,21 +3351,29 @@ def get_alter_columns_queries(
3405
3351
  if db_is_bool_compatible and df_is_bool_compatible:
3406
3352
  altered_cols_to_ignore.add(bool_col)
3407
3353
 
3354
+ if debug and altered_cols_to_ignore:
3355
+ dprint(f"Ignoring the following altered columns (false positives).")
3356
+ mrsm.pprint(altered_cols_to_ignore)
3357
+
3408
3358
  for col in altered_cols_to_ignore:
3409
3359
  _ = altered_cols.pop(col, None)
3360
+
3410
3361
  if not altered_cols:
3411
3362
  return []
3412
3363
 
3413
3364
  if numeric_cols:
3414
- pipe.dtypes.update({col: 'numeric' for col in numeric_cols})
3415
- edit_success, edit_msg = pipe.edit(debug=debug)
3416
- if not edit_success:
3417
- warn(
3418
- f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
3419
- + f"{edit_msg}"
3420
- )
3365
+ explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
3366
+ explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
3367
+ pipe.dtypes = explicit_pipe_dtypes
3368
+ if not pipe.temporary:
3369
+ edit_success, edit_msg = pipe.edit(debug=debug)
3370
+ if not edit_success:
3371
+ warn(
3372
+ f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
3373
+ + f"{edit_msg}"
3374
+ )
3421
3375
  else:
3422
- numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
3376
+ numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')])
3423
3377
 
3424
3378
  numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3425
3379
  text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
@@ -3627,20 +3581,18 @@ def get_to_sql_dtype(
3627
3581
  >>> get_to_sql_dtype(pipe, df)
3628
3582
  {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3629
3583
  """
3630
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
3584
+ from meerschaum.utils.dataframe import get_special_cols
3631
3585
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
3632
3586
  df_dtypes = {
3633
3587
  col: str(typ)
3634
3588
  for col, typ in df.dtypes.items()
3635
3589
  }
3636
- json_cols = get_json_cols(df)
3637
- numeric_cols = get_numeric_cols(df)
3638
- uuid_cols = get_uuid_cols(df)
3639
- df_dtypes.update({col: 'json' for col in json_cols})
3640
- df_dtypes.update({col: 'numeric' for col in numeric_cols})
3641
- df_dtypes.update({col: 'uuid' for col in uuid_cols})
3590
+ special_cols = get_special_cols(df)
3591
+ df_dtypes.update(special_cols)
3592
+
3642
3593
  if update_dtypes:
3643
3594
  df_dtypes.update(pipe.dtypes)
3595
+
3644
3596
  return {
3645
3597
  col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
3646
3598
  for col, typ in df_dtypes.items()
@@ -3881,13 +3833,15 @@ def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
3881
3833
  -------
3882
3834
  A schema string or `None` if nothing is configured.
3883
3835
  """
3836
+ if self.flavor == 'sqlite':
3837
+ return self.schema
3884
3838
  return pipe.parameters.get('schema', self.schema)
3885
3839
 
3886
3840
 
3887
3841
  @staticmethod
3888
3842
  def get_temporary_target(
3889
3843
  target: str,
3890
- transact_id: Optional[str, None] = None,
3844
+ transact_id: Optional[str] = None,
3891
3845
  label: Optional[str] = None,
3892
3846
  separator: Optional[str] = None,
3893
3847
  ) -> str:
@@ -3909,3 +3863,15 @@ def get_temporary_target(
3909
3863
  + transact_id
3910
3864
  + ((separator + label) if label else '')
3911
3865
  )
3866
+
3867
+
3868
+ def _enforce_pipe_dtypes_chunks_hook(
3869
+ pipe: mrsm.Pipe,
3870
+ chunk_df: 'pd.DataFrame',
3871
+ debug: bool = False,
3872
+ **kwargs
3873
+ ) -> 'pd.DataFrame':
3874
+ """
3875
+ Enforce a pipe's dtypes on each chunk.
3876
+ """
3877
+ return pipe.enforce_dtypes(chunk_df, debug=debug)
@@ -13,6 +13,35 @@ import json
13
13
  import meerschaum as mrsm
14
14
  from meerschaum.utils.typing import Optional, Any, List, SuccessTuple, Dict
15
15
 
16
+
17
+ def get_plugins_pipe(self) -> mrsm.Pipe:
18
+ """
19
+ Return the internal metadata plugins pipe.
20
+ """
21
+ users_pipe = self.get_users_pipe()
22
+ user_id_dtype = users_pipe.dtypes.get('user_id', 'int')
23
+ return mrsm.Pipe(
24
+ 'mrsm', 'plugins',
25
+ instance=self,
26
+ temporary=True,
27
+ static=True,
28
+ null_indices=False,
29
+ columns={
30
+ 'primary': 'plugin_id',
31
+ 'user_id': 'user_id',
32
+ },
33
+ dtypes={
34
+ 'plugin_name': 'string',
35
+ 'user_id': user_id_dtype,
36
+ 'attributes': 'json',
37
+ 'version': 'string',
38
+ },
39
+ indices={
40
+ 'unique': 'plugin_name',
41
+ },
42
+ )
43
+
44
+
16
45
  def register_plugin(
17
46
  self,
18
47
  plugin: 'mrsm.core.Plugin',