meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. meerschaum/_internal/arguments/_parser.py +2 -1
  2. meerschaum/_internal/docs/index.py +49 -2
  3. meerschaum/_internal/shell/Shell.py +5 -4
  4. meerschaum/_internal/static.py +8 -24
  5. meerschaum/actions/bootstrap.py +1 -1
  6. meerschaum/actions/edit.py +6 -3
  7. meerschaum/actions/start.py +1 -1
  8. meerschaum/actions/verify.py +5 -8
  9. meerschaum/api/__init__.py +2 -1
  10. meerschaum/api/dash/__init__.py +0 -2
  11. meerschaum/api/dash/callbacks/__init__.py +1 -0
  12. meerschaum/api/dash/callbacks/dashboard.py +20 -19
  13. meerschaum/api/dash/callbacks/jobs.py +11 -5
  14. meerschaum/api/dash/callbacks/pipes.py +106 -5
  15. meerschaum/api/dash/callbacks/settings/__init__.py +0 -1
  16. meerschaum/api/dash/callbacks/{settings/tokens.py → tokens.py} +1 -1
  17. meerschaum/api/dash/jobs.py +1 -1
  18. meerschaum/api/dash/pages/__init__.py +2 -1
  19. meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
  20. meerschaum/api/dash/pages/pipes.py +4 -3
  21. meerschaum/api/dash/pages/settings/__init__.py +0 -1
  22. meerschaum/api/dash/pages/{settings/tokens.py → tokens.py} +6 -8
  23. meerschaum/api/dash/pipes.py +131 -0
  24. meerschaum/api/dash/tokens.py +28 -31
  25. meerschaum/api/routes/_pipes.py +47 -37
  26. meerschaum/config/_default.py +13 -2
  27. meerschaum/config/_paths.py +1 -0
  28. meerschaum/config/_version.py +1 -1
  29. meerschaum/config/stack/__init__.py +9 -8
  30. meerschaum/connectors/api/_pipes.py +2 -18
  31. meerschaum/connectors/api/_tokens.py +2 -2
  32. meerschaum/connectors/instance/_tokens.py +10 -6
  33. meerschaum/connectors/sql/_SQLConnector.py +14 -0
  34. meerschaum/connectors/sql/_create_engine.py +3 -14
  35. meerschaum/connectors/sql/_pipes.py +175 -185
  36. meerschaum/connectors/sql/_sql.py +38 -20
  37. meerschaum/connectors/sql/tables/__init__.py +237 -122
  38. meerschaum/connectors/valkey/_pipes.py +44 -16
  39. meerschaum/core/Pipe/__init__.py +28 -5
  40. meerschaum/core/Pipe/_attributes.py +273 -46
  41. meerschaum/core/Pipe/_data.py +55 -17
  42. meerschaum/core/Pipe/_dtypes.py +19 -4
  43. meerschaum/core/Pipe/_edit.py +2 -0
  44. meerschaum/core/Pipe/_fetch.py +1 -1
  45. meerschaum/core/Pipe/_sync.py +90 -160
  46. meerschaum/core/Pipe/_verify.py +3 -3
  47. meerschaum/core/Token/_Token.py +4 -5
  48. meerschaum/plugins/bootstrap.py +508 -3
  49. meerschaum/utils/_get_pipes.py +1 -1
  50. meerschaum/utils/dataframe.py +385 -68
  51. meerschaum/utils/debug.py +15 -15
  52. meerschaum/utils/dtypes/__init__.py +387 -22
  53. meerschaum/utils/dtypes/sql.py +327 -31
  54. meerschaum/utils/misc.py +9 -68
  55. meerschaum/utils/packages/__init__.py +7 -21
  56. meerschaum/utils/packages/_packages.py +7 -2
  57. meerschaum/utils/schedule.py +1 -1
  58. meerschaum/utils/sql.py +8 -8
  59. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/METADATA +5 -17
  60. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/RECORD +66 -65
  61. meerschaum-3.0.0rc3.dist-info/licenses/NOTICE +2 -0
  62. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/WHEEL +0 -0
  63. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/entry_points.txt +0 -0
  64. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/licenses/LICENSE +0 -0
  65. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/top_level.txt +0 -0
  66. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/zip-safe +0 -0
@@ -25,7 +25,6 @@ def register_pipe(
25
25
  Register a new pipe.
26
26
  A pipe's attributes must be set before registering.
27
27
  """
28
- from meerschaum.utils.debug import dprint
29
28
  from meerschaum.utils.packages import attempt_import
30
29
  from meerschaum.utils.sql import json_flavors
31
30
 
@@ -148,7 +147,7 @@ def fetch_pipes_keys(
148
147
  tags: Optional[List[str]] = None,
149
148
  params: Optional[Dict[str, Any]] = None,
150
149
  debug: bool = False
151
- ) -> Optional[List[Tuple[str, str, Optional[str]]]]:
150
+ ) -> List[Tuple[str, str, Optional[str]]]:
152
151
  """
153
152
  Return a list of tuples corresponding to the parameters provided.
154
153
 
@@ -163,17 +162,27 @@ def fetch_pipes_keys(
163
162
  location_keys: Optional[List[str]], default None
164
163
  List of location_keys to search by.
165
164
 
165
+ tags: Optional[List[str]], default None
166
  + List of tags to search by.
167
+
166
168
  params: Optional[Dict[str, Any]], default None
167
169
  Dictionary of additional parameters to search by.
168
170
  E.g. `--params pipe_id:1`
169
171
 
170
172
  debug: bool, default False
171
173
  Verbosity toggle.
174
+
175
+ Returns
176
+ -------
177
+ A list of tuples of pipes' keys (connector_keys, metric_key, location_key).
172
178
  """
173
- from meerschaum.utils.debug import dprint
174
179
  from meerschaum.utils.packages import attempt_import
175
180
  from meerschaum.utils.misc import separate_negation_values
176
- from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
181
+ from meerschaum.utils.sql import (
182
+ OMIT_NULLSFIRST_FLAVORS,
183
+ table_exists,
184
+ json_flavors,
185
+ )
177
186
  from meerschaum._internal.static import STATIC_CONFIG
178
187
  import json
179
188
  from copy import deepcopy
@@ -261,25 +270,49 @@ def fetch_pipes_keys(
261
270
  in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]
262
271
 
263
272
  ors, nands = [], []
264
- for _in_tags, _ex_tags in in_ex_tag_groups:
265
- sub_ands = []
266
- for nt in _in_tags:
267
- sub_ands.append(
268
- sqlalchemy.cast(
269
- pipes_tbl.c['parameters'],
270
- sqlalchemy.String,
271
- ).like(f'%"tags":%"{nt}"%')
272
- )
273
- if sub_ands:
274
- ors.append(sqlalchemy.and_(*sub_ands))
275
-
276
- for xt in _ex_tags:
277
- nands.append(
278
- sqlalchemy.cast(
279
- pipes_tbl.c['parameters'],
280
- sqlalchemy.String,
281
- ).not_like(f'%"tags":%"{xt}"%')
282
- )
273
+ if self.flavor in json_flavors:
274
+ from sqlalchemy.dialects import postgresql
275
+ for _in_tags, _ex_tags in in_ex_tag_groups:
276
+ if _in_tags:
277
+ ors.append(
278
+ sqlalchemy.and_(
279
+ pipes_tbl.c['parameters'].cast(postgresql.JSONB).has_key('tags'),
280
+ pipes_tbl.c['parameters']['tags'].cast(
281
+ postgresql.JSONB
282
+ ).contains(_in_tags)
283
+ )
284
+ )
285
+ for xt in _ex_tags:
286
+ nands.append(
287
+ sqlalchemy.not_(
288
+ sqlalchemy.and_(
289
+ pipes_tbl.c['parameters'].cast(postgresql.JSONB).has_key('tags'),
290
+ pipes_tbl.c['parameters']['tags'].cast(
291
+ postgresql.JSONB
292
+ ).contains([xt])
293
+ )
294
+ )
295
+ )
296
+ else:
297
+ for _in_tags, _ex_tags in in_ex_tag_groups:
298
+ sub_ands = []
299
+ for nt in _in_tags:
300
+ sub_ands.append(
301
+ sqlalchemy.cast(
302
+ pipes_tbl.c['parameters'],
303
+ sqlalchemy.String,
304
+ ).like(f'%"tags":%"{nt}"%')
305
+ )
306
+ if sub_ands:
307
+ ors.append(sqlalchemy.and_(*sub_ands))
308
+
309
+ for xt in _ex_tags:
310
+ nands.append(
311
+ sqlalchemy.cast(
312
+ pipes_tbl.c['parameters'],
313
+ sqlalchemy.String,
314
+ ).not_like(f'%"tags":%"{xt}"%')
315
+ )
283
316
 
284
317
  q = q.where(sqlalchemy.and_(*nands)) if nands else q
285
318
  q = q.where(sqlalchemy.or_(*ors)) if ors else q
@@ -294,7 +327,7 @@ def fetch_pipes_keys(
294
327
 
295
328
  ### execute the query and return a list of tuples
296
329
  if debug:
297
- dprint(q.compile(compile_kwargs={'literal_binds': True}))
330
+ dprint(q)
298
331
  try:
299
332
  rows = (
300
333
  self.execute(q).fetchall()
@@ -338,7 +371,6 @@ def create_indices(
338
371
  """
339
372
  Create a pipe's indices.
340
373
  """
341
- from meerschaum.utils.debug import dprint
342
374
  if debug:
343
375
  dprint(f"Creating indices for {pipe}...")
344
376
 
@@ -392,7 +424,6 @@ def drop_indices(
392
424
  """
393
425
  Drop a pipe's indices.
394
426
  """
395
- from meerschaum.utils.debug import dprint
396
427
  if debug:
397
428
  dprint(f"Dropping indices for {pipe}...")
398
429
 
@@ -1008,6 +1039,8 @@ def get_pipe_data(
1008
1039
  limit: Optional[int] = None,
1009
1040
  begin_add_minutes: int = 0,
1010
1041
  end_add_minutes: int = 0,
1042
+ chunksize: Optional[int] = -1,
1043
+ as_iterator: bool = False,
1011
1044
  debug: bool = False,
1012
1045
  **kw: Any
1013
1046
  ) -> Union[pd.DataFrame, None]:
@@ -1044,14 +1077,17 @@ def get_pipe_data(
1044
1077
  If specified, limit the number of rows retrieved to this value.
1045
1078
 
1046
1079
  begin_add_minutes: int, default 0
1047
- The number of minutes to add to the `begin` datetime (i.e. `DATEADD`.
1080
+ The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1048
1081
 
1049
1082
  end_add_minutes: int, default 0
1050
- The number of minutes to add to the `end` datetime (i.e. `DATEADD`.
1083
+ The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1051
1084
 
1052
1085
  chunksize: Optional[int], default -1
1053
1086
  The size of dataframe chunks to load into memory.
1054
1087
 
1088
+ as_iterator: bool, default False
1089
+ If `True`, return the chunks iterator directly.
1090
+
1055
1091
  debug: bool, default False
1056
1092
  Verbosity toggle.
1057
1093
 
@@ -1060,43 +1096,58 @@ def get_pipe_data(
1060
1096
  A `pd.DataFrame` of the pipe's data.
1061
1097
 
1062
1098
  """
1063
- import json
1064
- from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
1099
+ import functools
1065
1100
  from meerschaum.utils.packages import import_pandas
1066
- from meerschaum.utils.dtypes import (
1067
- attempt_cast_to_numeric,
1068
- attempt_cast_to_uuid,
1069
- attempt_cast_to_bytes,
1070
- attempt_cast_to_geometry,
1071
- are_dtypes_equal,
1072
- )
1101
+ from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
1073
1102
  from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
1074
1103
  pd = import_pandas()
1075
1104
  is_dask = 'dask' in pd.__name__
1076
1105
 
1077
1106
  cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
1107
+ pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {}
1108
+
1109
+ remote_pandas_types = {
1110
+ col: to_pandas_dtype(get_pd_type_from_db_type(typ))
1111
+ for col, typ in cols_types.items()
1112
+ }
1113
+ remote_dt_cols_types = {
1114
+ col: typ
1115
+ for col, typ in remote_pandas_types.items()
1116
+ if are_dtypes_equal(typ, 'datetime')
1117
+ }
1118
+ remote_dt_tz_aware_cols_types = {
1119
+ col: typ
1120
+ for col, typ in remote_dt_cols_types.items()
1121
+ if ',' in typ or typ == 'datetime'
1122
+ }
1123
+ remote_dt_tz_naive_cols_types = {
1124
+ col: typ
1125
+ for col, typ in remote_dt_cols_types.items()
1126
+ if col not in remote_dt_tz_aware_cols_types
1127
+ }
1128
+
1129
+ configured_pandas_types = {
1130
+ col: to_pandas_dtype(typ)
1131
+ for col, typ in pipe_dtypes.items()
1132
+ }
1133
+ configured_lower_precision_dt_cols_types = {
1134
+ col: typ
1135
+ for col, typ in pipe_dtypes.items()
1136
+ if (
1137
+ are_dtypes_equal('datetime', typ)
1138
+ and '[' in typ
1139
+ and 'ns' not in typ
1140
+ )
1141
+
1142
+ }
1143
+
1078
1144
  dtypes = {
1079
- **{
1080
- col: get_pd_type_from_db_type(typ)
1081
- for col, typ in cols_types.items()
1082
- },
1083
- **{
1084
- p_col: to_pandas_dtype(p_typ)
1085
- for p_col, p_typ in pipe.dtypes.items()
1086
- },
1145
+ **remote_pandas_types,
1146
+ **configured_pandas_types,
1147
+ **remote_dt_tz_aware_cols_types,
1148
+ **remote_dt_tz_naive_cols_types,
1149
+ **configured_lower_precision_dt_cols_types
1087
1150
  } if pipe.enforce else {}
1088
- if dtypes:
1089
- if self.flavor == 'sqlite':
1090
- if not pipe.columns.get('datetime', None):
1091
- _dt = pipe.guess_datetime()
1092
- else:
1093
- _dt = pipe.get_columns('datetime')
1094
-
1095
- if _dt:
1096
- dt_type = dtypes.get(_dt, 'object').lower()
1097
- if 'datetime' not in dt_type:
1098
- if 'int' not in dt_type:
1099
- dtypes[_dt] = 'datetime64[ns, UTC]'
1100
1151
 
1101
1152
  existing_cols = cols_types.keys()
1102
1153
  select_columns = (
@@ -1113,13 +1164,20 @@ def get_pipe_data(
1113
1164
  and col not in (omit_columns or [])
1114
1165
  ]
1115
1166
  ) if pipe.enforce else select_columns
1167
+
1116
1168
  if select_columns:
1117
1169
  dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
1170
+
1118
1171
  dtypes = {
1119
- col: to_pandas_dtype(typ)
1172
+ col: typ
1120
1173
  for col, typ in dtypes.items()
1121
- if col in select_columns and col not in (omit_columns or [])
1174
+ if col in (select_columns or [col]) and col not in (omit_columns or [])
1122
1175
  } if pipe.enforce else {}
1176
+
1177
+ if debug:
1178
+ dprint(f"[{self}] `read()` dtypes:")
1179
+ mrsm.pprint(dtypes)
1180
+
1123
1181
  query = self.get_pipe_data_query(
1124
1182
  pipe,
1125
1183
  select_columns=select_columns,
@@ -1135,91 +1193,25 @@ def get_pipe_data(
1135
1193
  **kw
1136
1194
  )
1137
1195
 
1196
+ read_kwargs = {}
1138
1197
  if is_dask:
1139
1198
  index_col = pipe.columns.get('datetime', None)
1140
- kw['index_col'] = index_col
1199
+ read_kwargs['index_col'] = index_col
1141
1200
 
1142
- numeric_columns = [
1143
- col
1144
- for col, typ in pipe.dtypes.items()
1145
- if typ.startswith('numeric') and col in dtypes
1146
- ]
1147
- uuid_columns = [
1148
- col
1149
- for col, typ in pipe.dtypes.items()
1150
- if typ == 'uuid' and col in dtypes
1151
- ]
1152
- bytes_columns = [
1153
- col
1154
- for col, typ in pipe.dtypes.items()
1155
- if typ == 'bytes' and col in dtypes
1156
- ]
1157
- geometry_columns = [
1158
- col
1159
- for col, typ in pipe.dtypes.items()
1160
- if typ.startswith('geometry') and col in dtypes
1161
- ]
1162
-
1163
- kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
1164
-
1165
- df = self.read(
1201
+ chunks = self.read(
1166
1202
  query,
1203
+ chunksize=chunksize,
1204
+ as_iterator=True,
1205
+ coerce_float=False,
1167
1206
  dtype=dtypes,
1168
1207
  debug=debug,
1169
- **kw
1208
+ **read_kwargs
1170
1209
  )
1171
- for col in numeric_columns:
1172
- if col not in df.columns:
1173
- continue
1174
- df[col] = df[col].apply(attempt_cast_to_numeric)
1175
1210
 
1176
- for col in uuid_columns:
1177
- if col not in df.columns:
1178
- continue
1179
- df[col] = df[col].apply(attempt_cast_to_uuid)
1180
-
1181
- for col in bytes_columns:
1182
- if col not in df.columns:
1183
- continue
1184
- df[col] = df[col].apply(attempt_cast_to_bytes)
1211
+ if as_iterator:
1212
+ return chunks
1185
1213
 
1186
- for col in geometry_columns:
1187
- if col not in df.columns:
1188
- continue
1189
- df[col] = df[col].apply(attempt_cast_to_geometry)
1190
-
1191
- if self.flavor == 'sqlite':
1192
- ignore_dt_cols = [
1193
- col
1194
- for col, dtype in pipe.dtypes.items()
1195
- if not are_dtypes_equal(str(dtype), 'datetime')
1196
- ]
1197
- ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
1198
- df = (
1199
- parse_df_datetimes(
1200
- df,
1201
- ignore_cols=ignore_dt_cols,
1202
- chunksize=kw.get('chunksize', None),
1203
- strip_timezone=(pipe.tzinfo is None),
1204
- debug=debug,
1205
- ) if isinstance(df, pd.DataFrame) else (
1206
- [
1207
- parse_df_datetimes(
1208
- c,
1209
- ignore_cols=ignore_dt_cols,
1210
- chunksize=kw.get('chunksize', None),
1211
- strip_timezone=(pipe.tzinfo is None),
1212
- debug=debug,
1213
- )
1214
- for c in df
1215
- ]
1216
- )
1217
- )
1218
- for col, typ in dtypes.items():
1219
- if typ != 'json':
1220
- continue
1221
- df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
1222
- return df
1214
+ return pd.concat(chunks)
1223
1215
 
1224
1216
 
1225
1217
  def get_pipe_data_query(
@@ -1552,13 +1544,7 @@ def create_pipe_table_from_df(
1552
1544
  """
1553
1545
  Create a pipe's table from its configured dtypes and an incoming dataframe.
1554
1546
  """
1555
- from meerschaum.utils.dataframe import (
1556
- get_json_cols,
1557
- get_numeric_cols,
1558
- get_uuid_cols,
1559
- get_datetime_cols,
1560
- get_bytes_cols,
1561
- )
1547
+ from meerschaum.utils.dataframe import get_special_cols
1562
1548
  from meerschaum.utils.sql import (
1563
1549
  get_create_table_queries,
1564
1550
  sql_item_name,
@@ -1587,30 +1573,7 @@ def create_pipe_table_from_df(
1587
1573
  for col_ix, col in pipe.columns.items()
1588
1574
  if col and col_ix != 'primary'
1589
1575
  },
1590
- **{
1591
- col: 'uuid'
1592
- for col in get_uuid_cols(df)
1593
- },
1594
- **{
1595
- col: 'json'
1596
- for col in get_json_cols(df)
1597
- },
1598
- **{
1599
- col: 'numeric'
1600
- for col in get_numeric_cols(df)
1601
- },
1602
- **{
1603
- col: 'bytes'
1604
- for col in get_bytes_cols(df)
1605
- },
1606
- **{
1607
- col: 'datetime64[ns, UTC]'
1608
- for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
1609
- },
1610
- **{
1611
- col: 'datetime64[ns]'
1612
- for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
1613
- },
1576
+ **get_special_cols(df),
1614
1577
  **pipe.dtypes
1615
1578
  }
1616
1579
  autoincrement = (
@@ -1762,18 +1725,16 @@ def sync_pipe(
1762
1725
  _ = pipe.__dict__.pop('_columns_types', None)
1763
1726
  if not self.exec_queries(alter_cols_queries, debug=debug):
1764
1727
  warn(f"Failed to alter columns for {pipe}.")
1765
- else:
1766
- _ = pipe.infer_dtypes(persist=True)
1767
1728
 
1768
1729
  ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
1769
1730
  ### so infer bools and persist them to `dtypes`.
1770
1731
  if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
1771
- pipe_dtypes = pipe.dtypes
1732
+ pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
1772
1733
  new_bool_cols = {
1773
1734
  col: 'bool[pyarrow]'
1774
1735
  for col, typ in df.dtypes.items()
1775
1736
  if col not in pipe_dtypes
1776
- and are_dtypes_equal(str(typ), 'bool')
1737
+ and are_dtypes_equal(str(typ), 'bool')
1777
1738
  }
1778
1739
  pipe_dtypes.update(new_bool_cols)
1779
1740
  pipe.dtypes = pipe_dtypes
@@ -2788,7 +2749,6 @@ def pipe_exists(
2788
2749
  debug=debug,
2789
2750
  )
2790
2751
  if debug:
2791
- from meerschaum.utils.debug import dprint
2792
2752
  dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
2793
2753
  return exists
2794
2754
 
@@ -3125,11 +3085,17 @@ def get_pipe_columns_types(
3125
3085
  debug=debug,
3126
3086
  )
3127
3087
 
3088
+ if debug:
3089
  dprint(f"Fetching columns_types for {pipe} via SQLAlchemy table.")
3090
+
3128
3091
  table_columns = {}
3129
3092
  try:
3130
3093
  pipe_table = self.get_pipe_table(pipe, debug=debug)
3131
3094
  if pipe_table is None:
3132
3095
  return {}
3096
+ if debug:
3097
+ dprint(f"Found columns:")
3098
+ mrsm.pprint(dict(pipe_table.columns))
3133
3099
  for col in pipe_table.columns:
3134
3100
  table_columns[str(col.name)] = str(col.type)
3135
3101
  except Exception as e:
@@ -3321,10 +3287,9 @@ def get_alter_columns_queries(
3321
3287
  -------
3322
3288
  A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
3323
3289
  """
3324
- if not pipe.exists(debug=debug):
3290
+ if not pipe.exists(debug=debug) or pipe.static:
3325
3291
  return []
3326
- if pipe.static:
3327
- return
3292
+
3328
3293
  from meerschaum.utils.sql import (
3329
3294
  sql_item_name,
3330
3295
  get_table_cols_types,
@@ -3370,7 +3335,8 @@ def get_alter_columns_queries(
3370
3335
  debug=debug,
3371
3336
  ).items()
3372
3337
  }
3373
- pipe_bool_cols = [col for col, typ in pipe.dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3338
+ pipe_dtypes = pipe.dtypes
3339
+ pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3374
3340
  pd_db_df_aliases = {
3375
3341
  'int': 'bool',
3376
3342
  'float': 'bool',
@@ -3378,7 +3344,10 @@ def get_alter_columns_queries(
3378
3344
  'guid': 'object',
3379
3345
  }
3380
3346
  if self.flavor == 'oracle':
3381
- pd_db_df_aliases['int'] = 'numeric'
3347
+ pd_db_df_aliases.update({
3348
+ 'int': 'numeric',
3349
+ 'date': 'datetime',
3350
+ })
3382
3351
 
3383
3352
  altered_cols = {
3384
3353
  col: (db_cols_types.get(col, 'object'), typ)
@@ -3387,6 +3356,10 @@ def get_alter_columns_queries(
3387
3356
  and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
3388
3357
  }
3389
3358
 
3359
+ if debug and altered_cols:
3360
+ dprint(f"Columns to be altered:")
3361
+ mrsm.pprint(altered_cols)
3362
+
3390
3363
  ### NOTE: Sometimes bools are coerced into ints or floats.
3391
3364
  altered_cols_to_ignore = set()
3392
3365
  for col, (db_typ, df_typ) in altered_cols.items():
@@ -3413,13 +3386,20 @@ def get_alter_columns_queries(
3413
3386
  if db_is_bool_compatible and df_is_bool_compatible:
3414
3387
  altered_cols_to_ignore.add(bool_col)
3415
3388
 
3389
+ if debug and altered_cols_to_ignore:
3390
+ dprint(f"Ignoring the following altered columns (false positives).")
3391
+ mrsm.pprint(altered_cols_to_ignore)
3392
+
3416
3393
  for col in altered_cols_to_ignore:
3417
3394
  _ = altered_cols.pop(col, None)
3395
+
3418
3396
  if not altered_cols:
3419
3397
  return []
3420
3398
 
3421
3399
  if numeric_cols:
3422
- pipe.dtypes.update({col: 'numeric' for col in numeric_cols})
3400
+ explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
3401
+ explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
3402
+ pipe.dtypes = explicit_pipe_dtypes
3423
3403
  if not pipe.temporary:
3424
3404
  edit_success, edit_msg = pipe.edit(debug=debug)
3425
3405
  if not edit_success:
@@ -3428,7 +3408,7 @@ def get_alter_columns_queries(
3428
3408
  + f"{edit_msg}"
3429
3409
  )
3430
3410
  else:
3431
- numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
3411
+ numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')])
3432
3412
 
3433
3413
  numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3434
3414
  text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
@@ -3636,20 +3616,18 @@ def get_to_sql_dtype(
3636
3616
  >>> get_to_sql_dtype(pipe, df)
3637
3617
  {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3638
3618
  """
3639
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
3619
+ from meerschaum.utils.dataframe import get_special_cols
3640
3620
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
3641
3621
  df_dtypes = {
3642
3622
  col: str(typ)
3643
3623
  for col, typ in df.dtypes.items()
3644
3624
  }
3645
- json_cols = get_json_cols(df)
3646
- numeric_cols = get_numeric_cols(df)
3647
- uuid_cols = get_uuid_cols(df)
3648
- df_dtypes.update({col: 'json' for col in json_cols})
3649
- df_dtypes.update({col: 'numeric' for col in numeric_cols})
3650
- df_dtypes.update({col: 'uuid' for col in uuid_cols})
3625
+ special_cols = get_special_cols(df)
3626
+ df_dtypes.update(special_cols)
3627
+
3651
3628
  if update_dtypes:
3652
3629
  df_dtypes.update(pipe.dtypes)
3630
+
3653
3631
  return {
3654
3632
  col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
3655
3633
  for col, typ in df_dtypes.items()
@@ -3920,3 +3898,15 @@ def get_temporary_target(
3920
3898
  + transact_id
3921
3899
  + ((separator + label) if label else '')
3922
3900
  )
3901
+
3902
+
3903
+ def _enforce_pipe_dtypes_chunks_hook(
3904
+ pipe: mrsm.Pipe,
3905
+ chunk_df: 'pd.DataFrame',
3906
+ debug: bool = False,
3907
+ **kwargs
3908
+ ) -> 'pd.DataFrame':
3909
+ """
3910
+ Enforce a pipe's dtypes on each chunk.
3911
+ """
3912
+ return pipe.enforce_dtypes(chunk_df, debug=debug)