meerschaum 2.9.5__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (200)
  1. meerschaum/__init__.py +5 -2
  2. meerschaum/_internal/__init__.py +1 -0
  3. meerschaum/_internal/arguments/_parse_arguments.py +4 -4
  4. meerschaum/_internal/arguments/_parser.py +33 -4
  5. meerschaum/_internal/cli/__init__.py +6 -0
  6. meerschaum/_internal/cli/daemons.py +103 -0
  7. meerschaum/_internal/cli/entry.py +220 -0
  8. meerschaum/_internal/cli/workers.py +435 -0
  9. meerschaum/_internal/docs/index.py +48 -2
  10. meerschaum/_internal/entry.py +50 -14
  11. meerschaum/_internal/shell/Shell.py +121 -29
  12. meerschaum/_internal/shell/__init__.py +4 -1
  13. meerschaum/_internal/static.py +359 -0
  14. meerschaum/_internal/term/TermPageHandler.py +1 -2
  15. meerschaum/_internal/term/__init__.py +40 -6
  16. meerschaum/_internal/term/tools.py +33 -8
  17. meerschaum/actions/__init__.py +6 -4
  18. meerschaum/actions/api.py +53 -13
  19. meerschaum/actions/attach.py +1 -0
  20. meerschaum/actions/bootstrap.py +8 -8
  21. meerschaum/actions/delete.py +4 -2
  22. meerschaum/actions/edit.py +171 -25
  23. meerschaum/actions/login.py +8 -8
  24. meerschaum/actions/register.py +143 -6
  25. meerschaum/actions/reload.py +22 -5
  26. meerschaum/actions/restart.py +14 -0
  27. meerschaum/actions/show.py +184 -31
  28. meerschaum/actions/start.py +166 -17
  29. meerschaum/actions/stop.py +38 -2
  30. meerschaum/actions/sync.py +7 -2
  31. meerschaum/actions/tag.py +9 -8
  32. meerschaum/actions/verify.py +5 -8
  33. meerschaum/api/__init__.py +45 -15
  34. meerschaum/api/_events.py +46 -4
  35. meerschaum/api/_oauth2.py +162 -9
  36. meerschaum/api/_tokens.py +102 -0
  37. meerschaum/api/dash/__init__.py +0 -3
  38. meerschaum/api/dash/callbacks/__init__.py +1 -0
  39. meerschaum/api/dash/callbacks/custom.py +4 -3
  40. meerschaum/api/dash/callbacks/dashboard.py +198 -118
  41. meerschaum/api/dash/callbacks/jobs.py +14 -7
  42. meerschaum/api/dash/callbacks/login.py +10 -1
  43. meerschaum/api/dash/callbacks/pipes.py +194 -14
  44. meerschaum/api/dash/callbacks/plugins.py +0 -1
  45. meerschaum/api/dash/callbacks/register.py +10 -3
  46. meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
  47. meerschaum/api/dash/callbacks/tokens.py +389 -0
  48. meerschaum/api/dash/components.py +36 -15
  49. meerschaum/api/dash/jobs.py +1 -1
  50. meerschaum/api/dash/keys.py +35 -93
  51. meerschaum/api/dash/pages/__init__.py +2 -1
  52. meerschaum/api/dash/pages/dashboard.py +1 -20
  53. meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
  54. meerschaum/api/dash/pages/login.py +2 -2
  55. meerschaum/api/dash/pages/pipes.py +16 -5
  56. meerschaum/api/dash/pages/settings/password_reset.py +1 -1
  57. meerschaum/api/dash/pages/tokens.py +53 -0
  58. meerschaum/api/dash/pipes.py +382 -95
  59. meerschaum/api/dash/sessions.py +12 -0
  60. meerschaum/api/dash/tokens.py +603 -0
  61. meerschaum/api/dash/websockets.py +1 -1
  62. meerschaum/api/dash/webterm.py +18 -6
  63. meerschaum/api/models/__init__.py +23 -3
  64. meerschaum/api/models/_actions.py +22 -0
  65. meerschaum/api/models/_pipes.py +91 -7
  66. meerschaum/api/models/_tokens.py +81 -0
  67. meerschaum/api/resources/static/js/terminado.js +3 -0
  68. meerschaum/api/resources/static/js/xterm-addon-unicode11.js +2 -0
  69. meerschaum/api/resources/templates/termpage.html +13 -0
  70. meerschaum/api/routes/__init__.py +1 -0
  71. meerschaum/api/routes/_actions.py +3 -4
  72. meerschaum/api/routes/_connectors.py +3 -7
  73. meerschaum/api/routes/_jobs.py +26 -35
  74. meerschaum/api/routes/_login.py +120 -15
  75. meerschaum/api/routes/_misc.py +5 -10
  76. meerschaum/api/routes/_pipes.py +178 -143
  77. meerschaum/api/routes/_plugins.py +38 -28
  78. meerschaum/api/routes/_tokens.py +236 -0
  79. meerschaum/api/routes/_users.py +47 -35
  80. meerschaum/api/routes/_version.py +3 -3
  81. meerschaum/api/routes/_webterm.py +3 -3
  82. meerschaum/config/__init__.py +100 -30
  83. meerschaum/config/_default.py +132 -64
  84. meerschaum/config/_edit.py +38 -32
  85. meerschaum/config/_formatting.py +2 -0
  86. meerschaum/config/_patch.py +10 -8
  87. meerschaum/config/_paths.py +133 -13
  88. meerschaum/config/_read_config.py +87 -36
  89. meerschaum/config/_sync.py +6 -3
  90. meerschaum/config/_version.py +1 -1
  91. meerschaum/config/environment.py +262 -0
  92. meerschaum/config/stack/__init__.py +37 -15
  93. meerschaum/config/static.py +18 -0
  94. meerschaum/connectors/_Connector.py +11 -6
  95. meerschaum/connectors/__init__.py +41 -22
  96. meerschaum/connectors/api/_APIConnector.py +34 -6
  97. meerschaum/connectors/api/_actions.py +2 -2
  98. meerschaum/connectors/api/_jobs.py +12 -1
  99. meerschaum/connectors/api/_login.py +33 -7
  100. meerschaum/connectors/api/_misc.py +2 -2
  101. meerschaum/connectors/api/_pipes.py +23 -32
  102. meerschaum/connectors/api/_plugins.py +2 -2
  103. meerschaum/connectors/api/_request.py +1 -1
  104. meerschaum/connectors/api/_tokens.py +146 -0
  105. meerschaum/connectors/api/_users.py +70 -58
  106. meerschaum/connectors/instance/_InstanceConnector.py +83 -0
  107. meerschaum/connectors/instance/__init__.py +10 -0
  108. meerschaum/connectors/instance/_pipes.py +442 -0
  109. meerschaum/connectors/instance/_plugins.py +159 -0
  110. meerschaum/connectors/instance/_tokens.py +317 -0
  111. meerschaum/connectors/instance/_users.py +188 -0
  112. meerschaum/connectors/parse.py +5 -2
  113. meerschaum/connectors/sql/_SQLConnector.py +22 -5
  114. meerschaum/connectors/sql/_cli.py +12 -11
  115. meerschaum/connectors/sql/_create_engine.py +12 -168
  116. meerschaum/connectors/sql/_fetch.py +2 -18
  117. meerschaum/connectors/sql/_pipes.py +295 -278
  118. meerschaum/connectors/sql/_plugins.py +29 -0
  119. meerschaum/connectors/sql/_sql.py +46 -21
  120. meerschaum/connectors/sql/_users.py +36 -2
  121. meerschaum/connectors/sql/tables/__init__.py +254 -122
  122. meerschaum/connectors/valkey/_ValkeyConnector.py +5 -7
  123. meerschaum/connectors/valkey/_pipes.py +60 -31
  124. meerschaum/connectors/valkey/_plugins.py +2 -26
  125. meerschaum/core/Pipe/__init__.py +115 -85
  126. meerschaum/core/Pipe/_attributes.py +425 -124
  127. meerschaum/core/Pipe/_bootstrap.py +54 -24
  128. meerschaum/core/Pipe/_cache.py +555 -0
  129. meerschaum/core/Pipe/_clear.py +0 -11
  130. meerschaum/core/Pipe/_data.py +96 -68
  131. meerschaum/core/Pipe/_deduplicate.py +0 -13
  132. meerschaum/core/Pipe/_delete.py +12 -21
  133. meerschaum/core/Pipe/_drop.py +11 -23
  134. meerschaum/core/Pipe/_dtypes.py +49 -19
  135. meerschaum/core/Pipe/_edit.py +14 -4
  136. meerschaum/core/Pipe/_fetch.py +1 -1
  137. meerschaum/core/Pipe/_index.py +8 -14
  138. meerschaum/core/Pipe/_show.py +5 -5
  139. meerschaum/core/Pipe/_sync.py +123 -204
  140. meerschaum/core/Pipe/_verify.py +4 -4
  141. meerschaum/{plugins → core/Plugin}/_Plugin.py +16 -12
  142. meerschaum/core/Plugin/__init__.py +1 -1
  143. meerschaum/core/Token/_Token.py +220 -0
  144. meerschaum/core/Token/__init__.py +12 -0
  145. meerschaum/core/User/_User.py +35 -10
  146. meerschaum/core/User/__init__.py +9 -1
  147. meerschaum/core/__init__.py +1 -0
  148. meerschaum/jobs/_Executor.py +88 -4
  149. meerschaum/jobs/_Job.py +149 -38
  150. meerschaum/jobs/__init__.py +3 -2
  151. meerschaum/jobs/systemd.py +8 -3
  152. meerschaum/models/__init__.py +35 -0
  153. meerschaum/models/pipes.py +247 -0
  154. meerschaum/models/tokens.py +38 -0
  155. meerschaum/models/users.py +26 -0
  156. meerschaum/plugins/__init__.py +301 -88
  157. meerschaum/plugins/bootstrap.py +510 -4
  158. meerschaum/utils/_get_pipes.py +97 -30
  159. meerschaum/utils/daemon/Daemon.py +199 -43
  160. meerschaum/utils/daemon/FileDescriptorInterceptor.py +0 -1
  161. meerschaum/utils/daemon/RotatingFile.py +63 -36
  162. meerschaum/utils/daemon/StdinFile.py +53 -13
  163. meerschaum/utils/daemon/__init__.py +47 -6
  164. meerschaum/utils/daemon/_names.py +6 -3
  165. meerschaum/utils/dataframe.py +479 -81
  166. meerschaum/utils/debug.py +49 -19
  167. meerschaum/utils/dtypes/__init__.py +476 -34
  168. meerschaum/utils/dtypes/sql.py +369 -29
  169. meerschaum/utils/formatting/__init__.py +5 -2
  170. meerschaum/utils/formatting/_jobs.py +1 -1
  171. meerschaum/utils/formatting/_pipes.py +52 -50
  172. meerschaum/utils/formatting/_pprint.py +1 -0
  173. meerschaum/utils/formatting/_shell.py +44 -18
  174. meerschaum/utils/misc.py +268 -186
  175. meerschaum/utils/packages/__init__.py +25 -40
  176. meerschaum/utils/packages/_packages.py +42 -34
  177. meerschaum/utils/pipes.py +213 -0
  178. meerschaum/utils/process.py +2 -2
  179. meerschaum/utils/prompt.py +175 -144
  180. meerschaum/utils/schedule.py +2 -1
  181. meerschaum/utils/sql.py +134 -47
  182. meerschaum/utils/threading.py +42 -0
  183. meerschaum/utils/typing.py +1 -4
  184. meerschaum/utils/venv/_Venv.py +2 -2
  185. meerschaum/utils/venv/__init__.py +7 -7
  186. meerschaum/utils/warnings.py +19 -13
  187. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/METADATA +94 -96
  188. meerschaum-3.0.0.dist-info/RECORD +289 -0
  189. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/WHEEL +1 -1
  190. meerschaum-3.0.0.dist-info/licenses/NOTICE +2 -0
  191. meerschaum/api/models/_interfaces.py +0 -15
  192. meerschaum/api/models/_locations.py +0 -15
  193. meerschaum/api/models/_metrics.py +0 -15
  194. meerschaum/config/_environment.py +0 -145
  195. meerschaum/config/static/__init__.py +0 -186
  196. meerschaum-2.9.5.dist-info/RECORD +0 -263
  197. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/entry_points.txt +0 -0
  198. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/licenses/LICENSE +0 -0
  199. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/top_level.txt +0 -0
  200. {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/zip-safe +0 -0
@@ -25,7 +25,6 @@ def register_pipe(
25
25
  Register a new pipe.
26
26
  A pipe's attributes must be set before registering.
27
27
  """
28
- from meerschaum.utils.debug import dprint
29
28
  from meerschaum.utils.packages import attempt_import
30
29
  from meerschaum.utils.sql import json_flavors
31
30
 
@@ -45,7 +44,7 @@ def register_pipe(
45
44
  ### (which shouldn't be able to be registered anyway but that's an issue for later).
46
45
  parameters = None
47
46
  try:
48
- parameters = pipe.parameters
47
+ parameters = pipe.get_parameters(apply_symlinks=False)
49
48
  except Exception as e:
50
49
  if debug:
51
50
  dprint(str(e))
@@ -76,7 +75,7 @@ def register_pipe(
76
75
 
77
76
  def edit_pipe(
78
77
  self,
79
- pipe : mrsm.Pipe = None,
78
+ pipe: mrsm.Pipe,
80
79
  patch: bool = False,
81
80
  debug: bool = False,
82
81
  **kw : Any
@@ -108,10 +107,10 @@ def edit_pipe(
108
107
  original_parameters = Pipe(
109
108
  pipe.connector_keys, pipe.metric_key, pipe.location_key,
110
109
  mrsm_instance=pipe.instance_keys
111
- ).parameters
110
+ ).get_parameters(apply_symlinks=False)
112
111
  parameters = apply_patch_to_config(
113
112
  original_parameters,
114
- pipe.parameters
113
+ pipe._attributes['parameters']
115
114
  )
116
115
 
117
116
  ### ensure pipes table exists
@@ -147,8 +146,10 @@ def fetch_pipes_keys(
147
146
  location_keys: Optional[List[str]] = None,
148
147
  tags: Optional[List[str]] = None,
149
148
  params: Optional[Dict[str, Any]] = None,
150
- debug: bool = False
151
- ) -> Optional[List[Tuple[str, str, Optional[str]]]]:
149
+ debug: bool = False,
150
+ ) -> List[
151
+ Tuple[str, str, Union[str, None], Dict[str, Any]]
152
+ ]:
152
153
  """
153
154
  Return a list of tuples corresponding to the parameters provided.
154
155
 
@@ -163,18 +164,28 @@ def fetch_pipes_keys(
163
164
  location_keys: Optional[List[str]], default None
164
165
  List of location_keys to search by.
165
166
 
167
+ tags: Optional[List[str]], default None
168
+ List of tags to search by.
169
+
166
170
  params: Optional[Dict[str, Any]], default None
167
171
  Dictionary of additional parameters to search by.
168
172
  E.g. `--params pipe_id:1`
169
173
 
170
174
  debug: bool, default False
171
175
  Verbosity toggle.
176
+
177
+ Returns
178
+ -------
179
+ A list of tuples of pipes' keys and parameters (connector_keys, metric_key, location_key, parameters).
172
180
  """
173
- from meerschaum.utils.debug import dprint
174
181
  from meerschaum.utils.packages import attempt_import
175
182
  from meerschaum.utils.misc import separate_negation_values
176
- from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
177
- from meerschaum.config.static import STATIC_CONFIG
183
+ from meerschaum.utils.sql import (
184
+ OMIT_NULLSFIRST_FLAVORS,
185
+ table_exists,
186
+ json_flavors,
187
+ )
188
+ from meerschaum._internal.static import STATIC_CONFIG
178
189
  import json
179
190
  from copy import deepcopy
180
191
  sqlalchemy, sqlalchemy_sql_functions = attempt_import(
@@ -240,11 +251,18 @@ def fetch_pipes_keys(
240
251
  ) for key, val in _params.items()
241
252
  if not isinstance(val, (list, tuple)) and key in pipes_tbl.c
242
253
  ]
254
+ if self.flavor in json_flavors:
255
+ sqlalchemy_dialects = mrsm.attempt_import('sqlalchemy.dialects', lazy=False)
256
+ JSONB = sqlalchemy_dialects.postgresql.JSONB
257
+ else:
258
+ JSONB = sqlalchemy.String
259
+
243
260
  select_cols = (
244
261
  [
245
262
  pipes_tbl.c.connector_keys,
246
263
  pipes_tbl.c.metric_key,
247
264
  pipes_tbl.c.location_key,
265
+ pipes_tbl.c.parameters,
248
266
  ]
249
267
  )
250
268
 
@@ -261,25 +279,43 @@ def fetch_pipes_keys(
261
279
  in_ex_tag_groups = [separate_negation_values(tag_group) for tag_group in tag_groups]
262
280
 
263
281
  ors, nands = [], []
264
- for _in_tags, _ex_tags in in_ex_tag_groups:
265
- sub_ands = []
266
- for nt in _in_tags:
267
- sub_ands.append(
268
- sqlalchemy.cast(
269
- pipes_tbl.c['parameters'],
270
- sqlalchemy.String,
271
- ).like(f'%"tags":%"{nt}"%')
272
- )
273
- if sub_ands:
274
- ors.append(sqlalchemy.and_(*sub_ands))
275
-
276
- for xt in _ex_tags:
277
- nands.append(
278
- sqlalchemy.cast(
279
- pipes_tbl.c['parameters'],
280
- sqlalchemy.String,
281
- ).not_like(f'%"tags":%"{xt}"%')
282
- )
282
+ if self.flavor in json_flavors:
283
+ tags_jsonb = pipes_tbl.c['parameters'].cast(JSONB).op('->')('tags').cast(JSONB)
284
+ for _in_tags, _ex_tags in in_ex_tag_groups:
285
+ if _in_tags:
286
+ ors.append(
287
+ sqlalchemy.and_(
288
+ tags_jsonb.contains(_in_tags)
289
+ )
290
+ )
291
+ for xt in _ex_tags:
292
+ nands.append(
293
+ sqlalchemy.not_(
294
+ sqlalchemy.and_(
295
+ tags_jsonb.contains([xt])
296
+ )
297
+ )
298
+ )
299
+ else:
300
+ for _in_tags, _ex_tags in in_ex_tag_groups:
301
+ sub_ands = []
302
+ for nt in _in_tags:
303
+ sub_ands.append(
304
+ sqlalchemy.cast(
305
+ pipes_tbl.c['parameters'],
306
+ sqlalchemy.String,
307
+ ).like(f'%"tags":%"{nt}"%')
308
+ )
309
+ if sub_ands:
310
+ ors.append(sqlalchemy.and_(*sub_ands))
311
+
312
+ for xt in _ex_tags:
313
+ nands.append(
314
+ sqlalchemy.cast(
315
+ pipes_tbl.c['parameters'],
316
+ sqlalchemy.String,
317
+ ).not_like(f'%"tags":%"{xt}"%')
318
+ )
283
319
 
284
320
  q = q.where(sqlalchemy.and_(*nands)) if nands else q
285
321
  q = q.where(sqlalchemy.or_(*ors)) if ors else q
@@ -294,7 +330,7 @@ def fetch_pipes_keys(
294
330
 
295
331
  ### execute the query and return a list of tuples
296
332
  if debug:
297
- dprint(q.compile(compile_kwargs={'literal_binds': True}))
333
+ dprint(q)
298
334
  try:
299
335
  rows = (
300
336
  self.execute(q).fetchall()
@@ -307,7 +343,7 @@ def fetch_pipes_keys(
307
343
  except Exception as e:
308
344
  error(str(e))
309
345
 
310
- return [(row[0], row[1], row[2]) for row in rows]
346
+ return rows
311
347
 
312
348
 
313
349
  def create_pipe_indices(
@@ -338,7 +374,9 @@ def create_indices(
338
374
  """
339
375
  Create a pipe's indices.
340
376
  """
341
- from meerschaum.utils.debug import dprint
377
+ if pipe.__dict__.get('_skip_check_indices', False):
378
+ return True
379
+
342
380
  if debug:
343
381
  dprint(f"Creating indices for {pipe}...")
344
382
 
@@ -348,7 +386,7 @@ def create_indices(
348
386
 
349
387
  cols_to_include = set((columns or []) + (indices or [])) or None
350
388
 
351
- _ = pipe.__dict__.pop('_columns_indices', None)
389
+ pipe._clear_cache_key('_columns_indices', debug=debug)
352
390
  ix_queries = {
353
391
  col: queries
354
392
  for col, queries in self.get_create_index_queries(pipe, debug=debug).items()
@@ -392,7 +430,6 @@ def drop_indices(
392
430
  """
393
431
  Drop a pipe's indices.
394
432
  """
395
- from meerschaum.utils.debug import dprint
396
433
  if debug:
397
434
  dprint(f"Dropping indices for {pipe}...")
398
435
 
@@ -425,7 +462,7 @@ def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]:
425
462
  -------
426
463
  A dictionary of index keys to column names.
427
464
  """
428
- from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS
465
+ from meerschaum.utils.sql import DEFAULT_SCHEMA_FLAVORS, truncate_item_name
429
466
  _parameters = pipe.parameters
430
467
  _index_template = _parameters.get('index_template', "IX_{schema_str}{target}_{column_names}")
431
468
  _schema = self.get_pipe_schema(pipe)
@@ -466,7 +503,7 @@ def get_pipe_index_names(self, pipe: mrsm.Pipe) -> Dict[str, str]:
466
503
  continue
467
504
  seen_index_names[index_name] = ix
468
505
  return {
469
- ix: index_name
506
+ ix: truncate_item_name(index_name, flavor=self.flavor)
470
507
  for index_name, ix in seen_index_names.items()
471
508
  }
472
509
 
@@ -603,7 +640,10 @@ def get_create_index_queries(
603
640
  ### create datetime index
604
641
  dt_query = None
605
642
  if _datetime is not None:
606
- if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
643
+ if (
644
+ self.flavor in ('timescaledb', 'timescaledb-ha')
645
+ and pipe.parameters.get('hypertable', True)
646
+ ):
607
647
  _id_count = (
608
648
  get_distinct_col_count(_id, f"SELECT {_id_name} FROM {_pipe_name}", self)
609
649
  if (_id is not None and _create_space_partition) else None
@@ -719,7 +759,7 @@ def get_create_index_queries(
719
759
  f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
720
760
  )
721
761
  ])
722
- elif self.flavor == 'timescaledb':
762
+ elif self.flavor in ('timescaledb', 'timescaledb-ha'):
723
763
  primary_queries.extend([
724
764
  (
725
765
  f"ALTER TABLE {_pipe_name}\n"
@@ -758,7 +798,7 @@ def get_create_index_queries(
758
798
 
759
799
  ### create id index
760
800
  if _id_name is not None:
761
- if self.flavor == 'timescaledb':
801
+ if self.flavor in ('timescaledb', 'timescaledb-ha'):
762
802
  ### Already created indices via create_hypertable.
763
803
  id_query = (
764
804
  None if (_id is not None and _create_space_partition)
@@ -797,7 +837,7 @@ def get_create_index_queries(
797
837
 
798
838
  cols_names_str = ", ".join(cols_names)
799
839
  index_query_params_clause = f" ({cols_names_str})"
800
- if self.flavor == 'postgis':
840
+ if self.flavor in ('postgis', 'timescaledb-ha'):
801
841
  for col in cols:
802
842
  col_typ = existing_cols_pd_types.get(cols[0], 'object')
803
843
  if col_typ != 'object' and are_dtypes_equal(col_typ, 'geometry'):
@@ -1005,6 +1045,8 @@ def get_pipe_data(
1005
1045
  limit: Optional[int] = None,
1006
1046
  begin_add_minutes: int = 0,
1007
1047
  end_add_minutes: int = 0,
1048
+ chunksize: Optional[int] = -1,
1049
+ as_iterator: bool = False,
1008
1050
  debug: bool = False,
1009
1051
  **kw: Any
1010
1052
  ) -> Union[pd.DataFrame, None]:
@@ -1041,14 +1083,17 @@ def get_pipe_data(
1041
1083
  If specified, limit the number of rows retrieved to this value.
1042
1084
 
1043
1085
  begin_add_minutes: int, default 0
1044
- The number of minutes to add to the `begin` datetime (i.e. `DATEADD`.
1086
+ The number of minutes to add to the `begin` datetime (i.e. `DATEADD`).
1045
1087
 
1046
1088
  end_add_minutes: int, default 0
1047
- The number of minutes to add to the `end` datetime (i.e. `DATEADD`.
1089
+ The number of minutes to add to the `end` datetime (i.e. `DATEADD`).
1048
1090
 
1049
1091
  chunksize: Optional[int], default -1
1050
1092
  The size of dataframe chunks to load into memory.
1051
1093
 
1094
+ as_iterator: bool, default False
1095
+ If `True`, return the chunks iterator directly.
1096
+
1052
1097
  debug: bool, default False
1053
1098
  Verbosity toggle.
1054
1099
 
@@ -1057,43 +1102,58 @@ def get_pipe_data(
1057
1102
  A `pd.DataFrame` of the pipe's data.
1058
1103
 
1059
1104
  """
1060
- import json
1061
- from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
1105
+ import functools
1062
1106
  from meerschaum.utils.packages import import_pandas
1063
- from meerschaum.utils.dtypes import (
1064
- attempt_cast_to_numeric,
1065
- attempt_cast_to_uuid,
1066
- attempt_cast_to_bytes,
1067
- attempt_cast_to_geometry,
1068
- are_dtypes_equal,
1069
- )
1107
+ from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal
1070
1108
  from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
1071
1109
  pd = import_pandas()
1072
1110
  is_dask = 'dask' in pd.__name__
1073
1111
 
1074
1112
  cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
1113
+ pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug) if pipe.enforce else {}
1114
+
1115
+ remote_pandas_types = {
1116
+ col: to_pandas_dtype(get_pd_type_from_db_type(typ))
1117
+ for col, typ in cols_types.items()
1118
+ }
1119
+ remote_dt_cols_types = {
1120
+ col: typ
1121
+ for col, typ in remote_pandas_types.items()
1122
+ if are_dtypes_equal(typ, 'datetime')
1123
+ }
1124
+ remote_dt_tz_aware_cols_types = {
1125
+ col: typ
1126
+ for col, typ in remote_dt_cols_types.items()
1127
+ if ',' in typ or typ == 'datetime'
1128
+ }
1129
+ remote_dt_tz_naive_cols_types = {
1130
+ col: typ
1131
+ for col, typ in remote_dt_cols_types.items()
1132
+ if col not in remote_dt_tz_aware_cols_types
1133
+ }
1134
+
1135
+ configured_pandas_types = {
1136
+ col: to_pandas_dtype(typ)
1137
+ for col, typ in pipe_dtypes.items()
1138
+ }
1139
+ configured_lower_precision_dt_cols_types = {
1140
+ col: typ
1141
+ for col, typ in pipe_dtypes.items()
1142
+ if (
1143
+ are_dtypes_equal('datetime', typ)
1144
+ and '[' in typ
1145
+ and 'ns' not in typ
1146
+ )
1147
+
1148
+ }
1149
+
1075
1150
  dtypes = {
1076
- **{
1077
- p_col: to_pandas_dtype(p_typ)
1078
- for p_col, p_typ in pipe.dtypes.items()
1079
- },
1080
- **{
1081
- col: get_pd_type_from_db_type(typ)
1082
- for col, typ in cols_types.items()
1083
- }
1151
+ **remote_pandas_types,
1152
+ **configured_pandas_types,
1153
+ **remote_dt_tz_aware_cols_types,
1154
+ **remote_dt_tz_naive_cols_types,
1155
+ **configured_lower_precision_dt_cols_types
1084
1156
  } if pipe.enforce else {}
1085
- if dtypes:
1086
- if self.flavor == 'sqlite':
1087
- if not pipe.columns.get('datetime', None):
1088
- _dt = pipe.guess_datetime()
1089
- else:
1090
- _dt = pipe.get_columns('datetime')
1091
-
1092
- if _dt:
1093
- dt_type = dtypes.get(_dt, 'object').lower()
1094
- if 'datetime' not in dt_type:
1095
- if 'int' not in dt_type:
1096
- dtypes[_dt] = 'datetime64[ns, UTC]'
1097
1157
 
1098
1158
  existing_cols = cols_types.keys()
1099
1159
  select_columns = (
@@ -1110,13 +1170,20 @@ def get_pipe_data(
1110
1170
  and col not in (omit_columns or [])
1111
1171
  ]
1112
1172
  ) if pipe.enforce else select_columns
1173
+
1113
1174
  if select_columns:
1114
1175
  dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
1176
+
1115
1177
  dtypes = {
1116
- col: to_pandas_dtype(typ)
1178
+ col: typ
1117
1179
  for col, typ in dtypes.items()
1118
- if col in select_columns and col not in (omit_columns or [])
1180
+ if col in (select_columns or [col]) and col not in (omit_columns or [])
1119
1181
  } if pipe.enforce else {}
1182
+
1183
+ if debug:
1184
+ dprint(f"[{self}] `read()` dtypes:")
1185
+ mrsm.pprint(dtypes)
1186
+
1120
1187
  query = self.get_pipe_data_query(
1121
1188
  pipe,
1122
1189
  select_columns=select_columns,
@@ -1132,91 +1199,25 @@ def get_pipe_data(
1132
1199
  **kw
1133
1200
  )
1134
1201
 
1202
+ read_kwargs = {}
1135
1203
  if is_dask:
1136
1204
  index_col = pipe.columns.get('datetime', None)
1137
- kw['index_col'] = index_col
1138
-
1139
- numeric_columns = [
1140
- col
1141
- for col, typ in pipe.dtypes.items()
1142
- if typ.startswith('numeric') and col in dtypes
1143
- ]
1144
- uuid_columns = [
1145
- col
1146
- for col, typ in pipe.dtypes.items()
1147
- if typ == 'uuid' and col in dtypes
1148
- ]
1149
- bytes_columns = [
1150
- col
1151
- for col, typ in pipe.dtypes.items()
1152
- if typ == 'bytes' and col in dtypes
1153
- ]
1154
- geometry_columns = [
1155
- col
1156
- for col, typ in pipe.dtypes.items()
1157
- if typ.startswith('geometry') and col in dtypes
1158
- ]
1159
-
1160
- kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
1205
+ read_kwargs['index_col'] = index_col
1161
1206
 
1162
- df = self.read(
1207
+ chunks = self.read(
1163
1208
  query,
1209
+ chunksize=chunksize,
1210
+ as_iterator=True,
1211
+ coerce_float=False,
1164
1212
  dtype=dtypes,
1165
1213
  debug=debug,
1166
- **kw
1214
+ **read_kwargs
1167
1215
  )
1168
- for col in numeric_columns:
1169
- if col not in df.columns:
1170
- continue
1171
- df[col] = df[col].apply(attempt_cast_to_numeric)
1172
1216
 
1173
- for col in uuid_columns:
1174
- if col not in df.columns:
1175
- continue
1176
- df[col] = df[col].apply(attempt_cast_to_uuid)
1177
-
1178
- for col in bytes_columns:
1179
- if col not in df.columns:
1180
- continue
1181
- df[col] = df[col].apply(attempt_cast_to_bytes)
1182
-
1183
- for col in geometry_columns:
1184
- if col not in df.columns:
1185
- continue
1186
- df[col] = df[col].apply(attempt_cast_to_geometry)
1217
+ if as_iterator:
1218
+ return chunks
1187
1219
 
1188
- if self.flavor == 'sqlite':
1189
- ignore_dt_cols = [
1190
- col
1191
- for col, dtype in pipe.dtypes.items()
1192
- if not are_dtypes_equal(str(dtype), 'datetime')
1193
- ]
1194
- ### NOTE: We have to consume the iterator here to ensure that datetimes are parsed correctly
1195
- df = (
1196
- parse_df_datetimes(
1197
- df,
1198
- ignore_cols=ignore_dt_cols,
1199
- chunksize=kw.get('chunksize', None),
1200
- strip_timezone=(pipe.tzinfo is None),
1201
- debug=debug,
1202
- ) if isinstance(df, pd.DataFrame) else (
1203
- [
1204
- parse_df_datetimes(
1205
- c,
1206
- ignore_cols=ignore_dt_cols,
1207
- chunksize=kw.get('chunksize', None),
1208
- strip_timezone=(pipe.tzinfo is None),
1209
- debug=debug,
1210
- )
1211
- for c in df
1212
- ]
1213
- )
1214
- )
1215
- for col, typ in dtypes.items():
1216
- if typ != 'json':
1217
- continue
1218
- df[col] = df[col].apply(lambda x: json.loads(x) if x is not None else x)
1219
- return df
1220
+ return pd.concat(chunks)
1220
1221
 
1221
1222
 
1222
1223
  def get_pipe_data_query(
@@ -1419,7 +1420,7 @@ def get_pipe_data_query(
1419
1420
  if k in existing_cols or skip_existing_cols_check
1420
1421
  }
1421
1422
  if valid_params:
1422
- where += build_where(valid_params, self).replace(
1423
+ where += ' ' + build_where(valid_params, self).lstrip().replace(
1423
1424
  'WHERE', (' AND' if is_dt_bound else " ")
1424
1425
  )
1425
1426
 
@@ -1503,7 +1504,7 @@ def get_pipe_attributes(
1503
1504
  """
1504
1505
  from meerschaum.connectors.sql.tables import get_tables
1505
1506
  from meerschaum.utils.packages import attempt_import
1506
- sqlalchemy = attempt_import('sqlalchemy')
1507
+ sqlalchemy = attempt_import('sqlalchemy', lazy=False)
1507
1508
 
1508
1509
  if pipe.get_id(debug=debug) is None:
1509
1510
  return {}
@@ -1514,16 +1515,16 @@ def get_pipe_attributes(
1514
1515
  q = sqlalchemy.select(pipes_tbl).where(pipes_tbl.c.pipe_id == pipe.id)
1515
1516
  if debug:
1516
1517
  dprint(q)
1517
- attributes = (
1518
- dict(self.exec(q, silent=True, debug=debug).first()._mapping)
1518
+ rows = (
1519
+ self.exec(q, silent=True, debug=debug).mappings().all()
1519
1520
  if self.flavor != 'duckdb'
1520
- else self.read(q, debug=debug).to_dict(orient='records')[0]
1521
+ else self.read(q, debug=debug).to_dict(orient='records')
1521
1522
  )
1522
- except Exception as e:
1523
- import traceback
1524
- traceback.print_exc()
1525
- warn(e)
1526
- print(pipe)
1523
+ if not rows:
1524
+ return {}
1525
+ attributes = dict(rows[0])
1526
+ except Exception:
1527
+ warn(traceback.format_exc())
1527
1528
  return {}
1528
1529
 
1529
1530
  ### handle non-PostgreSQL databases (text vs JSON)
@@ -1549,13 +1550,7 @@ def create_pipe_table_from_df(
1549
1550
  """
1550
1551
  Create a pipe's table from its configured dtypes and an incoming dataframe.
1551
1552
  """
1552
- from meerschaum.utils.dataframe import (
1553
- get_json_cols,
1554
- get_numeric_cols,
1555
- get_uuid_cols,
1556
- get_datetime_cols,
1557
- get_bytes_cols,
1558
- )
1553
+ from meerschaum.utils.dataframe import get_special_cols
1559
1554
  from meerschaum.utils.sql import (
1560
1555
  get_create_table_queries,
1561
1556
  sql_item_name,
@@ -1584,30 +1579,7 @@ def create_pipe_table_from_df(
1584
1579
  for col_ix, col in pipe.columns.items()
1585
1580
  if col and col_ix != 'primary'
1586
1581
  },
1587
- **{
1588
- col: 'uuid'
1589
- for col in get_uuid_cols(df)
1590
- },
1591
- **{
1592
- col: 'json'
1593
- for col in get_json_cols(df)
1594
- },
1595
- **{
1596
- col: 'numeric'
1597
- for col in get_numeric_cols(df)
1598
- },
1599
- **{
1600
- col: 'bytes'
1601
- for col in get_bytes_cols(df)
1602
- },
1603
- **{
1604
- col: 'datetime64[ns, UTC]'
1605
- for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
1606
- },
1607
- **{
1608
- col: 'datetime64[ns]'
1609
- for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
1610
- },
1582
+ **get_special_cols(df),
1611
1583
  **pipe.dtypes
1612
1584
  }
1613
1585
  autoincrement = (
@@ -1648,8 +1620,8 @@ def sync_pipe(
1648
1620
  self,
1649
1621
  pipe: mrsm.Pipe,
1650
1622
  df: Union[pd.DataFrame, str, Dict[Any, Any], None] = None,
1651
- begin: Optional[datetime] = None,
1652
- end: Optional[datetime] = None,
1623
+ begin: Union[datetime, int, None] = None,
1624
+ end: Union[datetime, int, None] = None,
1653
1625
  chunksize: Optional[int] = -1,
1654
1626
  check_existing: bool = True,
1655
1627
  blocking: bool = True,
@@ -1669,11 +1641,11 @@ def sync_pipe(
1669
1641
  An optional DataFrame or equivalent to sync into the pipe.
1670
1642
  Defaults to `None`.
1671
1643
 
1672
- begin: Optional[datetime], default None
1644
+ begin: Union[datetime, int, None], default None
1673
1645
  Optionally specify the earliest datetime to search for data.
1674
1646
  Defaults to `None`.
1675
1647
 
1676
- end: Optional[datetime], default None
1648
+ end: Union[datetime, int, None], default None
1677
1649
  Optionally specify the latest datetime to search for data.
1678
1650
  Defaults to `None`.
1679
1651
 
@@ -1707,8 +1679,9 @@ def sync_pipe(
1707
1679
  UPDATE_QUERIES,
1708
1680
  get_reset_autoincrement_queries,
1709
1681
  )
1710
- from meerschaum.utils.dtypes import are_dtypes_equal
1682
+ from meerschaum.utils.dtypes import get_current_timestamp
1711
1683
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1684
+ from meerschaum.utils.dataframe import get_special_cols
1712
1685
  from meerschaum import Pipe
1713
1686
  import time
1714
1687
  import copy
@@ -1720,6 +1693,7 @@ def sync_pipe(
1720
1693
 
1721
1694
  start = time.perf_counter()
1722
1695
  pipe_name = sql_item_name(pipe.target, self.flavor, schema=self.get_pipe_schema(pipe))
1696
+ dtypes = pipe.get_dtypes(debug=debug)
1723
1697
 
1724
1698
  if not pipe.temporary and not pipe.get_id(debug=debug):
1725
1699
  register_tuple = pipe.register(debug=debug)
@@ -1736,6 +1710,7 @@ def sync_pipe(
1736
1710
  df,
1737
1711
  chunksize=chunksize,
1738
1712
  safe_copy=kw.get('safe_copy', False),
1713
+ dtypes=dtypes,
1739
1714
  debug=debug,
1740
1715
  )
1741
1716
 
@@ -1748,36 +1723,17 @@ def sync_pipe(
1748
1723
  ### Check for new columns.
1749
1724
  add_cols_queries = self.get_add_columns_queries(pipe, df, debug=debug)
1750
1725
  if add_cols_queries:
1751
- _ = pipe.__dict__.pop('_columns_indices', None)
1752
- _ = pipe.__dict__.pop('_columns_types', None)
1726
+ pipe._clear_cache_key('_columns_types', debug=debug)
1727
+ pipe._clear_cache_key('_columns_indices', debug=debug)
1753
1728
  if not self.exec_queries(add_cols_queries, debug=debug):
1754
1729
  warn(f"Failed to add new columns to {pipe}.")
1755
1730
 
1756
1731
  alter_cols_queries = self.get_alter_columns_queries(pipe, df, debug=debug)
1757
1732
  if alter_cols_queries:
1758
- _ = pipe.__dict__.pop('_columns_indices', None)
1759
- _ = pipe.__dict__.pop('_columns_types', None)
1733
+ pipe._clear_cache_key('_columns_types', debug=debug)
1734
+ pipe._clear_cache_key('_columns_types', debug=debug)
1760
1735
  if not self.exec_queries(alter_cols_queries, debug=debug):
1761
1736
  warn(f"Failed to alter columns for {pipe}.")
1762
- else:
1763
- _ = pipe.infer_dtypes(persist=True)
1764
-
1765
- ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
1766
- ### so infer bools and persist them to `dtypes`.
1767
- if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
1768
- pipe_dtypes = pipe.dtypes
1769
- new_bool_cols = {
1770
- col: 'bool[pyarrow]'
1771
- for col, typ in df.dtypes.items()
1772
- if col not in pipe_dtypes
1773
- and are_dtypes_equal(str(typ), 'bool')
1774
- }
1775
- pipe_dtypes.update(new_bool_cols)
1776
- pipe.dtypes = pipe_dtypes
1777
- if new_bool_cols and not pipe.temporary:
1778
- infer_bool_success, infer_bool_msg = pipe.edit(debug=debug)
1779
- if not infer_bool_success:
1780
- return infer_bool_success, infer_bool_msg
1781
1737
 
1782
1738
  upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
1783
1739
  if upsert:
@@ -1807,7 +1763,7 @@ def sync_pipe(
1807
1763
  if 'name' in kw:
1808
1764
  kw.pop('name')
1809
1765
 
1810
- ### Insert new data into Pipe's table.
1766
+ ### Insert new data into the target table.
1811
1767
  unseen_kw = copy.deepcopy(kw)
1812
1768
  unseen_kw.update({
1813
1769
  'name': pipe.target,
@@ -1828,15 +1784,17 @@ def sync_pipe(
1828
1784
  is_new
1829
1785
  and primary_key
1830
1786
  and primary_key
1831
- not in pipe.dtypes
1787
+ not in dtypes
1832
1788
  and primary_key not in unseen_df.columns
1833
1789
  )
1834
1790
  )
1835
1791
  if autoincrement and autoincrement not in pipe.parameters:
1836
- pipe.parameters['autoincrement'] = autoincrement
1837
- edit_success, edit_msg = pipe.edit(debug=debug)
1838
- if not edit_success:
1839
- return edit_success, edit_msg
1792
+ update_success, update_msg = pipe.update_parameters(
1793
+ {'autoincrement': autoincrement},
1794
+ debug=debug,
1795
+ )
1796
+ if not update_success:
1797
+ return update_success, update_msg
1840
1798
 
1841
1799
  def _check_pk(_df_to_clear):
1842
1800
  if _df_to_clear is None:
@@ -1926,6 +1884,14 @@ def sync_pipe(
1926
1884
  label=('update' if not upsert else 'upsert'),
1927
1885
  )
1928
1886
  self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1887
+ update_dtypes = {
1888
+ **{
1889
+ col: str(typ)
1890
+ for col, typ in update_df.dtypes.items()
1891
+ },
1892
+ **get_special_cols(update_df)
1893
+ }
1894
+
1929
1895
  temp_pipe = Pipe(
1930
1896
  pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
1931
1897
  instance=pipe.instance_keys,
@@ -1934,34 +1900,30 @@ def sync_pipe(
1934
1900
  for ix_key, ix in pipe.columns.items()
1935
1901
  if ix and ix in update_df.columns
1936
1902
  },
1937
- dtypes={
1938
- col: typ
1939
- for col, typ in pipe.dtypes.items()
1940
- if col in update_df.columns
1941
- },
1903
+ dtypes=update_dtypes,
1942
1904
  target=temp_target,
1943
1905
  temporary=True,
1944
1906
  enforce=False,
1945
1907
  static=True,
1946
1908
  autoincrement=False,
1909
+ cache=False,
1947
1910
  parameters={
1948
1911
  'schema': self.internal_schema,
1949
1912
  'hypertable': False,
1950
1913
  },
1951
1914
  )
1952
- temp_pipe.__dict__['_columns_types'] = {
1953
- col: get_db_type_from_pd_type(
1954
- pipe.dtypes.get(col, str(typ)),
1955
- self.flavor,
1956
- )
1957
- for col, typ in update_df.dtypes.items()
1915
+ _temp_columns_types = {
1916
+ col: get_db_type_from_pd_type(typ, self.flavor)
1917
+ for col, typ in update_dtypes.items()
1958
1918
  }
1959
- now_ts = time.perf_counter()
1960
- temp_pipe.__dict__['_columns_types_timestamp'] = now_ts
1961
- temp_pipe.__dict__['_skip_check_indices'] = True
1919
+ temp_pipe._cache_value('_columns_types', _temp_columns_types, memory_only=True, debug=debug)
1920
+ temp_pipe._cache_value('_skip_check_indices', True, memory_only=True, debug=debug)
1921
+ now_ts = get_current_timestamp('ms', as_int=True) / 1000
1922
+ temp_pipe._cache_value('_columns_types_timestamp', now_ts, memory_only=True, debug=debug)
1962
1923
  temp_success, temp_msg = temp_pipe.sync(update_df, check_existing=False, debug=debug)
1963
1924
  if not temp_success:
1964
1925
  return temp_success, temp_msg
1926
+
1965
1927
  existing_cols = pipe.get_columns_types(debug=debug)
1966
1928
  join_cols = [
1967
1929
  col
@@ -1969,7 +1931,11 @@ def sync_pipe(
1969
1931
  if col and col in existing_cols
1970
1932
  ] if not primary_key or self.flavor == 'oracle' else (
1971
1933
  [dt_col, primary_key]
1972
- if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
1934
+ if (
1935
+ self.flavor in ('timescaledb', 'timescaledb-ha')
1936
+ and dt_col
1937
+ and dt_col in update_df.columns
1938
+ )
1973
1939
  else [primary_key]
1974
1940
  )
1975
1941
  update_queries = get_update_queries(
@@ -1980,6 +1946,8 @@ def sync_pipe(
1980
1946
  upsert=upsert,
1981
1947
  schema=self.get_pipe_schema(pipe),
1982
1948
  patch_schema=self.internal_schema,
1949
+ target_cols_types=pipe.get_columns_types(debug=debug),
1950
+ patch_cols_types=_temp_columns_types,
1983
1951
  datetime_col=(dt_col if dt_col in update_df.columns else None),
1984
1952
  identity_insert=(autoincrement and primary_key in update_df.columns),
1985
1953
  null_indices=pipe.null_indices,
@@ -2267,13 +2235,13 @@ def sync_pipe_inplace(
2267
2235
 
2268
2236
  add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
2269
2237
  if add_cols_queries:
2270
- _ = pipe.__dict__.pop('_columns_types', None)
2271
- _ = pipe.__dict__.pop('_columns_indices', None)
2238
+ pipe._clear_cache_key('_columns_types', debug=debug)
2239
+ pipe._clear_cache_key('_columns_indices', debug=debug)
2272
2240
  self.exec_queries(add_cols_queries, debug=debug)
2273
2241
 
2274
2242
  alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
2275
2243
  if alter_cols_queries:
2276
- _ = pipe.__dict__.pop('_columns_types', None)
2244
+ pipe._clear_cache_key('_columns_types', debug=debug)
2277
2245
  self.exec_queries(alter_cols_queries, debug=debug)
2278
2246
 
2279
2247
  insert_queries = [
@@ -2576,6 +2544,8 @@ def sync_pipe_inplace(
2576
2544
  upsert=upsert,
2577
2545
  schema=self.get_pipe_schema(pipe),
2578
2546
  patch_schema=internal_schema,
2547
+ target_cols_types=pipe.get_columns_types(debug=debug),
2548
+ patch_cols_types=delta_cols_types,
2579
2549
  datetime_col=pipe.columns.get('datetime', None),
2580
2550
  flavor=self.flavor,
2581
2551
  null_indices=pipe.null_indices,
@@ -2779,7 +2749,6 @@ def pipe_exists(
2779
2749
  debug=debug,
2780
2750
  )
2781
2751
  if debug:
2782
- from meerschaum.utils.debug import dprint
2783
2752
  dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
2784
2753
  return exists
2785
2754
 
@@ -2833,7 +2802,6 @@ def get_pipe_rowcount(
2833
2802
  error(msg)
2834
2803
  return None
2835
2804
 
2836
-
2837
2805
  flavor = self.flavor if not remote else pipe.connector.flavor
2838
2806
  conn = self if not remote else pipe.connector
2839
2807
  _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
@@ -3068,6 +3036,7 @@ def get_pipe_table(
3068
3036
  from meerschaum.utils.sql import get_sqlalchemy_table
3069
3037
  if not pipe.exists(debug=debug):
3070
3038
  return None
3039
+
3071
3040
  return get_sqlalchemy_table(
3072
3041
  pipe.target,
3073
3042
  connector=self,
@@ -3117,11 +3086,19 @@ def get_pipe_columns_types(
3117
3086
  debug=debug,
3118
3087
  )
3119
3088
 
3089
+ if debug:
3090
+ dprint(f"Fetching columns_types for {pipe} with via SQLAlchemy table.")
3091
+
3120
3092
  table_columns = {}
3121
3093
  try:
3122
3094
  pipe_table = self.get_pipe_table(pipe, debug=debug)
3123
3095
  if pipe_table is None:
3124
3096
  return {}
3097
+
3098
+ if debug:
3099
+ dprint("Found columns:")
3100
+ mrsm.pprint(dict(pipe_table.columns))
3101
+
3125
3102
  for col in pipe_table.columns:
3126
3103
  table_columns[str(col.name)] = str(col.type)
3127
3104
  except Exception as e:
@@ -3153,6 +3130,7 @@ def get_pipe_columns_indices(
3153
3130
  """
3154
3131
  if pipe.__dict__.get('_skip_check_indices', False):
3155
3132
  return {}
3133
+
3156
3134
  from meerschaum.utils.sql import get_table_cols_indices
3157
3135
  return get_table_cols_indices(
3158
3136
  pipe.target,
@@ -3207,7 +3185,6 @@ def get_add_columns_queries(
3207
3185
  get_db_type_from_pd_type,
3208
3186
  )
3209
3187
  from meerschaum.utils.misc import flatten_list
3210
- table_obj = self.get_pipe_table(pipe, debug=debug)
3211
3188
  is_dask = 'dask' in df.__module__ if not isinstance(df, dict) else False
3212
3189
  if is_dask:
3213
3190
  df = df.partitions[0].compute()
@@ -3231,9 +3208,6 @@ def get_add_columns_queries(
3231
3208
  elif isinstance(val, str):
3232
3209
  df_cols_types[col] = 'str'
3233
3210
  db_cols_types = {
3234
- col: get_pd_type_from_db_type(str(typ.type))
3235
- for col, typ in table_obj.columns.items()
3236
- } if table_obj is not None else {
3237
3211
  col: get_pd_type_from_db_type(typ)
3238
3212
  for col, typ in get_table_cols_types(
3239
3213
  pipe.target,
@@ -3313,10 +3287,9 @@ def get_alter_columns_queries(
3313
3287
  -------
3314
3288
  A list of the `ALTER TABLE` SQL query or queries to be executed on the provided connector.
3315
3289
  """
3316
- if not pipe.exists(debug=debug):
3290
+ if not pipe.exists(debug=debug) or pipe.static:
3317
3291
  return []
3318
- if pipe.static:
3319
- return
3292
+
3320
3293
  from meerschaum.utils.sql import (
3321
3294
  sql_item_name,
3322
3295
  get_table_cols_types,
@@ -3330,7 +3303,6 @@ def get_alter_columns_queries(
3330
3303
  get_db_type_from_pd_type,
3331
3304
  )
3332
3305
  from meerschaum.utils.misc import flatten_list, generate_password, items_str
3333
- table_obj = self.get_pipe_table(pipe, debug=debug)
3334
3306
  target = pipe.target
3335
3307
  session_id = generate_password(3)
3336
3308
  numeric_cols = (
@@ -3351,9 +3323,6 @@ def get_alter_columns_queries(
3351
3323
  else df
3352
3324
  )
3353
3325
  db_cols_types = {
3354
- col: get_pd_type_from_db_type(str(typ.type))
3355
- for col, typ in table_obj.columns.items()
3356
- } if table_obj is not None else {
3357
3326
  col: get_pd_type_from_db_type(typ)
3358
3327
  for col, typ in get_table_cols_types(
3359
3328
  pipe.target,
@@ -3362,7 +3331,8 @@ def get_alter_columns_queries(
3362
3331
  debug=debug,
3363
3332
  ).items()
3364
3333
  }
3365
- pipe_bool_cols = [col for col, typ in pipe.dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3334
+ pipe_dtypes = pipe.get_dtypes(debug=debug)
3335
+ pipe_bool_cols = [col for col, typ in pipe_dtypes.items() if are_dtypes_equal(str(typ), 'bool')]
3366
3336
  pd_db_df_aliases = {
3367
3337
  'int': 'bool',
3368
3338
  'float': 'bool',
@@ -3370,7 +3340,11 @@ def get_alter_columns_queries(
3370
3340
  'guid': 'object',
3371
3341
  }
3372
3342
  if self.flavor == 'oracle':
3373
- pd_db_df_aliases['int'] = 'numeric'
3343
+ pd_db_df_aliases.update({
3344
+ 'int': 'numeric',
3345
+ 'date': 'datetime',
3346
+ 'numeric': 'int',
3347
+ })
3374
3348
 
3375
3349
  altered_cols = {
3376
3350
  col: (db_cols_types.get(col, 'object'), typ)
@@ -3379,11 +3353,33 @@ def get_alter_columns_queries(
3379
3353
  and not are_dtypes_equal(db_cols_types.get(col, 'object'), 'string')
3380
3354
  }
3381
3355
 
3356
+ if debug and altered_cols:
3357
+ dprint("Columns to be altered:")
3358
+ mrsm.pprint(altered_cols)
3359
+
3360
+ ### NOTE: Special columns (numerics, bools, etc.) are captured and cached upon detection.
3361
+ new_special_cols = pipe._get_cached_value('new_special_cols', debug=debug) or {}
3362
+ new_special_db_cols_types = {
3363
+ col: (db_cols_types.get(col, 'object'), typ)
3364
+ for col, typ in new_special_cols.items()
3365
+ }
3366
+ if debug:
3367
+ dprint("Cached new special columns:")
3368
+ mrsm.pprint(new_special_cols)
3369
+ dprint("New special columns db types:")
3370
+ mrsm.pprint(new_special_db_cols_types)
3371
+
3372
+ altered_cols.update(new_special_db_cols_types)
3373
+
3382
3374
  ### NOTE: Sometimes bools are coerced into ints or floats.
3383
3375
  altered_cols_to_ignore = set()
3384
3376
  for col, (db_typ, df_typ) in altered_cols.items():
3385
3377
  for db_alias, df_alias in pd_db_df_aliases.items():
3386
- if db_alias in db_typ.lower() and df_alias in df_typ.lower():
3378
+ if (
3379
+ db_alias in db_typ.lower()
3380
+ and df_alias in df_typ.lower()
3381
+ and col not in new_special_cols
3382
+ ):
3387
3383
  altered_cols_to_ignore.add(col)
3388
3384
 
3389
3385
  ### Oracle's bool handling sometimes mixes NUMBER and INT.
@@ -3405,21 +3401,29 @@ def get_alter_columns_queries(
3405
3401
  if db_is_bool_compatible and df_is_bool_compatible:
3406
3402
  altered_cols_to_ignore.add(bool_col)
3407
3403
 
3404
+ if debug and altered_cols_to_ignore:
3405
+ dprint("Ignoring the following altered columns (false positives).")
3406
+ mrsm.pprint(altered_cols_to_ignore)
3407
+
3408
3408
  for col in altered_cols_to_ignore:
3409
3409
  _ = altered_cols.pop(col, None)
3410
+
3410
3411
  if not altered_cols:
3411
3412
  return []
3412
3413
 
3413
3414
  if numeric_cols:
3414
- pipe.dtypes.update({col: 'numeric' for col in numeric_cols})
3415
- edit_success, edit_msg = pipe.edit(debug=debug)
3416
- if not edit_success:
3417
- warn(
3418
- f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
3419
- + f"{edit_msg}"
3420
- )
3415
+ explicit_pipe_dtypes = pipe.get_dtypes(infer=False, debug=debug)
3416
+ explicit_pipe_dtypes.update({col: 'numeric' for col in numeric_cols})
3417
+ pipe.dtypes = explicit_pipe_dtypes
3418
+ if not pipe.temporary:
3419
+ edit_success, edit_msg = pipe.edit(debug=debug)
3420
+ if not edit_success:
3421
+ warn(
3422
+ f"Failed to update dtypes for numeric columns {items_str(numeric_cols)}:\n"
3423
+ + f"{edit_msg}"
3424
+ )
3421
3425
  else:
3422
- numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
3426
+ numeric_cols.extend([col for col, typ in pipe_dtypes.items() if typ.startswith('numeric')])
3423
3427
 
3424
3428
  numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3425
3429
  text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
@@ -3445,12 +3449,12 @@ def get_alter_columns_queries(
3445
3449
  + sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
3446
3450
  + " (\n"
3447
3451
  )
3448
- for col_name, col_obj in table_obj.columns.items():
3452
+ for col_name, col_typ in db_cols_types.items():
3449
3453
  create_query += (
3450
3454
  sql_item_name(col_name, self.flavor, None)
3451
3455
  + " "
3452
3456
  + (
3453
- str(col_obj.type)
3457
+ col_typ
3454
3458
  if col_name not in altered_cols
3455
3459
  else altered_cols_types[col_name]
3456
3460
  )
@@ -3464,12 +3468,12 @@ def get_alter_columns_queries(
3464
3468
  + ' ('
3465
3469
  + ', '.join([
3466
3470
  sql_item_name(col_name, self.flavor, None)
3467
- for col_name, _ in table_obj.columns.items()
3471
+ for col_name in db_cols_types
3468
3472
  ])
3469
3473
  + ')'
3470
3474
  + "\nSELECT\n"
3471
3475
  )
3472
- for col_name, col_obj in table_obj.columns.items():
3476
+ for col_name in db_cols_types:
3473
3477
  new_col_str = (
3474
3478
  sql_item_name(col_name, self.flavor, None)
3475
3479
  if col_name not in altered_cols
@@ -3482,6 +3486,7 @@ def get_alter_columns_queries(
3482
3486
  )
3483
3487
  )
3484
3488
  insert_query += new_col_str + ",\n"
3489
+
3485
3490
  insert_query = insert_query[:-2] + (
3486
3491
  f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
3487
3492
  )
@@ -3627,20 +3632,18 @@ def get_to_sql_dtype(
3627
3632
  >>> get_to_sql_dtype(pipe, df)
3628
3633
  {'a': <class 'sqlalchemy.sql.sqltypes.JSON'>}
3629
3634
  """
3630
- from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
3635
+ from meerschaum.utils.dataframe import get_special_cols
3631
3636
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
3632
3637
  df_dtypes = {
3633
3638
  col: str(typ)
3634
3639
  for col, typ in df.dtypes.items()
3635
3640
  }
3636
- json_cols = get_json_cols(df)
3637
- numeric_cols = get_numeric_cols(df)
3638
- uuid_cols = get_uuid_cols(df)
3639
- df_dtypes.update({col: 'json' for col in json_cols})
3640
- df_dtypes.update({col: 'numeric' for col in numeric_cols})
3641
- df_dtypes.update({col: 'uuid' for col in uuid_cols})
3641
+ special_cols = get_special_cols(df)
3642
+ df_dtypes.update(special_cols)
3643
+
3642
3644
  if update_dtypes:
3643
3645
  df_dtypes.update(pipe.dtypes)
3646
+
3644
3647
  return {
3645
3648
  col: get_db_type_from_pd_type(typ, self.flavor, as_sqlalchemy=True)
3646
3649
  for col, typ in df_dtypes.items()
@@ -3881,13 +3884,15 @@ def get_pipe_schema(self, pipe: mrsm.Pipe) -> Union[str, None]:
3881
3884
  -------
3882
3885
  A schema string or `None` if nothing is configured.
3883
3886
  """
3887
+ if self.flavor == 'sqlite':
3888
+ return self.schema
3884
3889
  return pipe.parameters.get('schema', self.schema)
3885
3890
 
3886
3891
 
3887
3892
  @staticmethod
3888
3893
  def get_temporary_target(
3889
3894
  target: str,
3890
- transact_id: Optional[str, None] = None,
3895
+ transact_id: Optional[str] = None,
3891
3896
  label: Optional[str] = None,
3892
3897
  separator: Optional[str] = None,
3893
3898
  ) -> str:
@@ -3909,3 +3914,15 @@ def get_temporary_target(
3909
3914
  + transact_id
3910
3915
  + ((separator + label) if label else '')
3911
3916
  )
3917
+
3918
+
3919
+ def _enforce_pipe_dtypes_chunks_hook(
3920
+ pipe: mrsm.Pipe,
3921
+ chunk_df: 'pd.DataFrame',
3922
+ debug: bool = False,
3923
+ **kwargs
3924
+ ) -> 'pd.DataFrame':
3925
+ """
3926
+ Enforce a pipe's dtypes on each chunk.
3927
+ """
3928
+ return pipe.enforce_dtypes(chunk_df, debug=debug)