meerschaum 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -391,7 +391,7 @@ def get_create_index_queries(
391
391
  from meerschaum.utils.sql import (
392
392
  sql_item_name,
393
393
  get_distinct_col_count,
394
- update_queries,
394
+ UPDATE_QUERIES,
395
395
  get_null_replacement,
396
396
  get_create_table_queries,
397
397
  get_rename_table_queries,
@@ -405,7 +405,7 @@ def get_create_index_queries(
405
405
  from meerschaum.config import get_config
406
406
  index_queries = {}
407
407
 
408
- upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
408
+ upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
409
409
  static = pipe.parameters.get('static', False)
410
410
  index_names = pipe.get_indices()
411
411
  indices = pipe.indices
@@ -512,7 +512,7 @@ def get_create_index_queries(
512
512
  + 'if_not_exists => true, '
513
513
  + "migrate_data => true);"
514
514
  )
515
- elif _datetime_index_name:
515
+ elif _datetime_index_name and _datetime != primary_key:
516
516
  if self.flavor == 'mssql':
517
517
  dt_query = (
518
518
  f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
@@ -1105,12 +1105,13 @@ def get_pipe_data_query(
1105
1105
  from meerschaum.utils.misc import items_str
1106
1106
  from meerschaum.utils.sql import sql_item_name, dateadd_str
1107
1107
  from meerschaum.utils.dtypes import coerce_timezone
1108
- from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
1108
+ from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
1109
1109
 
1110
1110
  dt_col = pipe.columns.get('datetime', None)
1111
1111
  existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
1112
1112
  skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
1113
1113
  dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
1114
+ dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
1114
1115
  select_columns = (
1115
1116
  [col for col in existing_cols]
1116
1117
  if not select_columns
@@ -1200,6 +1201,7 @@ def get_pipe_data_query(
1200
1201
  datepart='minute',
1201
1202
  number=begin_add_minutes,
1202
1203
  begin=begin,
1204
+ db_type=dt_db_type,
1203
1205
  )
1204
1206
  where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "")
1205
1207
  is_dt_bound = True
@@ -1211,7 +1213,8 @@ def get_pipe_data_query(
1211
1213
  flavor=self.flavor,
1212
1214
  datepart='minute',
1213
1215
  number=end_add_minutes,
1214
- begin=end
1216
+ begin=end,
1217
+ db_type=dt_db_type,
1215
1218
  )
1216
1219
  where += f"{dt} < {end_da}"
1217
1220
  is_dt_bound = True
@@ -1362,7 +1365,18 @@ def create_pipe_table_from_df(
1362
1365
  get_bytes_cols,
1363
1366
  )
1364
1367
  from meerschaum.utils.sql import get_create_table_queries, sql_item_name
1368
+ from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
1365
1369
  primary_key = pipe.columns.get('primary', None)
1370
+ primary_key_typ = (
1371
+ pipe.dtypes.get(primary_key, str(df.dtypes.get(primary_key)))
1372
+ if primary_key
1373
+ else None
1374
+ )
1375
+ primary_key_db_type = (
1376
+ get_db_type_from_pd_type(primary_key_typ, self.flavor)
1377
+ if primary_key
1378
+ else None
1379
+ )
1366
1380
  dt_col = pipe.columns.get('datetime', None)
1367
1381
  new_dtypes = {
1368
1382
  **{
@@ -1413,6 +1427,7 @@ def create_pipe_table_from_df(
1413
1427
  self.flavor,
1414
1428
  schema=self.get_pipe_schema(pipe),
1415
1429
  primary_key=primary_key,
1430
+ primary_key_db_type=primary_key_db_type,
1416
1431
  datetime_column=dt_col,
1417
1432
  )
1418
1433
  success = all(
@@ -1487,7 +1502,7 @@ def sync_pipe(
1487
1502
  from meerschaum.utils.sql import (
1488
1503
  get_update_queries,
1489
1504
  sql_item_name,
1490
- update_queries,
1505
+ UPDATE_QUERIES,
1491
1506
  get_reset_autoincrement_queries,
1492
1507
  )
1493
1508
  from meerschaum.utils.misc import generate_password
@@ -1563,7 +1578,7 @@ def sync_pipe(
1563
1578
  if not infer_bool_success:
1564
1579
  return infer_bool_success, infer_bool_msg
1565
1580
 
1566
- upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
1581
+ upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
1567
1582
  if upsert:
1568
1583
  check_existing = False
1569
1584
  kw['safe_copy'] = kw.get('safe_copy', False)
@@ -1794,12 +1809,12 @@ def sync_pipe(
1794
1809
  update_count = len(update_df.index) if update_df is not None else 0
1795
1810
  msg = (
1796
1811
  (
1797
- f"Inserted {unseen_count}, "
1798
- + f"updated {update_count} rows."
1812
+ f"Inserted {unseen_count:,}, "
1813
+ + f"updated {update_count:,} rows."
1799
1814
  )
1800
1815
  if not upsert
1801
1816
  else (
1802
- f"Upserted {update_count} row"
1817
+ f"Upserted {update_count:,} row"
1803
1818
  + ('s' if update_count != 1 else '')
1804
1819
  + "."
1805
1820
  )
@@ -1886,10 +1901,12 @@ def sync_pipe_inplace(
1886
1901
  get_create_table_queries,
1887
1902
  get_table_cols_types,
1888
1903
  session_execute,
1889
- update_queries,
1904
+ dateadd_str,
1905
+ UPDATE_QUERIES,
1890
1906
  )
1891
1907
  from meerschaum.utils.dtypes.sql import (
1892
1908
  get_pd_type_from_db_type,
1909
+ get_db_type_from_pd_type,
1893
1910
  )
1894
1911
  from meerschaum.utils.misc import generate_password
1895
1912
 
@@ -1905,13 +1922,14 @@ def sync_pipe_inplace(
1905
1922
  for table_root in temp_table_roots
1906
1923
  }
1907
1924
  temp_table_names = {
1908
- table_root: sql_item_name(
1909
- table_name_raw,
1910
- self.flavor,
1911
- internal_schema,
1912
- )
1925
+ table_root: sql_item_name(table_name_raw, self.flavor, internal_schema)
1913
1926
  for table_root, table_name_raw in temp_tables.items()
1914
1927
  }
1928
+ temp_table_aliases = {
1929
+ table_root: sql_item_name(table_root, self.flavor)
1930
+ for table_root in temp_table_roots
1931
+ }
1932
+ table_alias_as = " AS" if self.flavor != 'oracle' else ''
1915
1933
  metadef = self.get_pipe_metadef(
1916
1934
  pipe,
1917
1935
  params=params,
@@ -1921,13 +1939,21 @@ def sync_pipe_inplace(
1921
1939
  debug=debug,
1922
1940
  )
1923
1941
  pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1924
- upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in update_queries
1942
+ upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in UPDATE_QUERIES
1925
1943
  static = pipe.parameters.get('static', False)
1926
1944
  database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
1927
1945
  primary_key = pipe.columns.get('primary', None)
1946
+ primary_key_typ = pipe.dtypes.get(primary_key, None) if primary_key else None
1947
+ primary_key_db_type = (
1948
+ get_db_type_from_pd_type(primary_key_typ, self.flavor)
1949
+ if primary_key_typ
1950
+ else None
1951
+ )
1928
1952
  autoincrement = pipe.parameters.get('autoincrement', False)
1929
1953
  dt_col = pipe.columns.get('datetime', None)
1930
1954
  dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
1955
+ dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
1956
+ dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
1931
1957
 
1932
1958
  def clean_up_temp_tables(ready_to_drop: bool = False):
1933
1959
  log_success, log_msg = self._log_temporary_tables_creation(
@@ -1957,6 +1983,7 @@ def sync_pipe_inplace(
1957
1983
  self.flavor,
1958
1984
  schema=self.get_pipe_schema(pipe),
1959
1985
  primary_key=primary_key,
1986
+ primary_key_db_type=primary_key_db_type,
1960
1987
  autoincrement=autoincrement,
1961
1988
  datetime_column=dt_col,
1962
1989
  )
@@ -1970,7 +1997,7 @@ def sync_pipe_inplace(
1970
1997
 
1971
1998
  rowcount = pipe.get_rowcount(debug=debug)
1972
1999
  _ = clean_up_temp_tables()
1973
- return True, f"Inserted {rowcount}, updated 0 rows."
2000
+ return True, f"Inserted {rowcount:,}, updated 0 rows."
1974
2001
 
1975
2002
  session = sqlalchemy_orm.Session(self.engine)
1976
2003
  connectable = session if self.flavor != 'duckdb' else self
@@ -2007,7 +2034,7 @@ def sync_pipe_inplace(
2007
2034
  str(col_name): get_pd_type_from_db_type(str(col_type))
2008
2035
  for col_name, col_type in new_cols_types.items()
2009
2036
  }
2010
- new_cols_str = ', '.join([
2037
+ new_cols_str = '\n ' + ',\n '.join([
2011
2038
  sql_item_name(col, self.flavor)
2012
2039
  for col in new_cols
2013
2040
  ])
@@ -2030,7 +2057,8 @@ def sync_pipe_inplace(
2030
2057
  insert_queries = [
2031
2058
  (
2032
2059
  f"INSERT INTO {pipe_name} ({new_cols_str})\n"
2033
- + f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}"
2060
+ f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}{table_alias_as}"
2061
+ f" {temp_table_aliases['new']}"
2034
2062
  )
2035
2063
  ] if not check_existing and not upsert else []
2036
2064
 
@@ -2049,12 +2077,13 @@ def sync_pipe_inplace(
2049
2077
  _ = clean_up_temp_tables()
2050
2078
  return True, f"Inserted {new_count}, updated 0 rows."
2051
2079
 
2080
+ dt_col_name_da = dateadd_str(flavor=self.flavor, begin=dt_col_name, db_type=dt_db_type)
2052
2081
  (new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
2053
2082
  session,
2054
2083
  [
2055
2084
  "SELECT\n"
2056
- f" MIN({dt_col_name}) AS {sql_item_name('min_dt', self.flavor)},\n"
2057
- f" MAX({dt_col_name}) AS {sql_item_name('max_dt', self.flavor)}\n"
2085
+ f" MIN({dt_col_name_da}) AS {sql_item_name('min_dt', self.flavor)},\n"
2086
+ f" MAX({dt_col_name_da}) AS {sql_item_name('max_dt', self.flavor)}\n"
2058
2087
  f"FROM {temp_table_names['new' if not upsert else 'update']}\n"
2059
2088
  f"WHERE {dt_col_name} IS NOT NULL"
2060
2089
  ],
@@ -2121,9 +2150,9 @@ def sync_pipe_inplace(
2121
2150
  } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
2122
2151
 
2123
2152
  null_replace_new_cols_str = (
2124
- ', '.join([
2125
- f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
2126
- + get_null_replacement(get_col_typ(col, new_cols), self.flavor)
2153
+ '\n ' + ',\n '.join([
2154
+ f"COALESCE({temp_table_aliases['new']}.{sql_item_name(col, self.flavor)}, "
2155
+ + get_null_replacement(get_col_typ(col, new_cols_types), self.flavor)
2127
2156
  + ") AS "
2128
2157
  + sql_item_name(col, self.flavor, None)
2129
2158
  for col, typ in new_cols.items()
@@ -2131,29 +2160,30 @@ def sync_pipe_inplace(
2131
2160
  )
2132
2161
 
2133
2162
  select_delta_query = (
2134
- "SELECT\n"
2135
- + null_replace_new_cols_str + "\n"
2136
- + f"\nFROM {temp_table_names['new']}\n"
2137
- + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n"
2138
- + '\nAND\n'.join([
2163
+ "SELECT"
2164
+ + null_replace_new_cols_str
2165
+ + f"\nFROM {temp_table_names['new']}{table_alias_as} {temp_table_aliases['new']}\n"
2166
+ + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as} {temp_table_aliases['backtrack']}"
2167
+ + "\n ON\n "
2168
+ + '\n AND\n '.join([
2139
2169
  (
2140
- f"COALESCE({temp_table_names['new']}."
2170
+ f" COALESCE({temp_table_aliases['new']}."
2141
2171
  + sql_item_name(c, self.flavor, None)
2142
2172
  + ", "
2143
- + get_null_replacement(get_col_typ(c, new_cols), self.flavor)
2144
- + ") "
2145
- + ' = '
2146
- + f"COALESCE({temp_table_names['backtrack']}."
2173
+ + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor)
2174
+ + ")"
2175
+ + '\n =\n '
2176
+ + f" COALESCE({temp_table_aliases['backtrack']}."
2147
2177
  + sql_item_name(c, self.flavor, None)
2148
2178
  + ", "
2149
- + get_null_replacement(backtrack_cols_types[c], self.flavor)
2179
+ + get_null_replacement(get_col_typ(c, backtrack_cols_types), self.flavor)
2150
2180
  + ") "
2151
2181
  ) for c in common_cols
2152
2182
  ])
2153
- + "\nWHERE\n"
2154
- + '\nAND\n'.join([
2183
+ + "\nWHERE\n "
2184
+ + '\n AND\n '.join([
2155
2185
  (
2156
- f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) + ' IS NULL'
2186
+ f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor) + ' IS NULL'
2157
2187
  ) for c in common_cols
2158
2188
  ])
2159
2189
  )
@@ -2196,37 +2226,33 @@ def sync_pipe_inplace(
2196
2226
  ])
2197
2227
 
2198
2228
  select_joined_query = (
2199
- "SELECT "
2200
- + (', '.join([
2229
+ "SELECT\n "
2230
+ + (',\n '.join([
2201
2231
  (
2202
- f"{temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
2232
+ f"{temp_table_aliases['delta']}." + sql_item_name(c, self.flavor, None)
2203
2233
  + " AS " + sql_item_name(c + '_delta', self.flavor, None)
2204
2234
  ) for c in delta_cols
2205
2235
  ]))
2206
- + ", "
2207
- + (', '.join([
2236
+ + ",\n "
2237
+ + (',\n '.join([
2208
2238
  (
2209
- f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
2239
+ f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor, None)
2210
2240
  + " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
2211
2241
  ) for c in backtrack_cols_types
2212
2242
  ]))
2213
- + f"\nFROM {temp_table_names['delta']}\n"
2214
- + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n"
2215
- + '\nAND\n'.join([
2243
+ + f"\nFROM {temp_table_names['delta']}{table_alias_as} {temp_table_aliases['delta']}\n"
2244
+ + f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as}"
2245
+ + f" {temp_table_aliases['backtrack']}"
2246
+ + "\n ON\n "
2247
+ + '\n AND\n '.join([
2216
2248
  (
2217
- f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
2249
+ f" COALESCE({temp_table_aliases['delta']}." + sql_item_name(c, self.flavor)
2218
2250
  + ", "
2219
- + get_null_replacement(
2220
- get_col_typ(c, on_cols),
2221
- self.flavor
2222
- ) + ")"
2223
- + ' = '
2224
- + f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
2251
+ + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
2252
+ + '\n =\n '
2253
+ + f" COALESCE({temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor)
2225
2254
  + ", "
2226
- + get_null_replacement(
2227
- get_col_typ(c, on_cols),
2228
- self.flavor
2229
- ) + ")"
2255
+ + get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
2230
2256
  ) for c, typ in on_cols.items()
2231
2257
  ])
2232
2258
  )
@@ -2247,19 +2273,19 @@ def sync_pipe_inplace(
2247
2273
  return create_joined_success, create_joined_msg
2248
2274
 
2249
2275
  select_unseen_query = (
2250
- "SELECT "
2251
- + (', '.join([
2276
+ "SELECT\n "
2277
+ + (',\n '.join([
2252
2278
  (
2253
- "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2254
- + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
2279
+ "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2280
+ + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
2255
2281
  + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
2256
- + "\n ELSE NULL\nEND "
2282
+ + "\n ELSE NULL\n END"
2257
2283
  + " AS " + sql_item_name(c, self.flavor, None)
2258
2284
  ) for c, typ in delta_cols.items()
2259
2285
  ]))
2260
- + f"\nFROM {temp_table_names['joined']}\n"
2261
- + "WHERE "
2262
- + '\nAND\n'.join([
2286
+ + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
2287
+ + "WHERE\n "
2288
+ + '\n AND\n '.join([
2263
2289
  (
2264
2290
  sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
2265
2291
  ) for c in delta_cols
@@ -2282,19 +2308,19 @@ def sync_pipe_inplace(
2282
2308
  return create_unseen_success, create_unseen_msg
2283
2309
 
2284
2310
  select_update_query = (
2285
- "SELECT "
2286
- + (', '.join([
2311
+ "SELECT\n "
2312
+ + (',\n '.join([
2287
2313
  (
2288
- "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2289
- + " != " + get_null_replacement(get_col_typ(c, delta_cols), self.flavor)
2314
+ "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
2315
+ + " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
2290
2316
  + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
2291
- + "\n ELSE NULL\nEND "
2317
+ + "\n ELSE NULL\n END"
2292
2318
  + " AS " + sql_item_name(c, self.flavor, None)
2293
2319
  ) for c, typ in delta_cols.items()
2294
2320
  ]))
2295
- + f"\nFROM {temp_table_names['joined']}\n"
2296
- + "WHERE "
2297
- + '\nOR\n'.join([
2321
+ + f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
2322
+ + "WHERE\n "
2323
+ + '\n OR\n '.join([
2298
2324
  (
2299
2325
  sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
2300
2326
  ) for c in delta_cols
@@ -2366,9 +2392,9 @@ def sync_pipe_inplace(
2366
2392
  session.commit()
2367
2393
 
2368
2394
  msg = (
2369
- f"Inserted {unseen_count}, updated {update_count} rows."
2395
+ f"Inserted {unseen_count:,}, updated {update_count:,} rows."
2370
2396
  if not upsert
2371
- else f"Upserted {update_count} row" + ('s' if update_count != 1 else '') + "."
2397
+ else f"Upserted {update_count:,} row" + ('s' if update_count != 1 else '') + "."
2372
2398
  )
2373
2399
  _ = clean_up_temp_tables(ready_to_drop=True)
2374
2400
 
@@ -2540,6 +2566,7 @@ def get_pipe_rowcount(
2540
2566
  """
2541
2567
  from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
2542
2568
  from meerschaum.connectors.sql._fetch import get_pipe_query
2569
+ from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2543
2570
  if remote:
2544
2571
  msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
2545
2572
  if 'fetch' not in pipe.parameters:
@@ -2551,18 +2578,21 @@ def get_pipe_rowcount(
2551
2578
 
2552
2579
  _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2553
2580
 
2554
- if not pipe.columns.get('datetime', None):
2555
- _dt = pipe.guess_datetime()
2556
- dt = sql_item_name(_dt, self.flavor, None) if _dt else None
2581
+ dt_col = pipe.columns.get('datetime', None)
2582
+ dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2583
+ dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
2584
+ if not dt_col:
2585
+ dt_col = pipe.guess_datetime()
2586
+ dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
2557
2587
  is_guess = True
2558
2588
  else:
2559
- _dt = pipe.get_columns('datetime')
2560
- dt = sql_item_name(_dt, self.flavor, None)
2589
+ dt_col = pipe.get_columns('datetime')
2590
+ dt_name = sql_item_name(dt_col, self.flavor, None)
2561
2591
  is_guess = False
2562
2592
 
2563
2593
  if begin is not None or end is not None:
2564
2594
  if is_guess:
2565
- if _dt is None:
2595
+ if dt_col is None:
2566
2596
  warn(
2567
2597
  f"No datetime could be determined for {pipe}."
2568
2598
  + "\n Ignoring begin and end...",
@@ -2572,13 +2602,13 @@ def get_pipe_rowcount(
2572
2602
  else:
2573
2603
  warn(
2574
2604
  f"A datetime wasn't specified for {pipe}.\n"
2575
- + f" Using column \"{_dt}\" for datetime bounds...",
2605
+ + f" Using column \"{dt_col}\" for datetime bounds...",
2576
2606
  stack=False,
2577
2607
  )
2578
2608
 
2579
2609
 
2580
2610
  _datetime_name = sql_item_name(
2581
- _dt,
2611
+ dt_col,
2582
2612
  (
2583
2613
  pipe.instance_connector.flavor
2584
2614
  if not remote
@@ -2598,8 +2628,8 @@ def get_pipe_rowcount(
2598
2628
  )
2599
2629
  for col in set(
2600
2630
  (
2601
- [_dt]
2602
- if _dt
2631
+ [dt_col]
2632
+ if dt_col
2603
2633
  else []
2604
2634
  )
2605
2635
  + (
@@ -2623,13 +2653,13 @@ def get_pipe_rowcount(
2623
2653
  query += "\nWHERE"
2624
2654
  if begin is not None:
2625
2655
  query += f"""
2626
- {dt} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin)}
2656
+ {dt_name} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)}
2627
2657
  """
2628
2658
  if end is not None and begin is not None:
2629
2659
  query += "AND"
2630
2660
  if end is not None:
2631
2661
  query += f"""
2632
- {dt} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end)}
2662
+ {dt_name} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)}
2633
2663
  """
2634
2664
  if params is not None:
2635
2665
  from meerschaum.utils.sql import build_where
@@ -2715,31 +2745,35 @@ def clear_pipe(
2715
2745
  return True, f"{pipe} does not exist, so nothing was cleared."
2716
2746
 
2717
2747
  from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
2748
+ from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2718
2749
  pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2719
2750
 
2751
+ dt_col = pipe.columns.get('datetime', None)
2752
+ dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2753
+ dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
2720
2754
  if not pipe.columns.get('datetime', None):
2721
- _dt = pipe.guess_datetime()
2722
- dt_name = sql_item_name(_dt, self.flavor, None) if _dt else None
2755
+ dt_col = pipe.guess_datetime()
2756
+ dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
2723
2757
  is_guess = True
2724
2758
  else:
2725
- _dt = pipe.get_columns('datetime')
2726
- dt_name = sql_item_name(_dt, self.flavor, None)
2759
+ dt_col = pipe.get_columns('datetime')
2760
+ dt_name = sql_item_name(dt_col, self.flavor, None)
2727
2761
  is_guess = False
2728
2762
 
2729
2763
  if begin is not None or end is not None:
2730
2764
  if is_guess:
2731
- if _dt is None:
2765
+ if dt_col is None:
2732
2766
  warn(
2733
2767
  f"No datetime could be determined for {pipe}."
2734
2768
  + "\n Ignoring datetime bounds...",
2735
- stack = False,
2769
+ stack=False,
2736
2770
  )
2737
2771
  begin, end = None, None
2738
2772
  else:
2739
2773
  warn(
2740
2774
  f"A datetime wasn't specified for {pipe}.\n"
2741
- + f" Using column \"{_dt}\" for datetime bounds...",
2742
- stack = False,
2775
+ + f" Using column \"{dt_col}\" for datetime bounds...",
2776
+ stack=False,
2743
2777
  )
2744
2778
 
2745
2779
  valid_params = {}
@@ -2750,10 +2784,10 @@ def clear_pipe(
2750
2784
  f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n"
2751
2785
  + (' AND ' + build_where(valid_params, self, with_where=False) if valid_params else '')
2752
2786
  + (
2753
- f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin)
2787
+ f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin, db_type=dt_db_type)
2754
2788
  if begin is not None else ''
2755
2789
  ) + (
2756
- f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end)
2790
+ f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end, db_type=dt_db_type)
2757
2791
  if end is not None else ''
2758
2792
  )
2759
2793
  )
@@ -3400,7 +3434,6 @@ def deduplicate_pipe(
3400
3434
 
3401
3435
  ### TODO: Handle deleting duplicates without a datetime axis.
3402
3436
  dt_col = pipe.columns.get('datetime', None)
3403
- dt_col_name = sql_item_name(dt_col, self.flavor, None)
3404
3437
  cols_types = pipe.get_columns_types(debug=debug)
3405
3438
  existing_cols = pipe.get_columns_types(debug=debug)
3406
3439
 
@@ -3417,7 +3450,6 @@ def deduplicate_pipe(
3417
3450
  ]
3418
3451
  indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
3419
3452
  existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
3420
- duplicates_cte_name = sql_item_name('dups', self.flavor, None)
3421
3453
  duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
3422
3454
  previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
3423
3455
 
@@ -3450,7 +3482,7 @@ def deduplicate_pipe(
3450
3482
  and
3451
3483
  int(self.db_version.split('.')[0]) < 8
3452
3484
  )
3453
- except Exception as e:
3485
+ except Exception:
3454
3486
  is_old_mysql = False
3455
3487
 
3456
3488
  src_query = f"""
@@ -3557,7 +3589,7 @@ def deduplicate_pipe(
3557
3589
  (
3558
3590
  f"Successfully deduplicated table {pipe_table_name}"
3559
3591
  + (
3560
- f"\nfrom {old_rowcount} to {new_rowcount} rows"
3592
+ f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
3561
3593
  if old_rowcount != new_rowcount
3562
3594
  else ''
3563
3595
  )
@@ -310,7 +310,7 @@ def enforce(self, _enforce: bool) -> None:
310
310
  """
311
311
  Set the `enforce` parameter for the pipe.
312
312
  """
313
- self.parameters['_enforce'] = _enforce
313
+ self.parameters['enforce'] = _enforce
314
314
 
315
315
 
316
316
  def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
@@ -20,6 +20,7 @@ def enforce_dtypes(
20
20
  self,
21
21
  df: 'pd.DataFrame',
22
22
  chunksize: Optional[int] = -1,
23
+ enforce: bool = True,
23
24
  safe_copy: bool = True,
24
25
  debug: bool = False,
25
26
  ) -> 'pd.DataFrame':
@@ -31,7 +32,7 @@ def enforce_dtypes(
31
32
  from meerschaum.utils.warnings import warn
32
33
  from meerschaum.utils.debug import dprint
33
34
  from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
34
- from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
35
+ from meerschaum.utils.dtypes import are_dtypes_equal
35
36
  from meerschaum.utils.packages import import_pandas
36
37
  pd = import_pandas(debug=debug)
37
38
  if df is None:
@@ -42,11 +43,9 @@ def enforce_dtypes(
42
43
  )
43
44
  return df
44
45
 
45
- pipe_dtypes = self.dtypes if self.enforce else {
46
- col: typ
47
- for col, typ in self.dtypes.items()
48
- if typ in MRSM_PD_DTYPES
49
- }
46
+ if not self.enforce:
47
+ enforce = False
48
+ pipe_dtypes = self.dtypes if enforce else {}
50
49
 
51
50
  try:
52
51
  if isinstance(df, str):
@@ -55,8 +54,9 @@ def enforce_dtypes(
55
54
  ignore_cols=[
56
55
  col
57
56
  for col, dtype in pipe_dtypes.items()
58
- if 'datetime' not in str(dtype)
57
+ if (not enforce or not are_dtypes_equal(dtype, 'datetime'))
59
58
  ],
59
+ ignore_all=(not enforce),
60
60
  strip_timezone=(self.tzinfo is None),
61
61
  chunksize=chunksize,
62
62
  debug=debug,
@@ -67,7 +67,7 @@ def enforce_dtypes(
67
67
  ignore_cols=[
68
68
  col
69
69
  for col, dtype in pipe_dtypes.items()
70
- if not are_dtypes_equal(str(dtype), 'datetime')
70
+ if (not enforce or not are_dtypes_equal(str(dtype), 'datetime'))
71
71
  ],
72
72
  strip_timezone=(self.tzinfo is None),
73
73
  chunksize=chunksize,
@@ -90,7 +90,7 @@ def enforce_dtypes(
90
90
  pipe_dtypes,
91
91
  safe_copy=safe_copy,
92
92
  strip_timezone=(self.tzinfo is None),
93
- coerce_timezone=True,
93
+ coerce_timezone=enforce,
94
94
  debug=debug,
95
95
  )
96
96
 
@@ -10,7 +10,7 @@ from __future__ import annotations
10
10
  from datetime import timedelta, datetime
11
11
 
12
12
  import meerschaum as mrsm
13
- from meerschaum.utils.typing import Optional, Any, Union, SuccessTuple, Iterator, TYPE_CHECKING
13
+ from meerschaum.utils.typing import Any, Union, SuccessTuple, Iterator, TYPE_CHECKING
14
14
  from meerschaum.config import get_config
15
15
  from meerschaum.utils.warnings import warn
16
16
 
@@ -56,8 +56,7 @@ def fetch(
56
56
  warn(f"No `fetch()` function defined for connector '{self.connector}'")
57
57
  return None
58
58
 
59
- from meerschaum.connectors import custom_types, get_connector_plugin
60
- from meerschaum.utils.debug import dprint, _checkpoint
59
+ from meerschaum.connectors import get_connector_plugin
61
60
  from meerschaum.utils.misc import filter_arguments
62
61
 
63
62
  _chunk_hook = kw.pop('chunk_hook', None)