meerschaum 2.1.0rc2__py3-none-any.whl → 2.1.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31)
  1. meerschaum/actions/bootstrap.py +1 -2
  2. meerschaum/actions/delete.py +15 -1
  3. meerschaum/actions/sync.py +4 -4
  4. meerschaum/api/routes/_pipes.py +7 -11
  5. meerschaum/config/__init__.py +0 -2
  6. meerschaum/config/_default.py +3 -0
  7. meerschaum/config/_version.py +1 -1
  8. meerschaum/config/static/__init__.py +4 -0
  9. meerschaum/connectors/sql/SQLConnector.py +43 -3
  10. meerschaum/connectors/sql/_cli.py +27 -3
  11. meerschaum/connectors/sql/_instance.py +164 -0
  12. meerschaum/connectors/sql/_pipes.py +344 -304
  13. meerschaum/connectors/sql/_sql.py +52 -14
  14. meerschaum/connectors/sql/tables/__init__.py +65 -13
  15. meerschaum/connectors/sql/tables/pipes.py +9 -0
  16. meerschaum/core/Pipe/__init__.py +1 -1
  17. meerschaum/core/Pipe/_data.py +3 -4
  18. meerschaum/core/Pipe/_delete.py +12 -2
  19. meerschaum/core/Pipe/_sync.py +2 -5
  20. meerschaum/utils/dataframe.py +20 -4
  21. meerschaum/utils/dtypes/__init__.py +15 -1
  22. meerschaum/utils/dtypes/sql.py +1 -0
  23. meerschaum/utils/sql.py +485 -64
  24. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/METADATA +1 -1
  25. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/RECORD +31 -29
  26. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/LICENSE +0 -0
  27. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/NOTICE +0 -0
  28. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/WHEEL +0 -0
  29. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/entry_points.txt +0 -0
  30. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/top_level.txt +0 -0
  31. {meerschaum-2.1.0rc2.dist-info → meerschaum-2.1.1rc1.dist-info}/zip-safe +0 -0
@@ -186,8 +186,12 @@ def fetch_pipes_keys(
186
186
  location_keys = []
187
187
  else:
188
188
  location_keys = [
189
- (lk if lk not in ('[None]', 'None', 'null') else None)
190
- for lk in location_keys
189
+ (
190
+ lk
191
+ if lk not in ('[None]', 'None', 'null')
192
+ else None
193
+ )
194
+ for lk in location_keys
191
195
  ]
192
196
  if tags is None:
193
197
  tags = []
@@ -213,7 +217,7 @@ def fetch_pipes_keys(
213
217
  parameters[col] = vals
214
218
  cols = {k: v for k, v in cols.items() if v != [None]}
215
219
 
216
- if not table_exists('pipes', self, debug=debug):
220
+ if not table_exists('mrsm_pipes', self, schema=self.instance_schema, debug=debug):
217
221
  return []
218
222
 
219
223
  from meerschaum.connectors.sql.tables import get_tables
@@ -329,6 +333,7 @@ def create_indices(
329
333
  """
330
334
  Create a pipe's indices.
331
335
  """
336
+ from meerschaum.utils.sql import sql_item_name
332
337
  from meerschaum.utils.debug import dprint
333
338
  if debug:
334
339
  dprint(f"Creating indices for {pipe}...")
@@ -347,7 +352,25 @@ def create_indices(
347
352
  success = success and ix_success
348
353
  if not ix_success:
349
354
  warn(f"Failed to create index on column: {ix}")
350
- return success
355
+
356
+ existing_cols_types = pipe.get_columns_types(debug=debug)
357
+ indices_cols_str = ', '.join(
358
+ [sql_item_name(ix, self.flavor) for ix in ix_queries if ix in existing_cols_types]
359
+ )
360
+ pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
361
+ upsert = pipe.parameters.get('upsert', False)
362
+ if not upsert:
363
+ return success
364
+
365
+ constraint_name = sql_item_name(pipe.target + '_constraint', self.flavor)
366
+ constraint_query = (
367
+ f"ALTER TABLE {pipe_name} ADD CONSTRAINT {constraint_name} UNIQUE ({indices_cols_str})"
368
+ )
369
+ constraint_success = self.exec(constraint_query, debug=debug) is not None
370
+ if not constraint_success:
371
+ warn(f"Failed to add unique constraint to {pipe}.")
372
+
373
+ return success and constraint_success
351
374
 
352
375
 
353
376
  def drop_indices(
@@ -481,13 +504,6 @@ def get_create_index_queries(
481
504
  )
482
505
  )
483
506
  pass
484
- elif self.flavor == 'citus':
485
- id_query = [(
486
- f"CREATE INDEX IF NOT EXISTS {_id_index_name} "
487
- + f"ON {_pipe_name} ({_id_name});"
488
- ), (
489
- f"SELECT create_distributed_table('{_pipe_name}', '{_id}');"
490
- )]
491
507
  else: ### mssql, sqlite, etc.
492
508
  id_query = f"CREATE INDEX {_id_index_name} ON {_pipe_name} ({_id_name})"
493
509
 
@@ -551,7 +567,7 @@ def get_drop_index_queries(
551
567
  temp_table = '_' + pipe.target + '_temp_migration'
552
568
  temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))
553
569
 
554
- if table_exists(temp_table, self, debug=debug):
570
+ if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
555
571
  nuke_queries.append(f"DROP TABLE {temp_table_name}")
556
572
  nuke_queries += [
557
573
  f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
@@ -579,18 +595,13 @@ def delete_pipe(
579
595
  debug: bool = False,
580
596
  ) -> SuccessTuple:
581
597
  """
582
- Delete a Pipe's registration and drop its table.
598
+ Delete a Pipe's registration.
583
599
  """
584
600
  from meerschaum.utils.sql import sql_item_name
585
601
  from meerschaum.utils.debug import dprint
586
602
  from meerschaum.utils.packages import attempt_import
587
603
  sqlalchemy = attempt_import('sqlalchemy')
588
604
 
589
- ### try dropping first
590
- drop_tuple = pipe.drop(debug=debug)
591
- if not drop_tuple[0]:
592
- return drop_tuple
593
-
594
605
  if not pipe.id:
595
606
  return False, f"{pipe} is not registered."
596
607
 
@@ -1087,6 +1098,7 @@ def sync_pipe(
1087
1098
  check_existing: bool = True,
1088
1099
  blocking: bool = True,
1089
1100
  debug: bool = False,
1101
+ _check_temporary_tables: bool = True,
1090
1102
  **kw: Any
1091
1103
  ) -> SuccessTuple:
1092
1104
  """
@@ -1133,7 +1145,7 @@ def sync_pipe(
1133
1145
  A `SuccessTuple` of success (`bool`) and message (`str`).
1134
1146
  """
1135
1147
  from meerschaum.utils.packages import import_pandas
1136
- from meerschaum.utils.sql import get_update_queries, sql_item_name, json_flavors
1148
+ from meerschaum.utils.sql import get_update_queries, sql_item_name, json_flavors, update_queries
1137
1149
  from meerschaum.utils.misc import generate_password
1138
1150
  from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
1139
1151
  from meerschaum.utils.dtypes import are_dtypes_equal
@@ -1200,6 +1212,10 @@ def sync_pipe(
1200
1212
  if not infer_bool_success:
1201
1213
  return infer_bool_success, infer_bool_msg
1202
1214
 
1215
+ upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
1216
+ if upsert:
1217
+ check_existing = False
1218
+
1203
1219
  unseen_df, update_df, delta_df = (
1204
1220
  pipe.filter_existing(
1205
1221
  df,
@@ -1208,52 +1224,14 @@ def sync_pipe(
1208
1224
  **kw
1209
1225
  ) if check_existing else (df, None, df)
1210
1226
  )
1227
+ if upsert:
1228
+ unseen_df, update_df, delta_df = (df.head(0), df, df)
1229
+
1211
1230
  if debug:
1212
1231
  dprint("Delta data:\n" + str(delta_df))
1213
1232
  dprint("Unseen data:\n" + str(unseen_df))
1214
1233
  if update_df is not None:
1215
- dprint("Update data:\n" + str(update_df))
1216
-
1217
- if update_df is not None and not len(update_df) == 0:
1218
- transact_id = generate_password(3)
1219
- temp_target = '_' + transact_id + '_' + pipe.target
1220
- update_kw = copy.deepcopy(kw)
1221
- update_kw.update({
1222
- 'name': temp_target,
1223
- 'if_exists': 'append',
1224
- 'chunksize': chunksize,
1225
- 'dtype': self.get_to_sql_dtype(pipe, update_df, update_dtypes=False),
1226
- 'schema': self.get_pipe_schema(pipe),
1227
- 'debug': debug,
1228
- })
1229
- self.to_sql(update_df, **update_kw)
1230
- temp_pipe = Pipe(
1231
- pipe.connector_keys + '_', pipe.metric_key, pipe.location_key,
1232
- instance = pipe.instance_keys,
1233
- columns = pipe.columns,
1234
- target = temp_target,
1235
- temporary = True,
1236
- )
1237
-
1238
- existing_cols = pipe.get_columns_types(debug=debug)
1239
- join_cols = [
1240
- col for col_key, col in pipe.columns.items()
1241
- if col and col_key != 'value' and col in existing_cols
1242
- ]
1243
-
1244
- queries = get_update_queries(
1245
- pipe.target,
1246
- temp_target,
1247
- self,
1248
- join_cols,
1249
- debug = debug
1250
- )
1251
- success = all(self.exec_queries(queries, break_on_error=True, debug=debug))
1252
- drop_success, drop_msg = temp_pipe.drop(debug=debug)
1253
- if not drop_success:
1254
- warn(drop_msg)
1255
- if not success:
1256
- return False, f"Failed to apply update to {pipe}."
1234
+ dprint(("Update" if not upsert else "Upsert") + " data:\n" + str(update_df))
1257
1235
 
1258
1236
  if_exists = kw.get('if_exists', 'append')
1259
1237
  if 'if_exists' in kw:
@@ -1303,6 +1281,65 @@ def sync_pipe(
1303
1281
  if not self.create_indices(pipe, debug=debug):
1304
1282
  warn(f"Failed to create indices for {pipe}. Continuing...")
1305
1283
 
1284
+ if update_df is not None and len(update_df) > 0:
1285
+ dt_col = pipe.columns.get('datetime', None)
1286
+ dt_typ = pipe.dtypes.get(dt_col, None)
1287
+ dt_name = sql_item_name(dt_col, self.flavor) if dt_col else None
1288
+ update_min = update_df[dt_col].min() if dt_col and dt_col in update_df.columns else None
1289
+ update_max = update_df[dt_col].max() if dt_col and dt_col in update_df.columns else None
1290
+ update_begin = update_min
1291
+ update_end = (
1292
+ update_max
1293
+ + (
1294
+ timedelta(minutes=1)
1295
+ if are_dtypes_equal(str(dt_typ), 'datetime')
1296
+ else 1
1297
+ )
1298
+ ) if dt_col else None
1299
+
1300
+ transact_id = generate_password(3)
1301
+ temp_target = '-' + transact_id + '_' + pipe.target
1302
+ self._log_temporary_tables_creation(temp_target, debug=debug)
1303
+ temp_pipe = Pipe(
1304
+ pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
1305
+ instance = pipe.instance_keys,
1306
+ columns = pipe.columns,
1307
+ dtypes = pipe.dtypes,
1308
+ target = temp_target,
1309
+ temporary = True,
1310
+ parameters = {
1311
+ 'schema': self.internal_schema,
1312
+ },
1313
+ )
1314
+ temp_pipe.sync(update_df, check_existing=False, debug=debug)
1315
+ existing_cols = pipe.get_columns_types(debug=debug)
1316
+ join_cols = [
1317
+ col
1318
+ for col_key, col in pipe.columns.items()
1319
+ if col and col in existing_cols
1320
+ ]
1321
+ update_queries = get_update_queries(
1322
+ pipe.target,
1323
+ temp_target,
1324
+ self,
1325
+ join_cols,
1326
+ upsert = upsert,
1327
+ schema = self.get_pipe_schema(pipe),
1328
+ patch_schema = self.internal_schema,
1329
+ datetime_col = pipe.columns.get('datetime', None),
1330
+ debug = debug,
1331
+ )
1332
+ update_success = all(
1333
+ self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
1334
+ )
1335
+ self._log_temporary_tables_creation(
1336
+ temp_target,
1337
+ ready_to_drop = True,
1338
+ debug = debug,
1339
+ )
1340
+ if not update_success:
1341
+ warn(f"Failed to apply update to {pipe}.")
1342
+
1306
1343
  stop = time.perf_counter()
1307
1344
  success = stats['success']
1308
1345
  if not success:
@@ -1316,6 +1353,14 @@ def sync_pipe(
1316
1353
  f"\non table {sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))}\n"
1317
1354
  + f"in {round(stop - start, 2)} seconds."
1318
1355
  )
1356
+
1357
+ if _check_temporary_tables:
1358
+ drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(
1359
+ refresh=False, debug=debug
1360
+ )
1361
+ if not drop_stale_success:
1362
+ warn(drop_stale_msg)
1363
+
1319
1364
  return success, msg
1320
1365
 
1321
1366
 
@@ -1369,7 +1414,6 @@ def sync_pipe_inplace(
1369
1414
  """
1370
1415
  from meerschaum.utils.sql import (
1371
1416
  sql_item_name,
1372
- table_exists,
1373
1417
  get_sqlalchemy_table,
1374
1418
  get_update_queries,
1375
1419
  get_null_replacement,
@@ -1377,12 +1421,17 @@ def sync_pipe_inplace(
1377
1421
  NO_SELECT_INTO_FLAVORS,
1378
1422
  format_cte_subquery,
1379
1423
  get_create_table_query,
1424
+ get_table_cols_types,
1425
+ truncate_item_name,
1426
+ session_execute,
1427
+ table_exists,
1380
1428
  )
1381
1429
  from meerschaum.utils.dtypes.sql import (
1382
1430
  get_pd_type_from_db_type,
1383
1431
  )
1384
1432
  from meerschaum.utils.misc import generate_password
1385
1433
  from meerschaum.utils.debug import dprint
1434
+ sqlalchemy, sqlalchemy_orm = mrsm.attempt_import('sqlalchemy', 'sqlalchemy.orm')
1386
1435
  metadef = self.get_pipe_metadef(
1387
1436
  pipe,
1388
1437
  params = params,
@@ -1391,12 +1440,17 @@ def sync_pipe_inplace(
1391
1440
  check_existing = check_existing,
1392
1441
  debug = debug,
1393
1442
  )
1394
- metadef_name = sql_item_name('metadef', self.flavor, None)
1395
1443
  pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1444
+ upsert = pipe.parameters.get('upsert', False)
1445
+ internal_schema = self.internal_schema
1446
+ database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
1396
1447
 
1397
1448
  if not pipe.exists(debug=debug):
1398
1449
  create_pipe_query = get_create_table_query(
1399
- metadef, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
1450
+ metadef,
1451
+ pipe.target,
1452
+ self.flavor,
1453
+ schema = self.get_pipe_schema(pipe),
1400
1454
  )
1401
1455
  result = self.exec(create_pipe_query, debug=debug)
1402
1456
  if result is None:
@@ -1407,52 +1461,70 @@ def sync_pipe_inplace(
1407
1461
  rowcount = pipe.get_rowcount(debug=debug)
1408
1462
  return True, f"Inserted {rowcount}, updated 0 rows."
1409
1463
 
1410
- ### Generate names for the tables.
1464
+ session = sqlalchemy_orm.Session(self.engine)
1465
+ connectable = session if self.flavor != 'duckdb' else self
1466
+
1411
1467
  transact_id = generate_password(3)
1412
1468
  def get_temp_table_name(label: str) -> str:
1413
- return '_' + transact_id + '_' + label + '_' + pipe.target
1414
-
1415
- backtrack_table_raw = get_temp_table_name('backtrack')
1416
- backtrack_table_name = sql_item_name(
1417
- backtrack_table_raw, self.flavor, self.get_pipe_schema(pipe)
1418
- )
1419
- new_table_raw = get_temp_table_name('new')
1420
- new_table_name = sql_item_name(new_table_raw, self.flavor, self.get_pipe_schema(pipe))
1421
- delta_table_raw = get_temp_table_name('delta')
1422
- delta_table_name = sql_item_name(delta_table_raw, self.flavor, self.get_pipe_schema(pipe))
1423
- joined_table_raw = get_temp_table_name('joined')
1424
- joined_table_name = sql_item_name(joined_table_raw, self.flavor, self.get_pipe_schema(pipe))
1425
- unseen_table_raw = get_temp_table_name('unseen')
1426
- unseen_table_name = sql_item_name(unseen_table_raw, self.flavor, self.get_pipe_schema(pipe))
1427
- update_table_raw = get_temp_table_name('update')
1428
- update_table_name = sql_item_name(update_table_raw, self.flavor, self.get_pipe_schema(pipe))
1429
- metadef_name = sql_item_name('metadef', self.flavor, self.get_pipe_schema(pipe))
1430
-
1431
- new_queries = []
1432
- drop_new_query = f"DROP TABLE {new_table_name}"
1433
- if table_exists(new_table_raw, self, debug=debug):
1434
- new_queries.append(drop_new_query)
1469
+ return '-' + transact_id + '_' + label + '_' + pipe.target
1435
1470
 
1436
- create_new_query = get_create_table_query(
1437
- metadef, new_table_raw, self.flavor, schema=self.get_pipe_schema(pipe)
1438
- )
1439
- new_queries.append(create_new_query)
1471
+ temp_table_roots = ['backtrack', 'new', 'delta', 'joined', 'unseen', 'update']
1472
+ temp_tables = {
1473
+ table_root: get_temp_table_name(table_root)
1474
+ for table_root in temp_table_roots
1475
+ }
1476
+ temp_table_names = {
1477
+ table_root: sql_item_name(
1478
+ table_name_raw,
1479
+ self.flavor,
1480
+ internal_schema,
1481
+ )
1482
+ for table_root, table_name_raw in temp_tables.items()
1483
+ }
1440
1484
 
1441
- new_success = all(self.exec_queries(new_queries, break_on_error=True, debug=debug))
1442
- if not new_success:
1443
- self.exec_queries([drop_new_query], break_on_error=False, debug=debug)
1444
- return False, f"Could not fetch new data for {pipe}."
1485
+ def clean_up_temp_tables(ready_to_drop: bool = False):
1486
+ log_success, log_msg = self._log_temporary_tables_creation(
1487
+ [table for table in temp_tables.values()],
1488
+ ready_to_drop = ready_to_drop,
1489
+ debug = debug,
1490
+ )
1491
+ if not log_success:
1492
+ warn(log_msg)
1445
1493
 
1446
- new_table_obj = get_sqlalchemy_table(
1447
- new_table_raw,
1448
- connector = self,
1449
- schema = self.get_pipe_schema(pipe),
1450
- refresh = True,
1494
+ ### Clean up in case we have a session collision.
1495
+ _ = clean_up_temp_tables()
1496
+
1497
+ create_new_query = get_create_table_query(
1498
+ metadef,
1499
+ temp_tables['new'],
1500
+ self.flavor,
1501
+ schema = internal_schema,
1502
+ )
1503
+ (create_new_success, create_new_msg), create_new_results = session_execute(
1504
+ session,
1505
+ create_new_query,
1506
+ with_results = True,
1451
1507
  debug = debug,
1452
1508
  )
1509
+ if not create_new_success:
1510
+ _ = clean_up_temp_tables()
1511
+ return create_new_success, create_new_msg
1512
+ new_count = create_new_results[0].rowcount if create_new_results else 0
1513
+
1514
+ new_cols_types = get_table_cols_types(
1515
+ temp_tables['new'],
1516
+ connectable = connectable,
1517
+ flavor = self.flavor,
1518
+ schema = internal_schema,
1519
+ database = database,
1520
+ debug = debug,
1521
+ )
1522
+ if not new_cols_types:
1523
+ return False, "Failed to get columns for new table."
1524
+
1453
1525
  new_cols = {
1454
- str(col.name): get_pd_type_from_db_type(str(col.type))
1455
- for col in new_table_obj.columns
1526
+ str(col_name): get_pd_type_from_db_type(str(col_type))
1527
+ for col_name, col_type in new_cols_types.items()
1456
1528
  }
1457
1529
  new_cols_str = ', '.join([
1458
1530
  sql_item_name(col, self.flavor)
@@ -1461,34 +1533,31 @@ def sync_pipe_inplace(
1461
1533
 
1462
1534
  add_cols_queries = self.get_add_columns_queries(pipe, new_cols, debug=debug)
1463
1535
  if add_cols_queries:
1464
- if not self.exec_queries(add_cols_queries, debug=debug):
1465
- warn(f"Failed to add new columns to {pipe}.")
1536
+ self.exec_queries(add_cols_queries, debug=debug)
1466
1537
 
1467
1538
  alter_cols_queries = self.get_alter_columns_queries(pipe, new_cols, debug=debug)
1468
1539
  if alter_cols_queries:
1469
- if not self.exec_queries(alter_cols_queries, debug=debug):
1470
- warn(f"Failed to alter columns for {pipe}.")
1471
- else:
1472
- _ = pipe.infer_dtypes(persist=True)
1540
+ self.exec_queries(alter_cols_queries, debug=debug)
1541
+
1542
+ insert_queries = [
1543
+ (
1544
+ f"INSERT INTO {pipe_name} ({new_cols_str})\n"
1545
+ + f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}"
1546
+ ),
1547
+ f"DROP TABLE {temp_table_names['new']}",
1548
+ ] if not check_existing else []
1549
+
1550
+ new_queries = insert_queries
1551
+ new_success, new_msg = session_execute(session, new_queries, debug=debug)
1552
+ if not new_success:
1553
+ _ = clean_up_temp_tables()
1554
+ return new_success, new_msg
1473
1555
 
1474
1556
  if not check_existing:
1475
- new_count = self.value(f"SELECT COUNT(*) FROM {new_table_name}", debug=debug)
1476
- insert_queries = [
1477
- (
1478
- f"INSERT INTO {pipe_name} ({new_cols_str})\n"
1479
- + f"SELECT {new_cols_str}\nFROM {new_table_name}"
1480
- ),
1481
- f"DROP TABLE {new_table_name}"
1482
- ]
1483
- if not self.exec_queries(insert_queries, debug=debug, break_on_error=False):
1484
- return False, f"Failed to insert into rows into {pipe}."
1557
+ session.commit()
1558
+ _ = clean_up_temp_tables()
1485
1559
  return True, f"Inserted {new_count}, updated 0 rows."
1486
1560
 
1487
-
1488
- backtrack_queries = []
1489
- drop_backtrack_query = f"DROP TABLE {backtrack_table_name}"
1490
- if table_exists(backtrack_table_raw, self, debug=debug):
1491
- backtrack_queries.append(drop_backtrack_query)
1492
1561
  backtrack_def = self.get_pipe_data_query(
1493
1562
  pipe,
1494
1563
  begin = begin,
@@ -1507,25 +1576,31 @@ def sync_pipe_inplace(
1507
1576
  )
1508
1577
  create_backtrack_query = get_create_table_query(
1509
1578
  backtrack_def,
1510
- backtrack_table_raw,
1579
+ temp_tables['backtrack'],
1511
1580
  self.flavor,
1581
+ schema = internal_schema,
1512
1582
  )
1513
- backtrack_queries.append(create_backtrack_query)
1514
- backtrack_success = all(self.exec_queries(backtrack_queries, break_on_error=True, debug=debug))
1515
- if not backtrack_success:
1516
- self.exec_queries([drop_new_query, drop_backtrack_query], break_on_error=False, debug=debug)
1517
- return False, f"Could not fetch backtrack data from {pipe}."
1518
-
1519
- ### Determine which index columns are present in both tables.
1520
- backtrack_table_obj = get_sqlalchemy_table(
1521
- backtrack_table_raw,
1522
- connector = self,
1523
- schema = self.get_pipe_schema(pipe),
1524
- refresh = True,
1583
+ (create_backtrack_success, create_backtrack_msg), create_backtrack_results = session_execute(
1584
+ session,
1585
+ create_backtrack_query,
1586
+ with_results = True,
1525
1587
  debug = debug,
1526
1588
  )
1527
- backtrack_cols = {str(col.name): str(col.type) for col in backtrack_table_obj.columns}
1528
- common_cols = [col for col in new_cols if col in backtrack_cols]
1589
+ if not create_backtrack_success:
1590
+ _ = clean_up_temp_tables()
1591
+ return create_backtrack_success, create_backtrack_msg
1592
+ bactrack_count = create_backtrack_results[0].rowcount if create_backtrack_results else 0
1593
+
1594
+ backtrack_cols_types = get_table_cols_types(
1595
+ temp_tables['backtrack'],
1596
+ connectable = connectable,
1597
+ flavor = self.flavor,
1598
+ schema = internal_schema,
1599
+ database = database,
1600
+ debug = debug,
1601
+ )
1602
+
1603
+ common_cols = [col for col in new_cols if col in backtrack_cols_types]
1529
1604
  on_cols = {
1530
1605
  col: new_cols.get(col, 'object')
1531
1606
  for col_key, col in pipe.columns.items()
@@ -1533,19 +1608,14 @@ def sync_pipe_inplace(
1533
1608
  col
1534
1609
  and
1535
1610
  col_key != 'value'
1536
- and col in backtrack_cols
1611
+ and col in backtrack_cols_types
1537
1612
  and col in new_cols
1538
1613
  )
1539
1614
  }
1540
1615
 
1541
- delta_queries = []
1542
- drop_delta_query = f"DROP TABLE {delta_table_name}"
1543
- if table_exists(delta_table_raw, self, debug=debug):
1544
- delta_queries.append(drop_delta_query)
1545
-
1546
1616
  null_replace_new_cols_str = (
1547
1617
  ', '.join([
1548
- f"COALESCE({new_table_name}.{sql_item_name(col, self.flavor, None)}, "
1618
+ f"COALESCE({temp_table_names['new']}.{sql_item_name(col, self.flavor, None)}, "
1549
1619
  + f"{get_null_replacement(typ, self.flavor)}) AS "
1550
1620
  + sql_item_name(col, self.flavor, None)
1551
1621
  for col, typ in new_cols.items()
@@ -1555,117 +1625,105 @@ def sync_pipe_inplace(
1555
1625
  select_delta_query = (
1556
1626
  f"SELECT\n"
1557
1627
  + null_replace_new_cols_str + "\n"
1558
- + f"\nFROM {new_table_name}\n"
1559
- + f"LEFT OUTER JOIN {backtrack_table_name}\nON\n"
1628
+ + f"\nFROM {temp_table_names['new']}\n"
1629
+ + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n"
1560
1630
  + '\nAND\n'.join([
1561
1631
  (
1562
- f'COALESCE({new_table_name}.' + sql_item_name(c, self.flavor, None) + ", "
1563
- + get_null_replacement(new_cols[c], self.flavor) + ") "
1632
+ f"COALESCE({temp_table_names['new']}."
1633
+ + sql_item_name(c, self.flavor, None)
1634
+ + ", "
1635
+ + get_null_replacement(new_cols[c], self.flavor)
1636
+ + ") "
1564
1637
  + ' = '
1565
- + f'COALESCE({backtrack_table_name}.' + sql_item_name(c, self.flavor, None) + ", "
1566
- + get_null_replacement(backtrack_cols[c], self.flavor) + ") "
1638
+ + f"COALESCE({temp_table_names['backtrack']}."
1639
+ + sql_item_name(c, self.flavor, None)
1640
+ + ", "
1641
+ + get_null_replacement(backtrack_cols_types[c], self.flavor)
1642
+ + ") "
1567
1643
  ) for c in common_cols
1568
1644
  ])
1569
1645
  + "\nWHERE\n"
1570
1646
  + '\nAND\n'.join([
1571
1647
  (
1572
- f'{backtrack_table_name}.' + sql_item_name(c, self.flavor, None) + ' IS NULL'
1648
+ f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None) + ' IS NULL'
1573
1649
  ) for c in common_cols
1574
1650
  ])
1575
1651
  )
1576
1652
  create_delta_query = get_create_table_query(
1577
- select_delta_query, delta_table_raw, self.flavor, None
1653
+ select_delta_query,
1654
+ temp_tables['delta'],
1655
+ self.flavor,
1656
+ schema = internal_schema,
1578
1657
  )
1579
- delta_queries.append(create_delta_query)
1580
-
1581
- delta_success = all(self.exec_queries(delta_queries, break_on_error=True, debug=debug))
1582
- if not delta_success:
1583
- self.exec_queries(
1584
- [
1585
- drop_new_query,
1586
- drop_backtrack_query,
1587
- drop_delta_query,
1588
- ],
1589
- break_on_error = False,
1590
- debug = debug,
1591
- )
1592
- return False, f"Could not filter data for {pipe}."
1658
+ create_delta_success, create_delta_msg = session_execute(
1659
+ session,
1660
+ create_delta_query,
1661
+ debug = debug,
1662
+ )
1663
+ if not create_delta_success:
1664
+ _ = clean_up_temp_tables()
1665
+ return create_delta_success, create_delta_msg
1593
1666
 
1594
- delta_table_obj = get_sqlalchemy_table(
1595
- delta_table_raw,
1596
- connector = self,
1597
- schema = self.get_pipe_schema(pipe),
1598
- refresh = True,
1667
+ delta_cols_types = get_table_cols_types(
1668
+ temp_tables['delta'],
1669
+ connectable = connectable,
1670
+ flavor = self.flavor,
1671
+ schema = internal_schema,
1672
+ database = database,
1599
1673
  debug = debug,
1600
1674
  )
1675
+
1601
1676
  delta_cols = {
1602
- str(col.name): get_pd_type_from_db_type(str(col.type))
1603
- for col in delta_table_obj.columns
1677
+ col: get_pd_type_from_db_type(typ)
1678
+ for col, typ in delta_cols_types.items()
1604
1679
  }
1605
1680
  delta_cols_str = ', '.join([
1606
1681
  sql_item_name(col, self.flavor)
1607
1682
  for col in delta_cols
1608
1683
  ])
1609
1684
 
1610
- joined_queries = []
1611
- drop_joined_query = f"DROP TABLE {joined_table_name}"
1612
- if on_cols and table_exists(joined_table_raw, self, debug=debug):
1613
- joined_queries.append(drop_joined_query)
1614
-
1615
1685
  select_joined_query = (
1616
1686
  "SELECT "
1617
1687
  + (', '.join([
1618
1688
  (
1619
- f'{delta_table_name}.' + sql_item_name(c, self.flavor, None)
1689
+ f"{temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
1620
1690
  + " AS " + sql_item_name(c + '_delta', self.flavor, None)
1621
1691
  ) for c in delta_cols
1622
1692
  ]))
1623
1693
  + ", "
1624
1694
  + (', '.join([
1625
1695
  (
1626
- f'{backtrack_table_name}.' + sql_item_name(c, self.flavor, None)
1696
+ f"{temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
1627
1697
  + " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
1628
- ) for c in backtrack_cols
1698
+ ) for c in backtrack_cols_types
1629
1699
  ]))
1630
- + f"\nFROM {delta_table_name}\n"
1631
- + f"LEFT OUTER JOIN {backtrack_table_name}\nON\n"
1700
+ + f"\nFROM {temp_table_names['delta']}\n"
1701
+ + f"LEFT OUTER JOIN {temp_table_names['backtrack']}\nON\n"
1632
1702
  + '\nAND\n'.join([
1633
1703
  (
1634
- f'COALESCE({delta_table_name}.' + sql_item_name(c, self.flavor, None)
1704
+ f"COALESCE({temp_table_names['delta']}." + sql_item_name(c, self.flavor, None)
1635
1705
  + ", " + get_null_replacement(typ, self.flavor) + ")"
1636
1706
  + ' = '
1637
- + f'COALESCE({backtrack_table_name}.' + sql_item_name(c, self.flavor, None)
1707
+ + f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
1638
1708
  + ", " + get_null_replacement(typ, self.flavor) + ")"
1639
1709
  ) for c, typ in on_cols.items()
1640
1710
  ])
1641
1711
  )
1642
1712
 
1643
1713
  create_joined_query = get_create_table_query(
1644
- select_joined_query, joined_table_raw, self.flavor, schema=self.get_pipe_schema(pipe)
1645
- )
1646
- joined_queries.append(create_joined_query)
1647
-
1648
- joined_success = (
1649
- all(self.exec_queries(joined_queries, break_on_error=True, debug=debug))
1650
- if on_cols else True
1714
+ select_joined_query,
1715
+ temp_tables['joined'],
1716
+ self.flavor,
1717
+ schema = internal_schema,
1651
1718
  )
1652
- if not joined_success:
1653
- self.exec_queries(
1654
- [
1655
- drop_new_query,
1656
- drop_backtrack_query,
1657
- drop_delta_query,
1658
- drop_joined_query,
1659
- ],
1660
- break_on_error = False,
1661
- debug = debug,
1662
- )
1663
- return False, f"Could not separate new and updated data for {pipe}."
1664
-
1665
- unseen_queries = []
1666
- drop_unseen_query = f"DROP TABLE {unseen_table_name}"
1667
- if on_cols and table_exists(unseen_table_raw, self, debug=debug):
1668
- unseen_queries.append(drop_unseen_query)
1719
+ create_joined_success, create_joined_msg = session_execute(
1720
+ session,
1721
+ create_joined_query,
1722
+ debug = debug,
1723
+ ) if on_cols else (True, "Success")
1724
+ if not create_joined_success:
1725
+ _ = clean_up_temp_tables()
1726
+ return create_joined_success, create_joined_msg
1669
1727
 
1670
1728
  select_unseen_query = (
1671
1729
  "SELECT "
@@ -1678,7 +1736,7 @@ def sync_pipe_inplace(
1678
1736
  + " AS " + sql_item_name(c, self.flavor, None)
1679
1737
  ) for c, typ in delta_cols.items()
1680
1738
  ]))
1681
- + f"\nFROM {joined_table_name}\n"
1739
+ + f"\nFROM {temp_table_names['joined']}\n"
1682
1740
  + f"WHERE "
1683
1741
  + '\nAND\n'.join([
1684
1742
  (
@@ -1687,38 +1745,20 @@ def sync_pipe_inplace(
1687
1745
  ])
1688
1746
  )
1689
1747
  create_unseen_query = get_create_table_query(
1690
- select_unseen_query, unseen_table_raw, self.flavor, self.get_pipe_schema(pipe)
1691
- )
1692
- unseen_queries.append(create_unseen_query)
1693
-
1694
- unseen_success = (
1695
- all(self.exec_queries(unseen_queries, break_on_error=True, debug=debug))
1696
- if on_cols else True
1748
+ select_unseen_query,
1749
+ temp_tables['unseen'],
1750
+ self.flavor,
1751
+ internal_schema,
1697
1752
  )
1698
- if not unseen_success:
1699
- self.exec_queries(
1700
- [
1701
- drop_new_query,
1702
- drop_backtrack_query,
1703
- drop_delta_query,
1704
- drop_joined_query,
1705
- drop_unseen_query,
1706
- ],
1707
- break_on_error = False,
1708
- debug = debug,
1709
- )
1710
- return False, f"Could not determine new data for {pipe}."
1711
- unseen_count = self.value(
1712
- (
1713
- "SELECT COUNT(*) FROM "
1714
- + (unseen_table_name if on_cols else delta_table_name)
1715
- ), debug = debug,
1753
+ (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
1754
+ session,
1755
+ create_unseen_query,
1756
+ with_results = True,
1757
+ debug = debug
1716
1758
  )
1717
-
1718
- update_queries = []
1719
- drop_update_query = f"DROP TABLE {update_table_name}"
1720
- if on_cols and table_exists(update_table_raw, self, debug=debug):
1721
- update_queries.append(drop_unseen_query)
1759
+ if not create_unseen_success:
1760
+ _ = clean_up_temp_tables()
1761
+ return create_unseen_success, create_unseen_msg
1722
1762
 
1723
1763
  select_update_query = (
1724
1764
  "SELECT "
@@ -1731,7 +1771,7 @@ def sync_pipe_inplace(
1731
1771
  + " AS " + sql_item_name(c, self.flavor, None)
1732
1772
  ) for c, typ in delta_cols.items()
1733
1773
  ]))
1734
- + f"\nFROM {joined_table_name}\n"
1774
+ + f"\nFROM {temp_table_names['joined']}\n"
1735
1775
  + f"WHERE "
1736
1776
  + '\nOR\n'.join([
1737
1777
  (
@@ -1741,39 +1781,28 @@ def sync_pipe_inplace(
1741
1781
  )
1742
1782
 
1743
1783
  create_update_query = get_create_table_query(
1744
- select_update_query, update_table_raw, self.flavor, self.get_pipe_schema(pipe)
1745
- )
1746
- update_queries.append(create_update_query)
1747
-
1748
- update_success = (
1749
- all(self.exec_queries(update_queries, break_on_error=True, debug=debug))
1750
- if on_cols else True
1751
- )
1752
- if not update_success:
1753
- self.exec_queries(
1754
- [
1755
- drop_new_query,
1756
- drop_backtrack_query,
1757
- drop_delta_query,
1758
- drop_joined_query,
1759
- drop_unseen_query,
1760
- drop_update_query,
1761
- ],
1762
- break_on_error = False,
1763
- debug = debug,
1764
- )
1765
- return False, "Could not determine updated data for {pipe}."
1766
- update_count = (
1767
- self.value(f"SELECT COUNT(*) FROM {update_table_name}", debug=debug)
1768
- if on_cols else 0
1784
+ select_update_query,
1785
+ temp_tables['update'],
1786
+ self.flavor,
1787
+ internal_schema,
1769
1788
  )
1770
-
1789
+ (create_update_success, create_update_msg), create_update_results = session_execute(
1790
+ session,
1791
+ create_update_query,
1792
+ with_results = True,
1793
+ debug = debug,
1794
+ ) if on_cols else ((True, "Success"), [])
1771
1795
  apply_update_queries = (
1772
1796
  get_update_queries(
1773
1797
  pipe.target,
1774
- update_table_raw,
1775
- self,
1798
+ temp_tables['update'],
1799
+ session,
1776
1800
  on_cols,
1801
+ upsert = upsert,
1802
+ schema = self.get_pipe_schema(pipe),
1803
+ patch_schema = internal_schema,
1804
+ datetime_col = pipe.columns.get('datetime', None),
1805
+ flavor = self.flavor,
1777
1806
  debug = debug
1778
1807
  )
1779
1808
  if on_cols else []
@@ -1784,34 +1813,46 @@ def sync_pipe_inplace(
1784
1813
  f"INSERT INTO {pipe_name} ({delta_cols_str})\n"
1785
1814
  + f"SELECT {delta_cols_str}\nFROM "
1786
1815
  + (
1787
- unseen_table_name
1816
+ temp_table_names['unseen']
1788
1817
  if on_cols
1789
- else delta_table_name
1818
+ else temp_table_names['delta']
1790
1819
  )
1791
1820
  ),
1792
1821
  ]
1793
1822
 
1794
- apply_queries = (
1795
- (apply_unseen_queries if unseen_count > 0 else [])
1796
- + (apply_update_queries if update_count > 0 else [])
1797
- + [
1798
- drop_new_query,
1799
- drop_backtrack_query,
1800
- drop_delta_query,
1801
- ] + (
1802
- [
1803
- drop_joined_query,
1804
- drop_unseen_query,
1805
- drop_update_query,
1806
- ] if on_cols else []
1807
- )
1823
+ (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute(
1824
+ session,
1825
+ apply_unseen_queries,
1826
+ with_results = True,
1827
+ debug = debug,
1808
1828
  )
1809
- success = all(self.exec_queries(apply_queries, break_on_error=False, debug=debug))
1810
- msg = (
1811
- f"Was not able to apply changes to {pipe}."
1812
- if not success else f"Inserted {unseen_count}, updated {update_count} rows."
1829
+ if not apply_unseen_success:
1830
+ _ = clean_up_temp_tables()
1831
+ return apply_unseen_success, apply_unseen_msg
1832
+ unseen_count = apply_unseen_results[0].rowcount if apply_unseen_results else 0
1833
+
1834
+ (apply_update_success, apply_update_msg), apply_update_results = session_execute(
1835
+ session,
1836
+ apply_update_queries,
1837
+ with_results = True,
1838
+ debug = debug,
1813
1839
  )
1814
- return success, msg
1840
+
1841
+ if not apply_update_success:
1842
+ _ = clean_up_temp_tables()
1843
+ return apply_update_success, apply_update_msg
1844
+ update_count = apply_update_results[0].rowcount if apply_update_results else 0
1845
+
1846
+ session.commit()
1847
+
1848
+ msg = f"Inserted {unseen_count}, updated {update_count} rows."
1849
+ _ = clean_up_temp_tables(ready_to_drop=True)
1850
+
1851
+ drop_stale_success, drop_stale_msg = self._drop_old_temporary_tables(refresh=False, debug=debug)
1852
+ if not drop_stale_success:
1853
+ warn(drop_stale_msg)
1854
+
1855
+ return True, msg
1815
1856
 
1816
1857
 
1817
1858
  def get_sync_time(
@@ -1936,7 +1977,12 @@ def pipe_exists(
1936
1977
 
1937
1978
  """
1938
1979
  from meerschaum.utils.sql import table_exists
1939
- exists = table_exists(pipe.target, self, debug=debug)
1980
+ exists = table_exists(
1981
+ pipe.target,
1982
+ self,
1983
+ schema = self.get_pipe_schema(pipe),
1984
+ debug = debug,
1985
+ )
1940
1986
  if debug:
1941
1987
  from meerschaum.utils.debug import dprint
1942
1988
  dprint(f"{pipe} " + ('exists.' if exists else 'does not exist.'))
@@ -2118,18 +2164,12 @@ def drop_pipe(
2118
2164
  """
2119
2165
  from meerschaum.utils.sql import table_exists, sql_item_name
2120
2166
  success = True
2121
- target, temp_target = pipe.target, '_' + pipe.target
2122
- target_name, temp_name = (
2123
- sql_item_name(target, self.flavor, self.get_pipe_schema(pipe)),
2124
- sql_item_name(temp_target, self.flavor, self.get_pipe_schema(pipe)),
2167
+ target = pipe.target
2168
+ target_name = (
2169
+ sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
2125
2170
  )
2126
2171
  if table_exists(target, self, debug=debug):
2127
2172
  success = self.exec(f"DROP TABLE {target_name}", silent=True, debug=debug) is not None
2128
- if table_exists(temp_target, self, debug=debug):
2129
- success = (
2130
- success
2131
- and self.exec(f"DROP TABLE {temp_name}", silent=True, debug=debug) is not None
2132
- )
2133
2173
 
2134
2174
  msg = "Success" if success else f"Failed to drop {pipe}."
2135
2175
  return success, msg
@@ -2247,7 +2287,7 @@ def get_pipe_columns_types(
2247
2287
  self,
2248
2288
  pipe: mrsm.Pipe,
2249
2289
  debug: bool = False,
2250
- ) -> Optional[Dict[str, str]]:
2290
+ ) -> Dict[str, str]:
2251
2291
  """
2252
2292
  Get the pipe's columns and types.
2253
2293
 
@@ -2283,7 +2323,7 @@ def get_pipe_columns_types(
2283
2323
  import traceback
2284
2324
  traceback.print_exc()
2285
2325
  warn(e)
2286
- table_columns = None
2326
+ table_columns = {}
2287
2327
 
2288
2328
  return table_columns
2289
2329
 
@@ -2813,7 +2853,7 @@ def deduplicate_pipe(
2813
2853
  cols_list_str = ', '.join(existing_cols_names)
2814
2854
 
2815
2855
  try:
2816
- ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()).
2856
+ ### NOTE: MySQL 5 and below does not support window functions (ROW_NUMBER()).
2817
2857
  is_old_mysql = (
2818
2858
  self.flavor in ('mysql', 'mariadb')
2819
2859
  and