meerschaum 2.6.16__py3-none-any.whl → 2.7.0__py3-none-any.whl
- meerschaum/_internal/arguments/_parse_arguments.py +1 -1
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/edit.py +22 -2
- meerschaum/actions/install.py +1 -2
- meerschaum/actions/sync.py +2 -3
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +18 -21
- meerschaum/connectors/sql/_create_engine.py +3 -3
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +143 -91
- meerschaum/connectors/sql/_sql.py +43 -8
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +25 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +59 -31
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +4 -1
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +22 -15
- meerschaum/utils/dataframe.py +178 -16
- meerschaum/utils/dtypes/__init__.py +149 -14
- meerschaum/utils/dtypes/sql.py +41 -7
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/schedule.py +8 -3
- meerschaum/utils/sql.py +180 -100
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +53 -20
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/METADATA +2 -2
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/RECORD +40 -40
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py:

@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."

-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values
+    from meerschaum.utils.misc import separate_negation_values
    from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
    from meerschaum.config.static import STATIC_CONFIG
    import json
@@ -316,7 +315,6 @@ def create_indices(
    """
    Create a pipe's indices.
    """
-    from meerschaum.utils.sql import sql_item_name, update_queries
    from meerschaum.utils.debug import dprint
    if debug:
        dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
    existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
    existing_ix_names = set()
    existing_primary_keys = []
+    existing_clustered_primary_keys = []
    for col, col_indices in existing_cols_indices.items():
        for col_ix_doc in col_indices:
            existing_ix_names.add(col_ix_doc.get('name', None))
            if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)

    _datetime = pipe.get_columns('datetime', error=False)
    _datetime_name = (
@@ -460,10 +461,16 @@ def get_create_index_queries(
        else None
    )
    primary_key_constraint_name = (
-        sql_item_name(f'
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
        if primary_key is not None
        else None
    )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_clustered_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )

    _id_index_name = (
        sql_item_name(index_names['id'], self.flavor, None)
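For context on the MSSQL changes above: a table may carry at most one clustered index, so the new logic gives the clustered slot to the datetime index when a datetime column exists and no clustered primary key already occupies it. A minimal standalone sketch of that decision (the function and printed examples are illustrative, not library code):

```python
def choose_clustering(dt_col, existing_clustered_primary_keys):
    """Mirror the diff's CLUSTERED/NONCLUSTERED decision (illustrative only)."""
    # The primary key only claims the clustered slot when there is no datetime column.
    primary_key_clustered = "CLUSTERED" if dt_col is None else "NONCLUSTERED"
    # The datetime index claims it unless an existing clustered PK already holds it.
    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_primary_keys and dt_col is not None
        else "NONCLUSTERED"
    )
    return primary_key_clustered, datetime_clustered

print(choose_clustering("timestamp", []))      # ('NONCLUSTERED', 'CLUSTERED')
print(choose_clustering(None, []))             # ('CLUSTERED', 'NONCLUSTERED')
print(choose_clustering("timestamp", ["id"]))  # ('NONCLUSTERED', 'NONCLUSTERED')
```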
@@ -474,6 +481,7 @@ def get_create_index_queries(
    _create_space_partition = get_config('system', 'experimental', 'space')

    ### create datetime index
+    dt_query = None
    if _datetime is not None:
        if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
            _id_count = (
@@ -504,19 +512,19 @@ def get_create_index_queries(
                + 'if_not_exists => true, '
                + "migrate_data => true);"
            )
-        elif
-
-
-
-
-
-
-
-
-
-
-        )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )

+    if dt_query:
        index_queries[_datetime] = [dt_query]

    primary_queries = []
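Substituting hypothetical names into the new f-strings shows the statements this branch emits; the table and index identifiers below are made up for illustration:

```python
# Hypothetical values standing in for the function's local variables.
datetime_clustered = "CLUSTERED"
_datetime_index_name = '"IX_weather_timestamp"'
_pipe_name = '"weather"'
_datetime_name = '"timestamp"'

# MSSQL branch: the clustering keyword is included.
print(
    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
    f"ON {_pipe_name} ({_datetime_name})"
)
# Other flavors: a plain index.
print(
    f"CREATE INDEX {_datetime_index_name} "
    f"ON {_pipe_name} ({_datetime_name})"
)
```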
@@ -623,7 +631,7 @@ def get_create_index_queries(
        ),
        (
            f"ALTER TABLE {_pipe_name}\n"
-            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
        ),
    ])
    index_queries[primary_key] = primary_queries
@@ -658,6 +666,8 @@ def get_create_index_queries(
        cols = indices[ix_key]
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
        cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
        if not cols_names:
            continue
@@ -785,8 +795,6 @@ def delete_pipe(
    """
    Delete a Pipe's registration.
    """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')

@@ -869,19 +877,19 @@ def get_pipe_data(

    """
    import json
-    from meerschaum.utils.sql import sql_item_name
    from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.dtypes import (
        attempt_cast_to_numeric,
        attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
        are_dtypes_equal,
    )
    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
    pd = import_pandas()
    is_dask = 'dask' in pd.__name__

-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
    dtypes = {
        **{
            p_col: to_pandas_dtype(p_typ)
@@ -891,24 +899,21 @@ def get_pipe_data(
            col: get_pd_type_from_db_type(typ)
            for col, typ in cols_types.items()
        }
-    }
+    } if pipe.enforce else {}
    if dtypes:
        if self.flavor == 'sqlite':
            if not pipe.columns.get('datetime', None):
                _dt = pipe.guess_datetime()
-                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
            else:
                _dt = pipe.get_columns('datetime')
-                dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False

            if _dt:
                dt_type = dtypes.get(_dt, 'object').lower()
                if 'datetime' not in dt_type:
                    if 'int' not in dt_type:
                        dtypes[_dt] = 'datetime64[ns, UTC]'
-
+
+    existing_cols = cols_types.keys()
    select_columns = (
        [
            col
@@ -922,14 +927,14 @@ def get_pipe_data(
            if col in existing_cols
            and col not in (omit_columns or [])
        ]
-    )
+    ) if pipe.enforce else select_columns
    if select_columns:
        dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
    dtypes = {
        col: to_pandas_dtype(typ)
        for col, typ in dtypes.items()
        if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
    query = self.get_pipe_data_query(
        pipe,
        select_columns=select_columns,
@@ -959,6 +964,11 @@ def get_pipe_data(
        for col, typ in pipe.dtypes.items()
        if typ == 'uuid' and col in dtypes
    ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]

    kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))

@@ -978,6 +988,11 @@ def get_pipe_data(
            continue
        df[col] = df[col].apply(attempt_cast_to_uuid)

+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
    if self.flavor == 'sqlite':
        ignore_dt_cols = [
            col
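The new `bytes` handling mirrors the existing `uuid` path: values read back from the database are coerced cell by cell. As a rough idea of what a cast helper in this spirit might do (this sketch is an assumption about its behavior, not `attempt_cast_to_bytes`'s actual source), it passes real bytes through, tries to decode encoded text, and falls back to the original value instead of raising:

```python
import base64
from typing import Any

def attempt_cast_to_bytes_sketch(value: Any) -> Any:
    """Hypothetical stand-in: coerce a cell to bytes when possible, else pass through."""
    if value is None or isinstance(value, bytes):
        return value
    try:
        return base64.b64decode(str(value), validate=True)
    except Exception:
        return value

print(attempt_cast_to_bytes_sketch('aGVsbG8='))   # b'hello'
print(attempt_cast_to_bytes_sketch(b'raw'))       # b'raw'
print(attempt_cast_to_bytes_sketch('not bytes'))  # 'not bytes' (unchanged)
```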
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type

    dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
    dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
    select_columns = (
        [col for col in existing_cols]
        if not select_columns
-        else [col for col in select_columns if col in existing_cols]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
    )
    if omit_columns:
        select_columns = [col for col in select_columns if col not in omit_columns]
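Several hunks in this release gate schema lookups behind a new `enforce` flag on pipes: with `enforce=False`, `get_columns_types()` is skipped and the query builder no longer filters requested columns against the table's columns. A hedged usage sketch (the keyword comes from this diff; the connector keys and pipe names are placeholders):

```python
import meerschaum as mrsm

# With enforce=False (new in 2.7.0 per the hunks above), reads skip the
# dtype lookup and pass the requested columns through unchecked.
pipe = mrsm.Pipe(
    'plugin:noaa', 'weather',   # placeholder connector and metric keys
    instance='sql:main',
    enforce=False,
)
df = pipe.get_data(begin='2024-01-01', debug=True)
```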
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
            number=begin_add_minutes,
            begin=begin,
        )
-        where += f"{dt} >= {begin_da}" + ("
+        where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "")
        is_dt_bound = True

    if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
            number=end_add_minutes,
            begin=end
        )
-        where += f"{dt} <
+        where += f"{dt} < {end_da}"
        is_dt_bound = True

    if params is not None:
@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
    }
    if valid_params:
        where += build_where(valid_params, self).replace(
-            'WHERE', ('AND' if is_dt_bound else "")
+            'WHERE', (' AND' if is_dt_bound else " ")
        )

    if len(where) > 0:
@@ -1264,7 +1280,6 @@ def get_pipe_id(
    if pipe.temporary:
        return None
    from meerschaum.utils.packages import attempt_import
-    import json
    sqlalchemy = attempt_import('sqlalchemy')
    from meerschaum.connectors.sql.tables import get_tables
    pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
    """
    Create a pipe's table from its configured dtypes and an incoming dataframe.
    """
-    from meerschaum.utils.dataframe import
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
    from meerschaum.utils.sql import get_create_table_queries, sql_item_name
    primary_key = pipe.columns.get('primary', None)
    dt_col = pipe.columns.get('datetime', None)
@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
            col: 'numeric'
            for col in get_numeric_cols(df)
        },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
        **pipe.dtypes
    }
    autoincrement = (
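The inferred dtypes now distinguish timezone-aware from timezone-naive datetime columns via the two `get_datetime_cols(...)` calls. A plain-pandas approximation of the distinction being drawn (the helper itself lives in `meerschaum.utils.dataframe`; this is only an illustration of the split):

```python
import pandas as pd

df = pd.DataFrame({
    'dt_aware': pd.to_datetime(['2024-01-01'], utc=True),  # tz-aware
    'dt_naive': pd.to_datetime(['2024-01-01']),            # tz-naive
    'val': [1.0],
})

aware_cols = [
    col for col, dtype in df.dtypes.items()
    if isinstance(dtype, pd.DatetimeTZDtype)
]
naive_cols = [
    col for col, dtype in df.dtypes.items()
    if str(dtype).startswith('datetime64') and not isinstance(dtype, pd.DatetimeTZDtype)
]
print(aware_cols)  # ['dt_aware'] -> 'datetime64[ns, UTC]'
print(naive_cols)  # ['dt_naive'] -> 'datetime64[ns]'
```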
@@ -1455,11 +1488,9 @@ def sync_pipe(
        get_update_queries,
        sql_item_name,
        update_queries,
-        get_create_table_queries,
        get_reset_autoincrement_queries,
    )
    from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
    from meerschaum import Pipe
@@ -1567,11 +1598,13 @@ def sync_pipe(
        'if_exists': if_exists,
        'debug': debug,
        'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
        'chunksize': chunksize,
        'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
        'schema': self.get_pipe_schema(pipe),
    })

+    dt_col = pipe.columns.get('datetime', None)
    primary_key = pipe.columns.get('primary', None)
    autoincrement = (
        pipe.parameters.get('autoincrement', False)
@@ -1589,17 +1622,23 @@ def sync_pipe(
    if not edit_success:
        return edit_success, edit_msg

-
+    def _check_pk(_df_to_clear):
+        if _df_to_clear is None:
+            return
+        if primary_key not in _df_to_clear.columns:
+            return
+        if not _df_to_clear[primary_key].notnull().any():
+            del _df_to_clear[primary_key]
+
+    autoincrement_needs_reset = bool(
+        autoincrement
+        and primary_key
+        and primary_key in unseen_df.columns
+        and unseen_df[primary_key].notnull().any()
+    )
    if autoincrement and primary_key:
-
-
-        del unseen_df[primary_key]
-        if update_df is not None and primary_key in update_df.columns:
-            del update_df[primary_key]
-        if delta_df is not None and primary_key in delta_df.columns:
-            del delta_df[primary_key]
-        elif unseen_df[primary_key].notnull().any():
-            autoincrement_needs_reset = True
+        for _df_to_clear in (unseen_df, update_df, delta_df):
+            _check_pk(_df_to_clear)

    if is_new:
        create_success, create_msg = self.create_pipe_table_from_df(
@@ -1612,38 +1651,41 @@ def sync_pipe(

    do_identity_insert = bool(
        self.flavor in ('mssql',)
+        and primary_key
        and primary_key in unseen_df.columns
        and autoincrement
    )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        unseen_df,
-        _connection=connection,
-        **unseen_kw
-    )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."

-
-
-        f"SET IDENTITY_INSERT {pipe_name} OFF",
-        commit=False,
+                stats = self.to_sql(
+                    unseen_df,
                    _connection=connection,
-
-        debug=debug,
+                    **unseen_kw
                )
-
-
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."

    if is_new:
        if not self.create_indices(pipe, debug=debug):
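The rewritten insert path matters because MSSQL's `SET IDENTITY_INSERT` is a per-session setting: it must be toggled on the same connection that performs the insert, and the whole sequence should commit or roll back together. A generic SQLAlchemy sketch of that pattern (engine URL, table, and schema below are placeholders, not meerschaum internals):

```python
import pandas as pd
import sqlalchemy as sa

def insert_with_identity(df: pd.DataFrame, engine: sa.engine.Engine, table: str, schema: str = 'dbo'):
    """Toggle IDENTITY_INSERT around an insert on one connection, in one transaction."""
    qualified = f'[{schema}].[{table}]'
    with engine.connect() as connection:
        with connection.begin():
            connection.execute(sa.text(f"SET IDENTITY_INSERT {qualified} ON"))
            # pandas reuses the open connection, so the session setting applies here.
            df.to_sql(table, connection, schema=schema, if_exists='append', index=False)
            connection.execute(sa.text(f"SET IDENTITY_INSERT {qualified} OFF"))
```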
@@ -1682,11 +1724,12 @@ def sync_pipe(
            },
            target=temp_target,
            temporary=True,
+            enforce=False,
+            static=True,
+            autoincrement=False,
            parameters={
-                '
-                'schema': self.internal_schema,
+                'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                'hypertable': False,
-                'autoincrement': False,
            },
        )
        temp_pipe.__dict__['_columns_types'] = {
@@ -1707,7 +1750,11 @@ def sync_pipe(
            col
            for col_key, col in pipe.columns.items()
            if col and col in existing_cols
-        ]
+        ] if not primary_key or self.flavor == 'oracle' else (
+            [dt_col, primary_key]
+            if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+            else [primary_key]
+        )
        update_queries = get_update_queries(
            pipe.target,
            temp_target,
@@ -1716,12 +1763,17 @@ def sync_pipe(
            upsert=upsert,
            schema=self.get_pipe_schema(pipe),
            patch_schema=self.internal_schema,
-            datetime_col=
+            datetime_col=(dt_col if dt_col in update_df.columns else None),
+            identity_insert=(autoincrement and primary_key in update_df.columns),
            debug=debug,
        )
-
-
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
        )
+        update_success = all(update_results)
        self._log_temporary_tables_creation(
            temp_target,
            ready_to_drop=True,
@@ -1730,6 +1782,8 @@ def sync_pipe(
        )
        if not update_success:
            warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()

    stop = time.perf_counter()
    success = stats['success']
@@ -1834,7 +1888,6 @@ def sync_pipe_inplace(
        session_execute,
        update_queries,
    )
-    from meerschaum.utils.dtypes import are_dtypes_equal
    from meerschaum.utils.dtypes.sql import (
        get_pd_type_from_db_type,
    )
@@ -1907,8 +1960,8 @@ def sync_pipe_inplace(
        autoincrement=autoincrement,
        datetime_column=dt_col,
    )
-
-    if
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
        _ = clean_up_temp_tables()
        return False, f"Could not insert new data into {pipe} from its SQL query definition."

@@ -2054,6 +2107,7 @@ def sync_pipe_inplace(
    ) if not (upsert or static) else new_cols_types

    common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
    on_cols = {
        col: new_cols.get(col)
        for col_key, col in pipe.columns.items()
@@ -2064,7 +2118,7 @@ def sync_pipe_inplace(
            and col in backtrack_cols_types
            and col in new_cols
        )
-    }
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}

    null_replace_new_cols_str = (
        ', '.join([
@@ -2591,7 +2645,7 @@ def get_pipe_rowcount(
    result = self.value(query, debug=debug, silent=True)
    try:
        return int(result)
-    except Exception
+    except Exception:
        return None

@@ -2616,10 +2670,11 @@ def drop_pipe(
    from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
    success = True
    target = pipe.target
+    schema = self.get_pipe_schema(pipe)
    target_name = (
-        sql_item_name(target, self.flavor,
+        sql_item_name(target, self.flavor, schema)
    )
-    if table_exists(target, self, debug=debug):
+    if table_exists(target, self, schema=schema, debug=debug):
        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
        success = self.exec(
            f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
@@ -3330,9 +3385,7 @@ def deduplicate_pipe(
    """
    from meerschaum.utils.sql import (
        sql_item_name,
-        NO_CTE_FLAVORS,
        get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
        DROP_IF_EXISTS_FLAVORS,
        get_create_table_query,
        format_cte_subquery,
@@ -3454,7 +3507,6 @@ def deduplicate_pipe(
    dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
    temp_old_table = '-' + session_id + f"_old_{pipe.target}"

-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
    temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))

    create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py:

@@ -624,7 +624,7 @@ def exec_queries(
    rollback: bool = True,
    silent: bool = False,
    debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
    """
    Execute a list of queries in a single transaction.

@@ -688,6 +688,7 @@ def exec_queries(
            if result is None and break_on_error:
                if rollback:
                    session.rollback()
+                results.append(result)
                break
            elif result is not None and hook is not None:
                hook_queries = hook(session)
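This one-line fix pairs with the new `update_success = all(update_results)` check in `sync_pipe` above: without appending the failing `None` before `break`, the returned list would contain only the successful results, and `all()` would report success even though a query failed.

```python
# Why the failed result must be appended before `break`:
results_without_fix = ['ok', 'ok']        # failure dropped before the fix
results_with_fix = ['ok', 'ok', None]     # failure recorded after the fix

print(all(results_without_fix))  # True  -> the failure is masked
print(all(results_with_fix))     # False -> the caller sees the failed query
```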
@@ -715,6 +716,7 @@ def to_sql(
    method: str = "",
    chunksize: Optional[int] = -1,
    schema: Optional[str] = None,
+    safe_copy: bool = True,
    silent: bool = False,
    debug: bool = False,
    as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
    Parameters
    ----------
    df: pd.DataFrame
-        The DataFrame to be
+        The DataFrame to be inserted.

    name: str
        The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
        Optionally override the schema for the table.
        Defaults to `SQLConnector.schema`.

+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
    as_tuple: bool, default False
        If `True`, return a (success_bool, message) tuple instead of a `bool`.
        Defaults to `False`.
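A hedged usage sketch of the new keyword (the connector registration and dataframe are illustrative): internal callers such as `sync_pipe` pass `safe_copy=False` because their frames are already disposable copies, while external callers can keep the default so the bytes/numeric conversions below do not mutate their dataframe in place.

```python
import pandas as pd
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'temp', flavor='sqlite', database=':memory:')
df = pd.DataFrame({'id': [1, 2], 'payload': [b'\x00\x01', b'\x02']})

# Default safe_copy=True: to_sql() works on a copy of df.
conn.to_sql(df, name='events')

# Opt out of the copy when the frame is already a throwaway copy.
conn.to_sql(df, name='events', if_exists='append', safe_copy=False)
```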
@@ -770,8 +775,7 @@ def to_sql(
    """
    import time
    import json
-    import
-    from decimal import Decimal, Context
+    from decimal import Decimal
    from meerschaum.utils.warnings import error, warn
    import warnings
    import functools
@@ -790,10 +794,21 @@ def to_sql(
        truncate_item_name,
        DROP_IF_EXISTS_FLAVORS,
    )
-    from meerschaum.utils.dataframe import
-
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
    from meerschaum.utils.dtypes.sql import (
        NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
        get_db_type_from_pd_type,
    )
@@ -803,14 +818,35 @@ def to_sql(
    pd = import_pandas()
    is_dask = 'dask' in df.__module__

-
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
    ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
    if method == "":
        if self.flavor in _bulk_flavors:
            method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
        else:
            ### Should resolve to 'multi' or `None`.
            method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
    stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)

    default_chunksize = self._sys_config.get('chunksize', None)
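PostgreSQL's `COPY` fast path and Oracle inserts both need binary cells pre-serialized as text. Judging from the `with_prefix` flag above, a helper in this spirit plausibly renders bytes as hex, with PostgreSQL's `\x` BYTEA prefix by default and bare hex for Oracle; this sketch is an assumption about `encode_bytes_for_bytea`, not its actual source:

```python
def encode_bytes_for_bytea_sketch(value, with_prefix: bool = True):
    """Hypothetical equivalent: hex-encode bytes, optionally with the BYTEA '\\x' prefix."""
    if not isinstance(value, (bytes, bytearray)):
        return value
    return ('\\x' if with_prefix else '') + bytes(value).hex()

print(encode_bytes_for_bytea_sketch(b'\x01\xff'))                     # \x01ff
print(encode_bytes_for_bytea_sketch(b'\x01\xff', with_prefix=False))  # 01ff
```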
@@ -920,7 +956,6 @@ def to_sql(
    ### Check for numeric columns.
    numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
    if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
        for col in numeric_cols:
            df[col] = df[col].apply(
                lambda x: (