meerschaum 2.6.16__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- meerschaum/_internal/arguments/_parse_arguments.py +1 -1
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/edit.py +22 -2
- meerschaum/actions/install.py +1 -2
- meerschaum/actions/sync.py +2 -3
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +18 -21
- meerschaum/connectors/sql/_create_engine.py +3 -3
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +143 -91
- meerschaum/connectors/sql/_sql.py +43 -8
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +25 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +59 -31
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +4 -1
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +22 -15
- meerschaum/utils/dataframe.py +178 -16
- meerschaum/utils/dtypes/__init__.py +149 -14
- meerschaum/utils/dtypes/sql.py +41 -7
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/schedule.py +8 -3
- meerschaum/utils/sql.py +180 -100
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +53 -20
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/METADATA +2 -2
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/RECORD +40 -40
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py:

```diff
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."
 
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
```
```diff
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
```
```diff
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
```
```diff
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)
 
     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
```
```diff
@@ -460,10 +461,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
     )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_clustered_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )
 
     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)
```
```diff
@@ -474,6 +481,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')
 
     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (
```
```diff
@@ -504,19 +512,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif
-
-
-
-
-
-
-
-
-
-
-        )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )
 
+    if dt_query:
         index_queries[_datetime] = [dt_query]
 
     primary_queries = []
```
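The `CLUSTERED`/`NONCLUSTERED` keywords above only matter on MSSQL, where a table may hold at most one clustered index. A minimal sketch of the selection rule as it reads in the hunks above (the helper function is ours, for illustration only):

```python
def choose_clustering(datetime_col, existing_clustered_primary_keys):
    """
    Sketch of the 2.7.0 rule: cluster on the datetime axis when one exists
    and no clustered primary key is already in place; otherwise the primary
    key keeps the single clustered slot.
    """
    primary_key_clustered = "CLUSTERED" if datetime_col is None else "NONCLUSTERED"
    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_primary_keys and datetime_col is not None
        else "NONCLUSTERED"
    )
    return primary_key_clustered, datetime_clustered


# A pipe with a datetime axis and no pre-existing clustered PK clusters on time:
assert choose_clustering('ts', []) == ("NONCLUSTERED", "CLUSTERED")
# Without a datetime axis, the primary key takes the clustered slot:
assert choose_clustering(None, []) == ("CLUSTERED", "NONCLUSTERED")
```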
```diff
@@ -623,7 +631,7 @@ def get_create_index_queries(
         ),
         (
             f"ALTER TABLE {_pipe_name}\n"
-            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
         ),
     ])
     index_queries[primary_key] = primary_queries
```
```diff
@@ -658,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
```
```diff
@@ -785,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')
 
```
```diff
@@ -869,19 +877,19 @@ def get_pipe_data(
 
     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__
 
-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
```
```diff
@@ -891,24 +899,21 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
-                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
-                dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False
 
             if _dt:
                 dt_type = dtypes.get(_dt, 'object').lower()
                 if 'datetime' not in dt_type:
                     if 'int' not in dt_type:
                         dtypes[_dt] = 'datetime64[ns, UTC]'
-
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
```
```diff
@@ -922,14 +927,14 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,
```
```diff
@@ -959,6 +964,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]
 
     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
 
```
```diff
@@ -978,6 +988,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)
 
+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
```
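For context, `attempt_cast_to_bytes` restores `bytes` values for columns registered with the new `bytes` dtype when rows come back from the database. A rough sketch of the pattern, using a hypothetical stand-in for the real cast (the actual encoding used by `meerschaum.utils.dtypes` may differ):

```python
import base64
import pandas as pd

def _attempt_cast_to_bytes(value):
    # Hypothetical stand-in for meerschaum.utils.dtypes.attempt_cast_to_bytes:
    # pass bytes/None through and leave values it cannot decode unchanged.
    if isinstance(value, bytes) or value is None:
        return value
    try:
        return base64.b64decode(value)
    except Exception:
        return value

df = pd.DataFrame({'payload': ['aGVsbG8=']})  # base64 for b'hello'
pipe_dtypes = {'payload': 'bytes'}

# Mirror the hunk above: select the registered bytes columns and cast them.
bytes_columns = [
    col
    for col, typ in pipe_dtypes.items()
    if typ == 'bytes' and col in df.columns
]
for col in bytes_columns:
    df[col] = df[col].apply(_attempt_cast_to_bytes)
```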
```diff
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 
     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
```
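These `pipe.enforce` guards are the read-side half of the new `enforce` flag: with enforcement off, the connector skips the column-type lookup entirely and passes `select_columns` through unchecked. A usage sketch (the instance keys are hypothetical):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'no_enforce',
    instance='sql:local',  # hypothetical instance keys
    enforce=False,         # new in 2.7.0: skip dtype checks on reads and writes
)
# With enforce=False, get_pipe_data() builds its query without first fetching
# the table's column types, saving a metadata round-trip per read.
df = pipe.get_data(begin='2024-01-01')
```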
```diff
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
             number=begin_add_minutes,
             begin=begin,
         )
-        where += f"{dt} >= {begin_da}" + ("
+        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
         is_dt_bound = True
 
     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
```
```diff
@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
             number=end_add_minutes,
             begin=end
         )
-        where += f"{dt} <
+        where += f"{dt} < {end_da}"
         is_dt_bound = True
 
     if params is not None:
```
```diff
@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
     }
     if valid_params:
         where += build_where(valid_params, self).replace(
-            'WHERE', ('AND' if is_dt_bound else "")
+            'WHERE', (' AND' if is_dt_bound else " ")
         )
 
     if len(where) > 0:
```
```diff
@@ -1264,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
```
```diff
@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
     """
    Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)
```
```diff
@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
```
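The enlarged dict merge above determines column types for the new table: detected bytes and datetime columns are layered over the numeric detection, and `pipe.dtypes` is unpacked last so explicitly configured dtypes always win. A small self-contained illustration (the expected result is shown as a comment and assumes the detection helpers behave as their names suggest):

```python
import pandas as pd
from meerschaum.utils.dataframe import get_bytes_cols, get_datetime_cols, get_numeric_cols

df = pd.DataFrame({
    'ts': pd.to_datetime(['2024-01-01'], utc=True),
    'blob': [b'\x00\x01'],
})
inferred = {
    **{col: 'numeric' for col in get_numeric_cols(df)},
    **{col: 'bytes' for col in get_bytes_cols(df)},
    **{
        col: 'datetime64[ns, UTC]'
        for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
    },
    **{
        col: 'datetime64[ns]'
        for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
    },
}
# inferred -> {'blob': 'bytes', 'ts': 'datetime64[ns, UTC]'}
```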
```diff
@@ -1455,11 +1488,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe
```
```diff
@@ -1567,11 +1598,13 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
     })
 
+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)
```
```diff
@@ -1589,17 +1622,23 @@ def sync_pipe(
         if not edit_success:
             return edit_success, edit_msg
 
-
+    def _check_pk(_df_to_clear):
+        if _df_to_clear is None:
+            return
+        if primary_key not in _df_to_clear.columns:
+            return
+        if not _df_to_clear[primary_key].notnull().any():
+            del _df_to_clear[primary_key]
+
+    autoincrement_needs_reset = bool(
+        autoincrement
+        and primary_key
+        and primary_key in unseen_df.columns
+        and unseen_df[primary_key].notnull().any()
+    )
     if autoincrement and primary_key:
-
-
-            del unseen_df[primary_key]
-        if update_df is not None and primary_key in update_df.columns:
-            del update_df[primary_key]
-        if delta_df is not None and primary_key in delta_df.columns:
-            del delta_df[primary_key]
-        elif unseen_df[primary_key].notnull().any():
-            autoincrement_needs_reset = True
+        for _df_to_clear in (unseen_df, update_df, delta_df):
+            _check_pk(_df_to_clear)
 
     if is_new:
         create_success, create_msg = self.create_pipe_table_from_df(
```
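The `_check_pk` helper replaces the older unconditional `del` logic: an autoincrementing primary-key column is now dropped from an outgoing frame only when it is entirely null, so explicitly supplied IDs survive the sync. A toy demonstration of the rule, lifted from the hunk above:

```python
import pandas as pd

primary_key = 'id'

def _check_pk(_df_to_clear):
    # Drop the PK column only when no row carries a value, letting the
    # database assign IDs; partially populated keys are kept as-is.
    if _df_to_clear is None:
        return
    if primary_key not in _df_to_clear.columns:
        return
    if not _df_to_clear[primary_key].notnull().any():
        del _df_to_clear[primary_key]

all_null = pd.DataFrame({'id': [None, None], 'val': [1, 2]})
partial = pd.DataFrame({'id': [None, 10], 'val': [1, 2]})
for frame in (all_null, partial):
    _check_pk(frame)

assert 'id' not in all_null.columns   # fully-null key removed
assert 'id' in partial.columns        # supplied IDs preserved
```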
```diff
@@ -1612,38 +1651,41 @@ def sync_pipe(
 
     do_identity_insert = bool(
         self.flavor in ('mssql',)
+        and primary_key
         and primary_key in unseen_df.columns
         and autoincrement
     )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            unseen_df,
-            _connection=connection,
-            **unseen_kw
-        )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."
 
-
-
-            f"SET IDENTITY_INSERT {pipe_name} OFF",
-            commit=False,
+                stats = self.to_sql(
+                    unseen_df,
                     _connection=connection,
-
-            debug=debug,
+                    **unseen_kw
                 )
-
-
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."
 
     if is_new:
         if not self.create_indices(pipe, debug=debug):
```
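The restructured block wraps the insert and both `SET IDENTITY_INSERT` toggles in a single connection and transaction. That matters because the flag is session-scoped on MSSQL: toggling it on a different connection than the one performing the insert has no effect. A bare SQLAlchemy sketch of the same pattern (placeholder URI and table names):

```python
from sqlalchemy import create_engine, text

engine = create_engine('mssql+pyodbc://...')  # placeholder connection URI

with engine.connect() as connection:
    with connection.begin():
        # Allow explicit values in the IDENTITY column for this session only.
        connection.execute(text('SET IDENTITY_INSERT my_table ON'))
        connection.execute(
            text('INSERT INTO my_table (id, val) VALUES (:id, :val)'),
            {'id': 42, 'val': 'x'},
        )
        # Restore the default before the transaction commits.
        connection.execute(text('SET IDENTITY_INSERT my_table OFF'))
```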
```diff
@@ -1682,11 +1724,12 @@ def sync_pipe(
         },
         target=temp_target,
         temporary=True,
+        enforce=False,
+        static=True,
+        autoincrement=False,
         parameters={
-            '
-            'schema': self.internal_schema,
+            'schema': (self.internal_schema if self.flavor != 'mssql' else None),
             'hypertable': False,
-            'autoincrement': False,
         },
     )
     temp_pipe.__dict__['_columns_types'] = {
```
temp_pipe.__dict__['_columns_types'] = {
|
@@ -1707,7 +1750,11 @@ def sync_pipe(
|
|
1707
1750
|
col
|
1708
1751
|
for col_key, col in pipe.columns.items()
|
1709
1752
|
if col and col in existing_cols
|
1710
|
-
]
|
1753
|
+
] if not primary_key or self.flavor == 'oracle' else (
|
1754
|
+
[dt_col, primary_key]
|
1755
|
+
if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
|
1756
|
+
else [primary_key]
|
1757
|
+
)
|
1711
1758
|
update_queries = get_update_queries(
|
1712
1759
|
pipe.target,
|
1713
1760
|
temp_target,
|
```diff
@@ -1716,12 +1763,17 @@ def sync_pipe(
             upsert=upsert,
             schema=self.get_pipe_schema(pipe),
             patch_schema=self.internal_schema,
-            datetime_col=
+            datetime_col=(dt_col if dt_col in update_df.columns else None),
+            identity_insert=(autoincrement and primary_key in update_df.columns),
             debug=debug,
         )
-
-
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
         )
+        update_success = all(update_results)
         self._log_temporary_tables_creation(
             temp_target,
             ready_to_drop=True,
```
```diff
@@ -1730,6 +1782,8 @@ def sync_pipe(
         )
         if not update_success:
             warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
 
     stop = time.perf_counter()
     success = stats['success']
```
```diff
@@ -1834,7 +1888,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )
```
```diff
@@ -1907,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-
-    if
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."
 
```
```diff
@@ -2054,6 +2107,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types
 
     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()
```
```diff
@@ -2064,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
 
     null_replace_new_cols_str = (
         ', '.join([
```
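The same narrowing applies to in-place syncs: when a primary key is configured (and the flavor is not Oracle), the patch joins on the primary key alone rather than on every configured index column. A toy version of the selection (the function and inputs are ours, for illustration):

```python
def choose_on_cols(pipe_columns, new_cols, flavor):
    # Join on the primary key alone when one is configured; Oracle keeps
    # the old multi-column join, mirroring the condition in the hunk above.
    primary_key = pipe_columns.get('primary', None)
    if not primary_key or flavor == 'oracle':
        return {
            col: new_cols.get(col)
            for col in pipe_columns.values()
            if col in new_cols
        }
    return {primary_key: new_cols.get(primary_key)}


assert choose_on_cols(
    {'primary': 'id', 'datetime': 'ts'},
    {'id': 'INT', 'ts': 'DATETIME'},
    'mssql',
) == {'id': 'INT'}
```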
```diff
@@ -2591,7 +2645,7 @@ def get_pipe_rowcount(
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
-    except Exception
+    except Exception:
         return None
 
 
```
```diff
@@ -2616,10 +2670,11 @@ def drop_pipe(
     from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
+    schema = self.get_pipe_schema(pipe)
     target_name = (
-        sql_item_name(target, self.flavor,
+        sql_item_name(target, self.flavor, schema)
     )
-    if table_exists(target, self, debug=debug):
+    if table_exists(target, self, schema=schema, debug=debug):
         if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
         success = self.exec(
             f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
```
```diff
@@ -3330,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
```
```diff
@@ -3454,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"
 
-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
 
     create_temporary_table_query = get_create_table_query(
```
meerschaum/connectors/sql/_sql.py:

```diff
@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
     """
     Execute a list of queries in a single transaction.
 
```
```diff
@@ -688,6 +688,7 @@ def exec_queries(
         if result is None and break_on_error:
             if rollback:
                 session.rollback()
+            results.append(result)
             break
         elif result is not None and hook is not None:
             hook_queries = hook(session)
```
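Appending the `None` result before breaking keeps the returned list honest: a failed statement now shows up in the output, so callers can detect failure with a plain `all()`, which is exactly how the `sync_pipe` hunk above consumes it (sketch; `conn` and `queries` are placeholders):

```python
results = conn.exec_queries(
    queries,
    break_on_error=True,
    rollback=True,
)
success = all(results)  # False if any statement returned None (failed)
```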
```diff
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
```
```diff
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be
+        The DataFrame to be inserted.
 
     name: str
         The name of the table to be created.
```
```diff
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.
 
+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
```
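`safe_copy` lets callers opt out of the defensive copy when they own the frame; the `sync_pipe` hunk earlier passes `safe_copy=False` because its staging frames are already throwaway. A usage sketch (the connector keys are hypothetical):

```python
import pandas as pd
import meerschaum as mrsm

conn = mrsm.get_connector('sql', 'local')  # hypothetical connector keys
df = pd.DataFrame({'id': [1, 2], 'val': ['a', 'b']})

# The default (safe_copy=True) shields the caller's frame from the in-place
# bytes/numeric conversions inside to_sql(); pass False when the frame is
# disposable and the extra copy is wasted work.
success = conn.to_sql(df, name='my_table', safe_copy=False)
```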
```diff
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
```
```diff
@@ -790,10 +794,21 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import
-
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
```
```diff
@@ -803,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__
 
-
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 
     default_chunksize = self._sys_config.get('chunksize', None)
```
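Two flavor-specific conversions now happen before the insert: bytes columns are encoded where the bulk path cannot pass raw `bytes` through, and numeric columns are stringified on flavors that store `NUMERIC` as text. A self-contained sketch with a stand-in encoder (the real `encode_bytes_for_bytea` lives in `meerschaum.utils.dtypes` and may encode differently):

```python
import pandas as pd

def encode_bytes_for_bytea(value, with_prefix=True):
    # Stand-in for meerschaum.utils.dtypes.encode_bytes_for_bytea (assumed
    # behavior): render bytes as hex, optionally with PostgreSQL's \x prefix
    # so COPY can load the literal into a BYTEA column.
    if not isinstance(value, bytes):
        return value
    return ('\\x' if with_prefix else '') + value.hex()

df = pd.DataFrame({'blob': [b'\x01\x02'], 'amount': ['1.50']})
flavor = 'postgresql'
safe_copy, copied = True, False
bytes_cols = ['blob']  # normally discovered via get_bytes_cols(df)

if bytes_cols and flavor in ('postgresql', 'oracle'):
    if safe_copy and not copied:
        df = df.copy()  # leave the caller's frame untouched
        copied = True
    for col in bytes_cols:
        df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(flavor != 'oracle'))

print(df['blob'].iloc[0])  # \x0102
```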
```diff
@@ -920,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
```