meerschaum 2.6.17__py3-none-any.whl → 2.7.0__py3-none-any.whl
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/install.py +1 -2
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +18 -21
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +122 -78
- meerschaum/connectors/sql/_sql.py +43 -8
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +25 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +59 -31
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +20 -13
- meerschaum/utils/dataframe.py +178 -16
- meerschaum/utils/dtypes/__init__.py +149 -14
- meerschaum/utils/dtypes/sql.py +41 -7
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/sql.py +174 -64
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +53 -20
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/METADATA +1 -1
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/RECORD +34 -34
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py:

@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."
 
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)
 
     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -460,10 +461,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'…
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
     )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_clustered_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )
 
     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)
@@ -474,6 +481,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')
 
     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (
@@ -504,19 +512,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif …
-            …
-        )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )
 
+    if dt_query:
         index_queries[_datetime] = [dt_query]
 
     primary_queries = []
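Review note for the two hunks above: SQL Server permits at most one clustered index per table. The new `existing_clustered_primary_keys` bookkeeping lets 2.7.0 give the clustered index to the datetime column when possible and demote the primary-key constraint to `NONCLUSTERED`, instead of unconditionally clustering the primary key. A standalone sketch of the decision (the function name is illustrative, not part of the package):

```python
def choose_clustered(dt_col, existing_clustered_pks):
    """Return (primary_key_clustered, datetime_clustered) keywords for MSSQL,
    mirroring the logic above: the datetime column wins the single allowed
    clustered index unless a clustered primary key already exists."""
    primary_key_clustered = "CLUSTERED" if dt_col is None else "NONCLUSTERED"
    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_pks and dt_col is not None
        else "NONCLUSTERED"
    )
    return primary_key_clustered, datetime_clustered

print(choose_clustered("timestamp", []))  # ('NONCLUSTERED', 'CLUSTERED')
print(choose_clustered(None, ["id"]))     # ('CLUSTERED', 'NONCLUSTERED')
```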
@@ -623,7 +631,7 @@ def get_create_index_queries(
         ),
         (
             f"ALTER TABLE {_pipe_name}\n"
-            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
         ),
     ])
     index_queries[primary_key] = primary_queries
@@ -658,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
@@ -785,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')
 
@@ -869,19 +877,19 @@ def get_pipe_data(
 
     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__
 
-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -891,24 +899,21 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
-                dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
-                dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False
 
             if _dt:
                 dt_type = dtypes.get(_dt, 'object').lower()
                 if 'datetime' not in dt_type:
                     if 'int' not in dt_type:
                         dtypes[_dt] = 'datetime64[ns, UTC]'
-
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -922,14 +927,14 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,
@@ -959,6 +964,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]
 
     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
 
@@ -978,6 +988,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)
 
+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
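`attempt_cast_to_bytes` joins the existing numeric and UUID casts so that `bytes` columns read back as real `bytes` objects. The diff does not show the helper's implementation; a minimal stand-in, assuming the stored values come back as base64-encoded strings:

```python
import base64

def attempt_cast_to_bytes_sketch(value):
    """Best-effort bytes cast: pass bytes/None through, try to decode strings,
    and return the input unchanged on failure. The real helper lives in
    `meerschaum.utils.dtypes` and its encoding may differ."""
    if value is None or isinstance(value, bytes):
        return value
    try:
        return base64.b64decode(value)
    except Exception:
        return value

print(attempt_cast_to_bytes_sketch('aGVsbG8='))  # b'hello'
```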
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 
    dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
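With the new `pipe.enforce` flag disabled, both `get_pipe_data()` and `get_pipe_data_query()` now skip the columns-types lookup entirely and pass requested columns through unfiltered. A hypothetical pipe showing where the flag is set (keys and instance are illustrative):

```python
import meerschaum as mrsm

# enforce=False skips schema discovery and dtype coercion on reads.
pipe = mrsm.Pipe(
    'demo', 'no_enforce',
    instance='sql:memory',
    enforce=False,
)
```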
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
             number=begin_add_minutes,
             begin=begin,
         )
-        where += f"{dt} >= {begin_da}" + ("…
+        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
         is_dt_bound = True
 
     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
             number=end_add_minutes,
             begin=end
         )
-        where += f"{dt} < …
+        where += f"{dt} < {end_da}"
         is_dt_bound = True
 
     if params is not None:
@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
     }
     if valid_params:
         where += build_where(valid_params, self).replace(
-            'WHERE', ('AND' if is_dt_bound else "")
+            'WHERE', (' AND' if is_dt_bound else " ")
         )
 
     if len(where) > 0:
@@ -1264,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
     """
     Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import …
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)
@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
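Note the merge order in the hunk above: because `**pipe.dtypes` is unpacked last, a user-configured dtype always overrides whatever was sniffed from the incoming dataframe. A two-line illustration of that precedence:

```python
sniffed = {'data': 'bytes', 'ts': 'datetime64[ns, UTC]'}  # from the dataframe
configured = {'data': 'json'}                             # pipe.dtypes
print({**sniffed, **configured})  # {'data': 'json', 'ts': 'datetime64[ns, UTC]'}
```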
@@ -1455,11 +1488,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe
@@ -1567,11 +1598,13 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
     })
 
+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)
@@ -1622,35 +1655,37 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    …
-            unseen_df,
-            _connection=connection,
-            **unseen_kw
-        )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."
 
-    …
-                        f"SET IDENTITY_INSERT {pipe_name} OFF",
-                        commit=False,
+                stats = self.to_sql(
+                    unseen_df,
                     _connection=connection,
-                        debug=debug,
+                    **unseen_kw
                 )
-    …
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."
 
     if is_new:
         if not self.create_indices(pipe, debug=debug):
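The rewrite above folds the `SET IDENTITY_INSERT` toggles and the insert itself into one connection and transaction, which SQL Server requires: the setting is per-session, so issuing the `SET` statements on separate connections would silently do nothing. A sketch of the pattern with plain SQLAlchemy (table and column names are illustrative):

```python
import sqlalchemy as sa

def insert_with_identity(engine, table, rows):
    """Toggle IDENTITY_INSERT around an explicit-identity insert,
    all on the same connection and transaction."""
    with engine.connect() as connection:
        with connection.begin():
            connection.execute(sa.text(f"SET IDENTITY_INSERT {table} ON"))
            connection.execute(
                sa.text(f"INSERT INTO {table} (id, val) VALUES (:id, :val)"),
                rows,  # e.g. [{'id': 1, 'val': 'a'}, {'id': 2, 'val': 'b'}]
            )
            connection.execute(sa.text(f"SET IDENTITY_INSERT {table} OFF"))
```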
@@ -1689,11 +1724,12 @@ def sync_pipe(
         },
         target=temp_target,
         temporary=True,
+        enforce=False,
+        static=True,
+        autoincrement=False,
         parameters={
-            '…
-            'schema': self.internal_schema,
+            'schema': (self.internal_schema if self.flavor != 'mssql' else None),
             'hypertable': False,
-            'autoincrement': False,
         },
     )
     temp_pipe.__dict__['_columns_types'] = {
@@ -1714,7 +1750,11 @@ def sync_pipe(
         col
         for col_key, col in pipe.columns.items()
         if col and col in existing_cols
-    ]
+    ] if not primary_key or self.flavor == 'oracle' else (
+        [dt_col, primary_key]
+        if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+        else [primary_key]
+    )
     update_queries = get_update_queries(
         pipe.target,
         temp_target,
@@ -1723,12 +1763,17 @@ def sync_pipe(
         upsert=upsert,
         schema=self.get_pipe_schema(pipe),
         patch_schema=self.internal_schema,
-        datetime_col=…
+        datetime_col=(dt_col if dt_col in update_df.columns else None),
+        identity_insert=(autoincrement and primary_key in update_df.columns),
         debug=debug,
     )
-    …
-    …
+    update_results = self.exec_queries(
+        update_queries,
+        break_on_error=True,
+        rollback=True,
+        debug=debug,
     )
+    update_success = all(update_results)
     self._log_temporary_tables_creation(
         temp_target,
         ready_to_drop=True,
|
1733
1778
|
temp_target,
|
1734
1779
|
ready_to_drop=True,
|
@@ -1737,6 +1782,8 @@ def sync_pipe(
|
|
1737
1782
|
)
|
1738
1783
|
if not update_success:
|
1739
1784
|
warn(f"Failed to apply update to {pipe}.")
|
1785
|
+
stats['success'] = stats['success'] and update_success
|
1786
|
+
stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
|
1740
1787
|
|
1741
1788
|
stop = time.perf_counter()
|
1742
1789
|
success = stats['success']
|
@@ -1841,7 +1888,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )
@@ -1914,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-    …
-    if …
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."
 
@@ -2061,6 +2107,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types
 
     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()
@@ -2071,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
 
     null_replace_new_cols_str = (
         ', '.join([
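Together with the `join_cols` change in `sync_pipe`, this hunk narrows the in-place merge keys to the configured primary key alone (except on Oracle, which keeps every indexed column). A simplified sketch of the selection rule:

```python
def resolve_on_cols(pipe_columns, new_cols, flavor):
    """Simplified sketch of the key selection above: prefer the primary key
    alone, except on Oracle where all configured index columns are kept."""
    primary_key = pipe_columns.get('primary', None)
    if not primary_key or flavor == 'oracle':
        return {col: new_cols.get(col) for col in pipe_columns.values() if col in new_cols}
    return {primary_key: new_cols.get(primary_key)}

cols = {'primary': 'id', 'datetime': 'ts'}
types = {'id': 'INT', 'ts': 'DATETIMEOFFSET'}
print(resolve_on_cols(cols, types, 'postgresql'))  # {'id': 'INT'}
print(resolve_on_cols(cols, types, 'oracle'))      # {'id': 'INT', 'ts': 'DATETIMEOFFSET'}
```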
@@ -3338,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
@@ -3462,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"
 
-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
 
     create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py:

@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.CursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
     """
     Execute a list of queries in a single transaction.
 
@@ -688,6 +688,7 @@ def exec_queries(
             if result is None and break_on_error:
                 if rollback:
                     session.rollback()
+                results.append(result)
                 break
             elif result is not None and hook is not None:
                 hook_queries = hook(session)
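This one-line fix matters for callers that gate on `all(results)` (for example the new `exec_queries` call in `sync_pipe_inplace`): previously, a query that failed while `break_on_error` was set was dropped from the list, so the surviving truthy results could mask the failure:

```python
results_dropped = [object(), object()]         # old behavior: failure omitted
results_appended = [object(), object(), None]  # new behavior: failure recorded
print(all(results_dropped))   # True  -- failure masked
print(all(results_appended))  # False -- failure surfaces
```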
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be …
+        The DataFrame to be inserted.
 
     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.
 
+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
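`safe_copy` defaults to `True` for direct callers, while `sync_pipe` passes `False` (see the `'safe_copy': kw.get('safe_copy', False)` hunk above) because it owns the intermediate dataframe and can tolerate in-place mutation. Hypothetical usage, assuming an existing `SQLConnector` named `conn`:

```python
# Keep the default when the caller still needs `df` afterwards;
# opt out to skip the copy when the frame is disposable.
success = conn.to_sql(df, name='events')                         # df left untouched
success = conn.to_sql(scratch_df, name='events', safe_copy=False)
```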
@@ -770,8 +775,7 @@ def to_sql(
     """
    import time
    import json
-    import …
-    from decimal import Decimal, Context
+    from decimal import Decimal
    from meerschaum.utils.warnings import error, warn
    import warnings
    import functools
@@ -790,10 +794,21 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import …
-    …
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -803,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__
 
-    …
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 
     default_chunksize = self._sys_config.get('chunksize', None)
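The new pre-insert coercions cover drivers that cannot accept raw `bytes` through the fast paths (the PostgreSQL `COPY` route and Oracle) and flavors that persist `numeric` as text. The exact encoding of `encode_bytes_for_bytea` is not shown in this diff; a sketch assuming PostgreSQL's hex `bytea` literal format:

```python
def encode_bytes_sketch(value, with_prefix=True):
    """Hex-encode bytes for insertion; PostgreSQL's bytea hex format takes a
    leading \\x, while the Oracle path above passes with_prefix=False."""
    hex_str = value.hex()
    return ('\\x' + hex_str) if with_prefix else hex_str

print(encode_bytes_sketch(b'abc'))                     # \x616263
print(encode_bytes_sketch(b'abc', with_prefix=False))  # 616263
```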
@@ -920,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
meerschaum/connectors/valkey/_pipes.py:

@@ -46,9 +46,20 @@ def serialize_document(doc: Dict[str, Any]) -> str:
     -------
     A serialized string for the document.
     """
+    from meerschaum.utils.dtypes import serialize_bytes
     return json.dumps(
         doc,
-        default=(…
+        default=(
+            lambda x: (
+                json_serialize_datetime(x)
+                if hasattr(x, 'tzinfo')
+                else (
+                    serialize_bytes(x)
+                    if isinstance(x, bytes)
+                    else str(x)
+                )
+            )
+        ),
         separators=(',', ':'),
         sort_keys=True,
     )