meerschaum 2.6.17__py3-none-any.whl → 2.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/install.py +1 -2
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +18 -21
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +122 -78
- meerschaum/connectors/sql/_sql.py +43 -8
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +25 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +59 -31
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +20 -13
- meerschaum/utils/dataframe.py +178 -16
- meerschaum/utils/dtypes/__init__.py +149 -14
- meerschaum/utils/dtypes/sql.py +41 -7
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/sql.py +174 -64
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +53 -20
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/METADATA +1 -1
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/RECORD +34 -34
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.17.dist-info → meerschaum-2.7.0.dist-info}/zip-safe +0 -0
@@ -97,7 +97,6 @@ def edit_pipe(
|
|
97
97
|
if pipe.id is None:
|
98
98
|
return False, f"{pipe} is not registered and cannot be edited."
|
99
99
|
|
100
|
-
from meerschaum.utils.debug import dprint
|
101
100
|
from meerschaum.utils.packages import attempt_import
|
102
101
|
from meerschaum.utils.sql import json_flavors
|
103
102
|
if not patch:
|
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
|
|
172
171
|
"""
|
173
172
|
from meerschaum.utils.debug import dprint
|
174
173
|
from meerschaum.utils.packages import attempt_import
|
175
|
-
from meerschaum.utils.misc import separate_negation_values
|
174
|
+
from meerschaum.utils.misc import separate_negation_values
|
176
175
|
from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
|
177
176
|
from meerschaum.config.static import STATIC_CONFIG
|
178
177
|
import json
|
@@ -316,7 +315,6 @@ def create_indices(
|
|
316
315
|
"""
|
317
316
|
Create a pipe's indices.
|
318
317
|
"""
|
319
|
-
from meerschaum.utils.sql import sql_item_name, update_queries
|
320
318
|
from meerschaum.utils.debug import dprint
|
321
319
|
if debug:
|
322
320
|
dprint(f"Creating indices for {pipe}...")
|
@@ -419,11 +417,14 @@ def get_create_index_queries(
|
|
419
417
|
existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
|
420
418
|
existing_ix_names = set()
|
421
419
|
existing_primary_keys = []
|
420
|
+
existing_clustered_primary_keys = []
|
422
421
|
for col, col_indices in existing_cols_indices.items():
|
423
422
|
for col_ix_doc in col_indices:
|
424
423
|
existing_ix_names.add(col_ix_doc.get('name', None))
|
425
424
|
if col_ix_doc.get('type', None) == 'PRIMARY KEY':
|
426
425
|
existing_primary_keys.append(col)
|
426
|
+
if col_ix_doc.get('clustered', True):
|
427
|
+
existing_clustered_primary_keys.append(col)
|
427
428
|
|
428
429
|
_datetime = pipe.get_columns('datetime', error=False)
|
429
430
|
_datetime_name = (
|
@@ -460,10 +461,16 @@ def get_create_index_queries(
|
|
460
461
|
else None
|
461
462
|
)
|
462
463
|
primary_key_constraint_name = (
|
463
|
-
sql_item_name(f'
|
464
|
+
sql_item_name(f'PK_{pipe.target}', self.flavor, None)
|
464
465
|
if primary_key is not None
|
465
466
|
else None
|
466
467
|
)
|
468
|
+
primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
|
469
|
+
datetime_clustered = (
|
470
|
+
"CLUSTERED"
|
471
|
+
if not existing_clustered_primary_keys and _datetime is not None
|
472
|
+
else "NONCLUSTERED"
|
473
|
+
)
|
467
474
|
|
468
475
|
_id_index_name = (
|
469
476
|
sql_item_name(index_names['id'], self.flavor, None)
|
@@ -474,6 +481,7 @@ def get_create_index_queries(
|
|
474
481
|
_create_space_partition = get_config('system', 'experimental', 'space')
|
475
482
|
|
476
483
|
### create datetime index
|
484
|
+
dt_query = None
|
477
485
|
if _datetime is not None:
|
478
486
|
if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
|
479
487
|
_id_count = (
|
@@ -504,19 +512,19 @@ def get_create_index_queries(
|
|
504
512
|
+ 'if_not_exists => true, '
|
505
513
|
+ "migrate_data => true);"
|
506
514
|
)
|
507
|
-
elif
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
)
|
515
|
+
elif _datetime_index_name:
|
516
|
+
if self.flavor == 'mssql':
|
517
|
+
dt_query = (
|
518
|
+
f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
|
519
|
+
f"ON {_pipe_name} ({_datetime_name})"
|
520
|
+
)
|
521
|
+
else:
|
522
|
+
dt_query = (
|
523
|
+
f"CREATE INDEX {_datetime_index_name} "
|
524
|
+
+ f"ON {_pipe_name} ({_datetime_name})"
|
525
|
+
)
|
519
526
|
|
527
|
+
if dt_query:
|
520
528
|
index_queries[_datetime] = [dt_query]
|
521
529
|
|
522
530
|
primary_queries = []
|
@@ -623,7 +631,7 @@ def get_create_index_queries(
|
|
623
631
|
),
|
624
632
|
(
|
625
633
|
f"ALTER TABLE {_pipe_name}\n"
|
626
|
-
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
|
634
|
+
f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
|
627
635
|
),
|
628
636
|
])
|
629
637
|
index_queries[primary_key] = primary_queries
|
@@ -658,6 +666,8 @@ def get_create_index_queries(
|
|
658
666
|
cols = indices[ix_key]
|
659
667
|
if not isinstance(cols, (list, tuple)):
|
660
668
|
cols = [cols]
|
669
|
+
if ix_key == 'unique' and upsert:
|
670
|
+
continue
|
661
671
|
cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
|
662
672
|
if not cols_names:
|
663
673
|
continue
|
@@ -785,8 +795,6 @@ def delete_pipe(
|
|
785
795
|
"""
|
786
796
|
Delete a Pipe's registration.
|
787
797
|
"""
|
788
|
-
from meerschaum.utils.sql import sql_item_name
|
789
|
-
from meerschaum.utils.debug import dprint
|
790
798
|
from meerschaum.utils.packages import attempt_import
|
791
799
|
sqlalchemy = attempt_import('sqlalchemy')
|
792
800
|
|
@@ -869,19 +877,19 @@ def get_pipe_data(
|
|
869
877
|
|
870
878
|
"""
|
871
879
|
import json
|
872
|
-
from meerschaum.utils.sql import sql_item_name
|
873
880
|
from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
|
874
881
|
from meerschaum.utils.packages import import_pandas
|
875
882
|
from meerschaum.utils.dtypes import (
|
876
883
|
attempt_cast_to_numeric,
|
877
884
|
attempt_cast_to_uuid,
|
885
|
+
attempt_cast_to_bytes,
|
878
886
|
are_dtypes_equal,
|
879
887
|
)
|
880
888
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
881
889
|
pd = import_pandas()
|
882
890
|
is_dask = 'dask' in pd.__name__
|
883
891
|
|
884
|
-
cols_types = pipe.get_columns_types(debug=debug)
|
892
|
+
cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
|
885
893
|
dtypes = {
|
886
894
|
**{
|
887
895
|
p_col: to_pandas_dtype(p_typ)
|
@@ -891,24 +899,21 @@ def get_pipe_data(
|
|
891
899
|
col: get_pd_type_from_db_type(typ)
|
892
900
|
for col, typ in cols_types.items()
|
893
901
|
}
|
894
|
-
}
|
902
|
+
} if pipe.enforce else {}
|
895
903
|
if dtypes:
|
896
904
|
if self.flavor == 'sqlite':
|
897
905
|
if not pipe.columns.get('datetime', None):
|
898
906
|
_dt = pipe.guess_datetime()
|
899
|
-
dt = sql_item_name(_dt, self.flavor, None) if _dt else None
|
900
|
-
is_guess = True
|
901
907
|
else:
|
902
908
|
_dt = pipe.get_columns('datetime')
|
903
|
-
dt = sql_item_name(_dt, self.flavor, None)
|
904
|
-
is_guess = False
|
905
909
|
|
906
910
|
if _dt:
|
907
911
|
dt_type = dtypes.get(_dt, 'object').lower()
|
908
912
|
if 'datetime' not in dt_type:
|
909
913
|
if 'int' not in dt_type:
|
910
914
|
dtypes[_dt] = 'datetime64[ns, UTC]'
|
911
|
-
|
915
|
+
|
916
|
+
existing_cols = cols_types.keys()
|
912
917
|
select_columns = (
|
913
918
|
[
|
914
919
|
col
|
@@ -922,14 +927,14 @@ def get_pipe_data(
|
|
922
927
|
if col in existing_cols
|
923
928
|
and col not in (omit_columns or [])
|
924
929
|
]
|
925
|
-
)
|
930
|
+
) if pipe.enforce else select_columns
|
926
931
|
if select_columns:
|
927
932
|
dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
|
928
933
|
dtypes = {
|
929
934
|
col: to_pandas_dtype(typ)
|
930
935
|
for col, typ in dtypes.items()
|
931
936
|
if col in select_columns and col not in (omit_columns or [])
|
932
|
-
}
|
937
|
+
} if pipe.enforce else {}
|
933
938
|
query = self.get_pipe_data_query(
|
934
939
|
pipe,
|
935
940
|
select_columns=select_columns,
|
@@ -959,6 +964,11 @@ def get_pipe_data(
|
|
959
964
|
for col, typ in pipe.dtypes.items()
|
960
965
|
if typ == 'uuid' and col in dtypes
|
961
966
|
]
|
967
|
+
bytes_columns = [
|
968
|
+
col
|
969
|
+
for col, typ in pipe.dtypes.items()
|
970
|
+
if typ == 'bytes' and col in dtypes
|
971
|
+
]
|
962
972
|
|
963
973
|
kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
|
964
974
|
|
@@ -978,6 +988,11 @@ def get_pipe_data(
|
|
978
988
|
continue
|
979
989
|
df[col] = df[col].apply(attempt_cast_to_uuid)
|
980
990
|
|
991
|
+
for col in bytes_columns:
|
992
|
+
if col not in df.columns:
|
993
|
+
continue
|
994
|
+
df[col] = df[col].apply(attempt_cast_to_bytes)
|
995
|
+
|
981
996
|
if self.flavor == 'sqlite':
|
982
997
|
ignore_dt_cols = [
|
983
998
|
col
|
@@ -1093,12 +1108,13 @@ def get_pipe_data_query(
|
|
1093
1108
|
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
1094
1109
|
|
1095
1110
|
dt_col = pipe.columns.get('datetime', None)
|
1096
|
-
existing_cols = pipe.get_columns_types(debug=debug)
|
1111
|
+
existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
|
1112
|
+
skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
|
1097
1113
|
dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
|
1098
1114
|
select_columns = (
|
1099
1115
|
[col for col in existing_cols]
|
1100
1116
|
if not select_columns
|
1101
|
-
else [col for col in select_columns if col in existing_cols
|
1117
|
+
else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
|
1102
1118
|
)
|
1103
1119
|
if omit_columns:
|
1104
1120
|
select_columns = [col for col in select_columns if col not in omit_columns]
|
@@ -1185,7 +1201,7 @@ def get_pipe_data_query(
|
|
1185
1201
|
number=begin_add_minutes,
|
1186
1202
|
begin=begin,
|
1187
1203
|
)
|
1188
|
-
where += f"{dt} >= {begin_da}" + ("
|
1204
|
+
where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "")
|
1189
1205
|
is_dt_bound = True
|
1190
1206
|
|
1191
1207
|
if end is not None and (_dt in existing_cols or skip_existing_cols_check):
|
@@ -1197,7 +1213,7 @@ def get_pipe_data_query(
|
|
1197
1213
|
number=end_add_minutes,
|
1198
1214
|
begin=end
|
1199
1215
|
)
|
1200
|
-
where += f"{dt} <
|
1216
|
+
where += f"{dt} < {end_da}"
|
1201
1217
|
is_dt_bound = True
|
1202
1218
|
|
1203
1219
|
if params is not None:
|
@@ -1209,7 +1225,7 @@ def get_pipe_data_query(
|
|
1209
1225
|
}
|
1210
1226
|
if valid_params:
|
1211
1227
|
where += build_where(valid_params, self).replace(
|
1212
|
-
'WHERE', ('AND' if is_dt_bound else "")
|
1228
|
+
'WHERE', (' AND' if is_dt_bound else " ")
|
1213
1229
|
)
|
1214
1230
|
|
1215
1231
|
if len(where) > 0:
|
@@ -1264,7 +1280,6 @@ def get_pipe_id(
|
|
1264
1280
|
if pipe.temporary:
|
1265
1281
|
return None
|
1266
1282
|
from meerschaum.utils.packages import attempt_import
|
1267
|
-
import json
|
1268
1283
|
sqlalchemy = attempt_import('sqlalchemy')
|
1269
1284
|
from meerschaum.connectors.sql.tables import get_tables
|
1270
1285
|
pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
|
@@ -1339,7 +1354,13 @@ def create_pipe_table_from_df(
|
|
1339
1354
|
"""
|
1340
1355
|
Create a pipe's table from its configured dtypes and an incoming dataframe.
|
1341
1356
|
"""
|
1342
|
-
from meerschaum.utils.dataframe import
|
1357
|
+
from meerschaum.utils.dataframe import (
|
1358
|
+
get_json_cols,
|
1359
|
+
get_numeric_cols,
|
1360
|
+
get_uuid_cols,
|
1361
|
+
get_datetime_cols,
|
1362
|
+
get_bytes_cols,
|
1363
|
+
)
|
1343
1364
|
from meerschaum.utils.sql import get_create_table_queries, sql_item_name
|
1344
1365
|
primary_key = pipe.columns.get('primary', None)
|
1345
1366
|
dt_col = pipe.columns.get('datetime', None)
|
@@ -1365,6 +1386,18 @@ def create_pipe_table_from_df(
|
|
1365
1386
|
col: 'numeric'
|
1366
1387
|
for col in get_numeric_cols(df)
|
1367
1388
|
},
|
1389
|
+
**{
|
1390
|
+
col: 'bytes'
|
1391
|
+
for col in get_bytes_cols(df)
|
1392
|
+
},
|
1393
|
+
**{
|
1394
|
+
col: 'datetime64[ns, UTC]'
|
1395
|
+
for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
|
1396
|
+
},
|
1397
|
+
**{
|
1398
|
+
col: 'datetime64[ns]'
|
1399
|
+
for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
|
1400
|
+
},
|
1368
1401
|
**pipe.dtypes
|
1369
1402
|
}
|
1370
1403
|
autoincrement = (
|
@@ -1455,11 +1488,9 @@ def sync_pipe(
|
|
1455
1488
|
get_update_queries,
|
1456
1489
|
sql_item_name,
|
1457
1490
|
update_queries,
|
1458
|
-
get_create_table_queries,
|
1459
1491
|
get_reset_autoincrement_queries,
|
1460
1492
|
)
|
1461
1493
|
from meerschaum.utils.misc import generate_password
|
1462
|
-
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
1463
1494
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
1464
1495
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1465
1496
|
from meerschaum import Pipe
|
@@ -1567,11 +1598,13 @@ def sync_pipe(
|
|
1567
1598
|
'if_exists': if_exists,
|
1568
1599
|
'debug': debug,
|
1569
1600
|
'as_dict': True,
|
1601
|
+
'safe_copy': kw.get('safe_copy', False),
|
1570
1602
|
'chunksize': chunksize,
|
1571
1603
|
'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
|
1572
1604
|
'schema': self.get_pipe_schema(pipe),
|
1573
1605
|
})
|
1574
1606
|
|
1607
|
+
dt_col = pipe.columns.get('datetime', None)
|
1575
1608
|
primary_key = pipe.columns.get('primary', None)
|
1576
1609
|
autoincrement = (
|
1577
1610
|
pipe.parameters.get('autoincrement', False)
|
@@ -1622,35 +1655,37 @@ def sync_pipe(
|
|
1622
1655
|
and primary_key in unseen_df.columns
|
1623
1656
|
and autoincrement
|
1624
1657
|
)
|
1625
|
-
|
1626
|
-
|
1627
|
-
|
1628
|
-
|
1629
|
-
|
1630
|
-
|
1631
|
-
|
1632
|
-
|
1633
|
-
|
1634
|
-
|
1635
|
-
|
1636
|
-
|
1637
|
-
|
1638
|
-
|
1639
|
-
unseen_df,
|
1640
|
-
_connection=connection,
|
1641
|
-
**unseen_kw
|
1642
|
-
)
|
1658
|
+
stats = {'success': True, 'msg': 'Success'}
|
1659
|
+
if len(unseen_df) > 0:
|
1660
|
+
with self.engine.connect() as connection:
|
1661
|
+
with connection.begin():
|
1662
|
+
if do_identity_insert:
|
1663
|
+
identity_on_result = self.exec(
|
1664
|
+
f"SET IDENTITY_INSERT {pipe_name} ON",
|
1665
|
+
commit=False,
|
1666
|
+
_connection=connection,
|
1667
|
+
close=False,
|
1668
|
+
debug=debug,
|
1669
|
+
)
|
1670
|
+
if identity_on_result is None:
|
1671
|
+
return False, f"Could not enable identity inserts on {pipe}."
|
1643
1672
|
|
1644
|
-
|
1645
|
-
|
1646
|
-
f"SET IDENTITY_INSERT {pipe_name} OFF",
|
1647
|
-
commit=False,
|
1673
|
+
stats = self.to_sql(
|
1674
|
+
unseen_df,
|
1648
1675
|
_connection=connection,
|
1649
|
-
|
1650
|
-
debug=debug,
|
1676
|
+
**unseen_kw
|
1651
1677
|
)
|
1652
|
-
|
1653
|
-
|
1678
|
+
|
1679
|
+
if do_identity_insert:
|
1680
|
+
identity_off_result = self.exec(
|
1681
|
+
f"SET IDENTITY_INSERT {pipe_name} OFF",
|
1682
|
+
commit=False,
|
1683
|
+
_connection=connection,
|
1684
|
+
close=False,
|
1685
|
+
debug=debug,
|
1686
|
+
)
|
1687
|
+
if identity_off_result is None:
|
1688
|
+
return False, f"Could not disable identity inserts on {pipe}."
|
1654
1689
|
|
1655
1690
|
if is_new:
|
1656
1691
|
if not self.create_indices(pipe, debug=debug):
|
@@ -1689,11 +1724,12 @@ def sync_pipe(
|
|
1689
1724
|
},
|
1690
1725
|
target=temp_target,
|
1691
1726
|
temporary=True,
|
1727
|
+
enforce=False,
|
1728
|
+
static=True,
|
1729
|
+
autoincrement=False,
|
1692
1730
|
parameters={
|
1693
|
-
'
|
1694
|
-
'schema': self.internal_schema,
|
1731
|
+
'schema': (self.internal_schema if self.flavor != 'mssql' else None),
|
1695
1732
|
'hypertable': False,
|
1696
|
-
'autoincrement': False,
|
1697
1733
|
},
|
1698
1734
|
)
|
1699
1735
|
temp_pipe.__dict__['_columns_types'] = {
|
@@ -1714,7 +1750,11 @@ def sync_pipe(
|
|
1714
1750
|
col
|
1715
1751
|
for col_key, col in pipe.columns.items()
|
1716
1752
|
if col and col in existing_cols
|
1717
|
-
]
|
1753
|
+
] if not primary_key or self.flavor == 'oracle' else (
|
1754
|
+
[dt_col, primary_key]
|
1755
|
+
if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
|
1756
|
+
else [primary_key]
|
1757
|
+
)
|
1718
1758
|
update_queries = get_update_queries(
|
1719
1759
|
pipe.target,
|
1720
1760
|
temp_target,
|
@@ -1723,12 +1763,17 @@ def sync_pipe(
|
|
1723
1763
|
upsert=upsert,
|
1724
1764
|
schema=self.get_pipe_schema(pipe),
|
1725
1765
|
patch_schema=self.internal_schema,
|
1726
|
-
datetime_col=
|
1766
|
+
datetime_col=(dt_col if dt_col in update_df.columns else None),
|
1767
|
+
identity_insert=(autoincrement and primary_key in update_df.columns),
|
1727
1768
|
debug=debug,
|
1728
1769
|
)
|
1729
|
-
|
1730
|
-
|
1770
|
+
update_results = self.exec_queries(
|
1771
|
+
update_queries,
|
1772
|
+
break_on_error=True,
|
1773
|
+
rollback=True,
|
1774
|
+
debug=debug,
|
1731
1775
|
)
|
1776
|
+
update_success = all(update_results)
|
1732
1777
|
self._log_temporary_tables_creation(
|
1733
1778
|
temp_target,
|
1734
1779
|
ready_to_drop=True,
|
@@ -1737,6 +1782,8 @@ def sync_pipe(
|
|
1737
1782
|
)
|
1738
1783
|
if not update_success:
|
1739
1784
|
warn(f"Failed to apply update to {pipe}.")
|
1785
|
+
stats['success'] = stats['success'] and update_success
|
1786
|
+
stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
|
1740
1787
|
|
1741
1788
|
stop = time.perf_counter()
|
1742
1789
|
success = stats['success']
|
@@ -1841,7 +1888,6 @@ def sync_pipe_inplace(
|
|
1841
1888
|
session_execute,
|
1842
1889
|
update_queries,
|
1843
1890
|
)
|
1844
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
1845
1891
|
from meerschaum.utils.dtypes.sql import (
|
1846
1892
|
get_pd_type_from_db_type,
|
1847
1893
|
)
|
@@ -1914,8 +1960,8 @@ def sync_pipe_inplace(
|
|
1914
1960
|
autoincrement=autoincrement,
|
1915
1961
|
datetime_column=dt_col,
|
1916
1962
|
)
|
1917
|
-
|
1918
|
-
if
|
1963
|
+
results = self.exec_queries(create_pipe_queries, debug=debug)
|
1964
|
+
if not all(results):
|
1919
1965
|
_ = clean_up_temp_tables()
|
1920
1966
|
return False, f"Could not insert new data into {pipe} from its SQL query definition."
|
1921
1967
|
|
@@ -2061,6 +2107,7 @@ def sync_pipe_inplace(
|
|
2061
2107
|
) if not (upsert or static) else new_cols_types
|
2062
2108
|
|
2063
2109
|
common_cols = [col for col in new_cols if col in backtrack_cols_types]
|
2110
|
+
primary_key = pipe.columns.get('primary', None)
|
2064
2111
|
on_cols = {
|
2065
2112
|
col: new_cols.get(col)
|
2066
2113
|
for col_key, col in pipe.columns.items()
|
@@ -2071,7 +2118,7 @@ def sync_pipe_inplace(
|
|
2071
2118
|
and col in backtrack_cols_types
|
2072
2119
|
and col in new_cols
|
2073
2120
|
)
|
2074
|
-
}
|
2121
|
+
} if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
|
2075
2122
|
|
2076
2123
|
null_replace_new_cols_str = (
|
2077
2124
|
', '.join([
|
@@ -3338,9 +3385,7 @@ def deduplicate_pipe(
|
|
3338
3385
|
"""
|
3339
3386
|
from meerschaum.utils.sql import (
|
3340
3387
|
sql_item_name,
|
3341
|
-
NO_CTE_FLAVORS,
|
3342
3388
|
get_rename_table_queries,
|
3343
|
-
NO_SELECT_INTO_FLAVORS,
|
3344
3389
|
DROP_IF_EXISTS_FLAVORS,
|
3345
3390
|
get_create_table_query,
|
3346
3391
|
format_cte_subquery,
|
@@ -3462,7 +3507,6 @@ def deduplicate_pipe(
|
|
3462
3507
|
dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
|
3463
3508
|
temp_old_table = '-' + session_id + f"_old_{pipe.target}"
|
3464
3509
|
|
3465
|
-
dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
|
3466
3510
|
temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
|
3467
3511
|
|
3468
3512
|
create_temporary_table_query = get_create_table_query(
|
@@ -624,7 +624,7 @@ def exec_queries(
|
|
624
624
|
rollback: bool = True,
|
625
625
|
silent: bool = False,
|
626
626
|
debug: bool = False,
|
627
|
-
) -> List[sqlalchemy.engine.cursor.
|
627
|
+
) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
|
628
628
|
"""
|
629
629
|
Execute a list of queries in a single transaction.
|
630
630
|
|
@@ -688,6 +688,7 @@ def exec_queries(
|
|
688
688
|
if result is None and break_on_error:
|
689
689
|
if rollback:
|
690
690
|
session.rollback()
|
691
|
+
results.append(result)
|
691
692
|
break
|
692
693
|
elif result is not None and hook is not None:
|
693
694
|
hook_queries = hook(session)
|
@@ -715,6 +716,7 @@ def to_sql(
|
|
715
716
|
method: str = "",
|
716
717
|
chunksize: Optional[int] = -1,
|
717
718
|
schema: Optional[str] = None,
|
719
|
+
safe_copy: bool = True,
|
718
720
|
silent: bool = False,
|
719
721
|
debug: bool = False,
|
720
722
|
as_tuple: bool = False,
|
@@ -729,7 +731,7 @@ def to_sql(
|
|
729
731
|
Parameters
|
730
732
|
----------
|
731
733
|
df: pd.DataFrame
|
732
|
-
The DataFrame to be
|
734
|
+
The DataFrame to be inserted.
|
733
735
|
|
734
736
|
name: str
|
735
737
|
The name of the table to be created.
|
@@ -752,6 +754,9 @@ def to_sql(
|
|
752
754
|
Optionally override the schema for the table.
|
753
755
|
Defaults to `SQLConnector.schema`.
|
754
756
|
|
757
|
+
safe_copy: bool, default True
|
758
|
+
If `True`, copy the dataframe before making any changes.
|
759
|
+
|
755
760
|
as_tuple: bool, default False
|
756
761
|
If `True`, return a (success_bool, message) tuple instead of a `bool`.
|
757
762
|
Defaults to `False`.
|
@@ -770,8 +775,7 @@ def to_sql(
|
|
770
775
|
"""
|
771
776
|
import time
|
772
777
|
import json
|
773
|
-
import
|
774
|
-
from decimal import Decimal, Context
|
778
|
+
from decimal import Decimal
|
775
779
|
from meerschaum.utils.warnings import error, warn
|
776
780
|
import warnings
|
777
781
|
import functools
|
@@ -790,10 +794,21 @@ def to_sql(
|
|
790
794
|
truncate_item_name,
|
791
795
|
DROP_IF_EXISTS_FLAVORS,
|
792
796
|
)
|
793
|
-
from meerschaum.utils.dataframe import
|
794
|
-
|
797
|
+
from meerschaum.utils.dataframe import (
|
798
|
+
get_json_cols,
|
799
|
+
get_numeric_cols,
|
800
|
+
get_uuid_cols,
|
801
|
+
get_bytes_cols,
|
802
|
+
)
|
803
|
+
from meerschaum.utils.dtypes import (
|
804
|
+
are_dtypes_equal,
|
805
|
+
quantize_decimal,
|
806
|
+
coerce_timezone,
|
807
|
+
encode_bytes_for_bytea,
|
808
|
+
)
|
795
809
|
from meerschaum.utils.dtypes.sql import (
|
796
810
|
NUMERIC_PRECISION_FLAVORS,
|
811
|
+
NUMERIC_AS_TEXT_FLAVORS,
|
797
812
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
|
798
813
|
get_db_type_from_pd_type,
|
799
814
|
)
|
@@ -803,14 +818,35 @@ def to_sql(
|
|
803
818
|
pd = import_pandas()
|
804
819
|
is_dask = 'dask' in df.__module__
|
805
820
|
|
806
|
-
|
821
|
+
bytes_cols = get_bytes_cols(df)
|
822
|
+
numeric_cols = get_numeric_cols(df)
|
823
|
+
|
824
|
+
stats = {'target': name,}
|
807
825
|
### resort to defaults if None
|
826
|
+
copied = False
|
827
|
+
use_psql_copy = False
|
808
828
|
if method == "":
|
809
829
|
if self.flavor in _bulk_flavors:
|
810
830
|
method = functools.partial(psql_insert_copy, schema=self.schema)
|
831
|
+
use_psql_copy = True
|
811
832
|
else:
|
812
833
|
### Should resolve to 'multi' or `None`.
|
813
834
|
method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
|
835
|
+
|
836
|
+
if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
|
837
|
+
if safe_copy and not copied:
|
838
|
+
df = df.copy()
|
839
|
+
copied = True
|
840
|
+
for col in bytes_cols:
|
841
|
+
df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
|
842
|
+
|
843
|
+
if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
|
844
|
+
if safe_copy and not copied:
|
845
|
+
df = df.copy()
|
846
|
+
copied = True
|
847
|
+
for col in numeric_cols:
|
848
|
+
df[col] = df[col].astype(str)
|
849
|
+
|
814
850
|
stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
|
815
851
|
|
816
852
|
default_chunksize = self._sys_config.get('chunksize', None)
|
@@ -920,7 +956,6 @@ def to_sql(
|
|
920
956
|
### Check for numeric columns.
|
921
957
|
numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
|
922
958
|
if numeric_precision is not None and numeric_scale is not None:
|
923
|
-
numeric_cols = get_numeric_cols(df)
|
924
959
|
for col in numeric_cols:
|
925
960
|
df[col] = df[col].apply(
|
926
961
|
lambda x: (
|
@@ -46,9 +46,20 @@ def serialize_document(doc: Dict[str, Any]) -> str:
|
|
46
46
|
-------
|
47
47
|
A serialized string for the document.
|
48
48
|
"""
|
49
|
+
from meerschaum.utils.dtypes import serialize_bytes
|
49
50
|
return json.dumps(
|
50
51
|
doc,
|
51
|
-
default=(
|
52
|
+
default=(
|
53
|
+
lambda x: (
|
54
|
+
json_serialize_datetime(x)
|
55
|
+
if hasattr(x, 'tzinfo')
|
56
|
+
else (
|
57
|
+
serialize_bytes(x)
|
58
|
+
if isinstance(x, bytes)
|
59
|
+
else str(x)
|
60
|
+
)
|
61
|
+
)
|
62
|
+
),
|
52
63
|
separators=(',', ':'),
|
53
64
|
sort_keys=True,
|
54
65
|
)
|