meerschaum 2.7.0rc1__py3-none-any.whl → 2.7.2__py3-none-any.whl
- meerschaum/api/dash/callbacks/dashboard.py +46 -37
- meerschaum/api/dash/connectors.py +7 -9
- meerschaum/api/resources/templates/termpage.html +32 -24
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/api/routes/_webterm.py +4 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +14 -18
- meerschaum/connectors/sql/_create_engine.py +6 -1
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +62 -56
- meerschaum/connectors/sql/_sql.py +37 -7
- meerschaum/core/Pipe/_attributes.py +6 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +1 -13
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/utils/daemon/Daemon.py +2 -2
- meerschaum/utils/dataframe.py +3 -3
- meerschaum/utils/dtypes/__init__.py +48 -2
- meerschaum/utils/dtypes/sql.py +15 -7
- meerschaum/utils/sql.py +114 -57
- meerschaum/utils/venv/__init__.py +22 -9
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
        return False, f"{pipe} is not registered and cannot be edited."
 
-    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    from meerschaum.utils.sql import json_flavors
    if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
    """
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values
+    from meerschaum.utils.misc import separate_negation_values
    from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
    from meerschaum.config.static import STATIC_CONFIG
    import json
@@ -316,7 +315,6 @@ def create_indices(
    """
    Create a pipe's indices.
    """
-    from meerschaum.utils.sql import sql_item_name, update_queries
    from meerschaum.utils.debug import dprint
    if debug:
        dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
    existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
    existing_ix_names = set()
    existing_primary_keys = []
+    existing_clustered_primary_keys = []
    for col, col_indices in existing_cols_indices.items():
        for col_ix_doc in col_indices:
            existing_ix_names.add(col_ix_doc.get('name', None))
            if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)
 
    _datetime = pipe.get_columns('datetime', error=False)
    _datetime_name = (
@@ -467,7 +468,7 @@ def get_create_index_queries(
    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
    datetime_clustered = (
        "CLUSTERED"
-        if not
+        if not existing_clustered_primary_keys and _datetime is not None
        else "NONCLUSTERED"
    )
 
@@ -665,6 +666,8 @@ def get_create_index_queries(
        cols = indices[ix_key]
        if not isinstance(cols, (list, tuple)):
            cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
        cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
        if not cols_names:
            continue
@@ -792,8 +795,6 @@ def delete_pipe(
    """
    Delete a Pipe's registration.
    """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
    from meerschaum.utils.packages import attempt_import
    sqlalchemy = attempt_import('sqlalchemy')
 
@@ -876,7 +877,6 @@ def get_pipe_data(
 
    """
    import json
-    from meerschaum.utils.sql import sql_item_name
    from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
    from meerschaum.utils.packages import import_pandas
    from meerschaum.utils.dtypes import (
@@ -889,7 +889,7 @@ def get_pipe_data(
    pd = import_pandas()
    is_dask = 'dask' in pd.__name__
 
-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
    dtypes = {
        **{
            p_col: to_pandas_dtype(p_typ)
@@ -904,17 +904,16 @@ def get_pipe_data(
    if self.flavor == 'sqlite':
        if not pipe.columns.get('datetime', None):
            _dt = pipe.guess_datetime()
-            dt = sql_item_name(_dt, self.flavor, None) if _dt else None
        else:
            _dt = pipe.get_columns('datetime')
-            dt = sql_item_name(_dt, self.flavor, None)
 
        if _dt:
            dt_type = dtypes.get(_dt, 'object').lower()
            if 'datetime' not in dt_type:
                if 'int' not in dt_type:
                    dtypes[_dt] = 'datetime64[ns, UTC]'
-
+
+    existing_cols = cols_types.keys()
    select_columns = (
        [
            col
@@ -928,7 +927,7 @@ def get_pipe_data(
            if col in existing_cols
            and col not in (omit_columns or [])
        ]
-    )
+    ) if pipe.enforce else select_columns
    if select_columns:
        dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
    dtypes = {
@@ -1109,12 +1108,13 @@ def get_pipe_data_query(
    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 
    dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
    dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
    select_columns = (
        [col for col in existing_cols]
        if not select_columns
-        else [col for col in select_columns if col in existing_cols]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
    )
    if omit_columns:
        select_columns = [col for col in select_columns if col not in omit_columns]
@@ -1201,7 +1201,7 @@ def get_pipe_data_query(
            number=begin_add_minutes,
            begin=begin,
        )
-        where += f"{dt} >= {begin_da}" + ("
+        where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "")
        is_dt_bound = True
 
    if end is not None and (_dt in existing_cols or skip_existing_cols_check):
|
@@ -1213,7 +1213,7 @@ def get_pipe_data_query(
|
|
1213
1213
|
number=end_add_minutes,
|
1214
1214
|
begin=end
|
1215
1215
|
)
|
1216
|
-
where += f"{dt} <
|
1216
|
+
where += f"{dt} < {end_da}"
|
1217
1217
|
is_dt_bound = True
|
1218
1218
|
|
1219
1219
|
if params is not None:
|
@@ -1225,7 +1225,7 @@ def get_pipe_data_query(
    }
    if valid_params:
        where += build_where(valid_params, self).replace(
-            'WHERE', ('AND' if is_dt_bound else "")
+            'WHERE', (' AND' if is_dt_bound else " ")
        )
 
    if len(where) > 0:
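Since `build_where()` returns a clause that begins with `WHERE`, the replace above splices it onto any datetime bounds already in the buffer. A toy illustration of that splice (the clause text and column names are made up):

```python
# Toy illustration: splice a params-derived clause into an existing WHERE body.
params_clause = """WHERE "color" = 'red'"""
is_dt_bound = True  # a datetime bound was already appended above
where = """\n "timestamp" >= '2024-01-01'\n"""
where += params_clause.replace('WHERE', (' AND' if is_dt_bound else ' '))
print('WHERE' + where)
```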
|
@@ -1280,7 +1280,6 @@ def get_pipe_id(
|
|
1280
1280
|
if pipe.temporary:
|
1281
1281
|
return None
|
1282
1282
|
from meerschaum.utils.packages import attempt_import
|
1283
|
-
import json
|
1284
1283
|
sqlalchemy = attempt_import('sqlalchemy')
|
1285
1284
|
from meerschaum.connectors.sql.tables import get_tables
|
1286
1285
|
pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
|
@@ -1599,6 +1598,7 @@ def sync_pipe(
        'if_exists': if_exists,
        'debug': debug,
        'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
        'chunksize': chunksize,
        'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
        'schema': self.get_pipe_schema(pipe),
@@ -1655,35 +1655,37 @@ def sync_pipe(
            and primary_key in unseen_df.columns
            and autoincrement
        )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        unseen_df,
-                        _connection=connection,
-                        **unseen_kw
-                    )
+        stats = {'success': True, 'msg': 'Success'}
+        if len(unseen_df) > 0:
+            with self.engine.connect() as connection:
+                with connection.begin():
+                    if do_identity_insert:
+                        identity_on_result = self.exec(
+                            f"SET IDENTITY_INSERT {pipe_name} ON",
+                            commit=False,
+                            _connection=connection,
+                            close=False,
+                            debug=debug,
+                        )
+                        if identity_on_result is None:
+                            return False, f"Could not enable identity inserts on {pipe}."
 
-
-
-                        f"SET IDENTITY_INSERT {pipe_name} OFF",
-                        commit=False,
+                    stats = self.to_sql(
+                        unseen_df,
                        _connection=connection,
-
-                        debug=debug,
+                        **unseen_kw
                    )
-
-
+
+                    if do_identity_insert:
+                        identity_off_result = self.exec(
+                            f"SET IDENTITY_INSERT {pipe_name} OFF",
+                            commit=False,
+                            _connection=connection,
+                            close=False,
+                            debug=debug,
+                        )
+                        if identity_off_result is None:
+                            return False, f"Could not disable identity inserts on {pipe}."
 
        if is_new:
            if not self.create_indices(pipe, debug=debug):
@@ -1722,11 +1724,12 @@ def sync_pipe(
                },
                target=temp_target,
                temporary=True,
+                enforce=False,
+                static=True,
+                autoincrement=False,
                parameters={
-                    '
-                    'schema': self.internal_schema,
+                    'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                    'hypertable': False,
-                    'autoincrement': False,
                },
            )
            temp_pipe.__dict__['_columns_types'] = {
@@ -1747,7 +1750,7 @@ def sync_pipe(
                    col
                    for col_key, col in pipe.columns.items()
                    if col and col in existing_cols
-                ] if not primary_key else (
+                ] if not primary_key or self.flavor == 'oracle' else (
                    [dt_col, primary_key]
                    if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
                    else [primary_key]
@@ -1764,9 +1767,13 @@ def sync_pipe(
                identity_insert=(autoincrement and primary_key in update_df.columns),
                debug=debug,
            )
-
-
+            update_results = self.exec_queries(
+                update_queries,
+                break_on_error=True,
+                rollback=True,
+                debug=debug,
            )
+            update_success = all(update_results)
            self._log_temporary_tables_creation(
                temp_target,
                ready_to_drop=True,
@@ -1775,6 +1782,8 @@ def sync_pipe(
            )
            if not update_success:
                warn(f"Failed to apply update to {pipe}.")
+                stats['success'] = stats['success'] and update_success
+                stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
 
        stop = time.perf_counter()
        success = stats['success']
@@ -1951,8 +1960,8 @@ def sync_pipe_inplace(
            autoincrement=autoincrement,
            datetime_column=dt_col,
        )
-
-        if
+        results = self.exec_queries(create_pipe_queries, debug=debug)
+        if not all(results):
            _ = clean_up_temp_tables()
            return False, f"Could not insert new data into {pipe} from its SQL query definition."
 
@@ -2109,7 +2118,7 @@ def sync_pipe_inplace(
            and col in backtrack_cols_types
            and col in new_cols
        )
-    } if not primary_key else {primary_key: new_cols.get(primary_key)}
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
 
    null_replace_new_cols_str = (
        ', '.join([
@@ -3376,9 +3385,7 @@ def deduplicate_pipe(
    """
    from meerschaum.utils.sql import (
        sql_item_name,
-        NO_CTE_FLAVORS,
        get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
        DROP_IF_EXISTS_FLAVORS,
        get_create_table_query,
        format_cte_subquery,
@@ -3500,7 +3507,6 @@ def deduplicate_pipe(
    dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
    temp_old_table = '-' + session_id + f"_old_{pipe.target}"
 
-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
    temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
 
    create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -624,7 +624,7 @@ def exec_queries(
    rollback: bool = True,
    silent: bool = False,
    debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.CursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
    """
    Execute a list of queries in a single transaction.
 
@@ -688,6 +688,7 @@ def exec_queries(
        if result is None and break_on_error:
            if rollback:
                session.rollback()
+            results.append(result)
            break
        elif result is not None and hook is not None:
            hook_queries = hook(session)
@@ -715,6 +716,7 @@ def to_sql(
    method: str = "",
    chunksize: Optional[int] = -1,
    schema: Optional[str] = None,
+    safe_copy: bool = True,
    silent: bool = False,
    debug: bool = False,
    as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
    Parameters
    ----------
    df: pd.DataFrame
-        The DataFrame to be
+        The DataFrame to be inserted.
 
    name: str
        The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
        Optionally override the schema for the table.
        Defaults to `SQLConnector.schema`.
 
+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
    as_tuple: bool, default False
        If `True`, return a (success_bool, message) tuple instead of a `bool`.
        Defaults to `False`.
@@ -770,8 +775,7 @@ def to_sql(
    """
    import time
    import json
-    import
-    from decimal import Decimal, Context
+    from decimal import Decimal
    from meerschaum.utils.warnings import error, warn
    import warnings
    import functools
@@ -796,9 +800,15 @@ def to_sql(
        get_uuid_cols,
        get_bytes_cols,
    )
-    from meerschaum.utils.dtypes import
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
    from meerschaum.utils.dtypes.sql import (
        NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
        PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
        get_db_type_from_pd_type,
    )
@@ -808,14 +818,35 @@ def to_sql(
    pd = import_pandas()
    is_dask = 'dask' in df.__module__
 
-
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
    ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
    if method == "":
        if self.flavor in _bulk_flavors:
            method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
        else:
            ### Should resolve to 'multi' or `None`.
            method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
    stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 
    default_chunksize = self._sys_config.get('chunksize', None)
|
|
925
956
|
### Check for numeric columns.
|
926
957
|
numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
|
927
958
|
if numeric_precision is not None and numeric_scale is not None:
|
928
|
-
numeric_cols = get_numeric_cols(df)
|
929
959
|
for col in numeric_cols:
|
930
960
|
df[col] = df[col].apply(
|
931
961
|
lambda x: (
|
meerschaum/core/Pipe/_attributes.py
CHANGED
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
    If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
    """
    from meerschaum.config._patch import apply_patch_to_config
+    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
    configured_dtypes = self.parameters.get('dtypes', {})
    remote_dtypes = self.infer_dtypes(persist=False)
    patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
-    return patched_dtypes
+    return {
+        col: MRSM_ALIAS_DTYPES.get(typ, typ)
+        for col, typ in patched_dtypes.items()
+        if col and typ
+    }
 
 
 @dtypes.setter
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
    pd = mrsm.attempt_import('pandas')
 
+
 def enforce_dtypes(
    self,
    df: 'pd.DataFrame',
@@ -30,7 +31,7 @@ def enforce_dtypes(
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.debug import dprint
    from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
    from meerschaum.utils.packages import import_pandas
    pd = import_pandas(debug=debug)
    if df is None:
@@ -41,7 +42,11 @@ def enforce_dtypes(
        )
        return df
 
-    pipe_dtypes = self.dtypes if self.enforce else {
+    pipe_dtypes = self.dtypes if self.enforce else {
+        col: typ
+        for col, typ in self.dtypes.items()
+        if typ in MRSM_PD_DTYPES
+    }
 
    try:
        if isinstance(df, str):
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
    A dictionary of strings containing the pandas data types for this Pipe.
    """
    if not self.exists(debug=debug):
-
-        if not self.columns:
-            return {}
-        dt_col = self.columns.get('datetime', None)
-        if dt_col:
-            if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns, UTC]'
-        return dtypes
+        return {}
 
    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
    from meerschaum.utils.dtypes import to_pandas_dtype
-    columns_types = self.get_columns_types(debug=debug)
 
    ### NOTE: get_columns_types() may return either the types as
    ### PostgreSQL- or Pandas-style.
-
+    columns_types = self.get_columns_types(debug=debug)
+
+    remote_pd_dtypes = {
        c: (
            get_pd_type_from_db_type(t, allow_custom_dtypes=True)
            if str(t).isupper()
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
        )
        for c, t in columns_types.items()
    } if columns_types else {}
-    if persist:
-
-
-
+    if not persist:
+        return remote_pd_dtypes
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({
+        col: typ
+        for col, typ in remote_pd_dtypes.items()
+        if col not in dtypes
+    })
+    self.dtypes = dtypes
+    self.edit(interactive=False, debug=debug)
+    return remote_pd_dtypes
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -161,7 +161,7 @@ def sync(
    self._exists = None
 
    def _sync(
-        p:
+        p: mrsm.Pipe,
        df: Union[
            'pd.DataFrame',
            Dict[str, List[Any]],
@@ -960,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
        return True, "Success"
 
    self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
    dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
    dtypes.update({col: 'numeric' for col in numeric_cols})
    self.parameters['dtypes'] = dtypes
    if not self.temporary:
@@ -988,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
        return True, "Success"
 
    self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
    dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
    dtypes.update({col: 'uuid' for col in uuid_cols})
    self.parameters['dtypes'] = dtypes
    if not self.temporary:
@@ -1016,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
        return True, "Success"
 
    self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
    dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
    dtypes.update({col: 'json' for col in json_cols})
    self.parameters['dtypes'] = dtypes
 
@@ -1045,10 +1036,7 @@ def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
        return True, "Success"
 
    self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
    dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
    dtypes.update({col: 'bytes' for col in bytes_cols})
    self.parameters['dtypes'] = dtypes
 
meerschaum/jobs/_Job.py
CHANGED
@@ -200,6 +200,8 @@ class Job:
        if root_dir is None:
            from meerschaum.config.paths import ROOT_DIR_PATH
            root_dir = ROOT_DIR_PATH
+        else:
+            root_dir = pathlib.Path(root_dir)
        jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
        daemon_dir = jobs_dir / daemon_id
        pid_file = daemon_dir / 'process.pid'
meerschaum/utils/daemon/Daemon.py
CHANGED
@@ -472,7 +472,7 @@ class Daemon:
            process.kill()
            process.wait(timeout=timeout)
        except Exception as e:
-            return False, f"Failed to kill job {self} with exception: {e}"
+            return False, f"Failed to kill job {self} ({process}) with exception: {e}"
 
        try:
            if process.status():
@@ -734,7 +734,7 @@ class Daemon:
                time.sleep(check_timeout_interval)
 
        return False, (
-            f"Failed to stop daemon '{self.daemon_id}' within {timeout} second"
+            f"Failed to stop daemon '{self.daemon_id}' (PID: {pid}) within {timeout} second"
            + ('s' if timeout != 1 else '') + '.'
        )
 
meerschaum/utils/dataframe.py
CHANGED
@@ -494,7 +494,7 @@ def parse_df_datetimes(
    ### skip parsing if DataFrame is empty
    if len(pdf) == 0:
        if debug:
-            dprint(
+            dprint("df is empty. Returning original DataFrame without casting datetime columns...")
        return df
 
    ignore_cols = set(
|
@@ -509,7 +509,7 @@ def parse_df_datetimes(
|
|
509
509
|
if len(cols_to_inspect) == 0:
|
510
510
|
if debug:
|
511
511
|
dprint("All columns are ignored, skipping datetime detection...")
|
512
|
-
return df.fillna(pandas.NA)
|
512
|
+
return df.infer_objects(copy=False).fillna(pandas.NA)
|
513
513
|
|
514
514
|
### apply regex to columns to determine which are ISO datetimes
|
515
515
|
iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
|
@@ -522,7 +522,7 @@ def parse_df_datetimes(
    if not datetime_cols:
        if debug:
            dprint("No columns detected as datetimes, returning...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)
 
    if debug:
        dprint("Converting columns to datetimes: " + str(datetime_cols))
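`infer_objects()` gives pandas a chance to downcast `object` columns to concrete dtypes before the `fillna(pandas.NA)` pass runs, rather than returning frames stuck at `object`. A quick demonstration:

```python
import pandas as pd

df = pd.DataFrame({'n': [1, 2, 3]}, dtype='object')
print(df.dtypes['n'])                            # object
print(df.infer_objects(copy=False).dtypes['n'])  # int64: concrete dtype restored
```

The subsequent `fillna(pandas.NA)` then operates on properly typed columns instead of raw `object` ones.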
|