meerschaum 2.7.0rc1__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/dash/callbacks/dashboard.py +46 -37
- meerschaum/api/dash/connectors.py +7 -9
- meerschaum/api/resources/templates/termpage.html +32 -24
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/api/routes/_webterm.py +4 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +14 -18
- meerschaum/connectors/sql/_create_engine.py +6 -1
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +62 -56
- meerschaum/connectors/sql/_sql.py +37 -7
- meerschaum/core/Pipe/_attributes.py +6 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +1 -13
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/utils/daemon/Daemon.py +2 -2
- meerschaum/utils/dataframe.py +3 -3
- meerschaum/utils/dtypes/__init__.py +48 -2
- meerschaum/utils/dtypes/sql.py +15 -7
- meerschaum/utils/sql.py +114 -57
- meerschaum/utils/venv/__init__.py +22 -9
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
        return False, f"{pipe} is not registered and cannot be edited."

-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)

     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -467,7 +468,7 @@ def get_create_index_queries(
     primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
     datetime_clustered = (
         "CLUSTERED"
-        if not existing_primary_keys and _datetime is not None
+        if not existing_clustered_primary_keys and _datetime is not None
         else "NONCLUSTERED"
     )

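In effect, on MSSQL the datetime index is only made CLUSTERED when no clustered primary key already exists. A standalone sketch of the decision (the introspection results are hypothetical):

    # Hypothetical introspection results for illustration.
    existing_clustered_primary_keys = ['id']
    _datetime = 'dt'

    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_primary_keys and _datetime is not None
        else "NONCLUSTERED"
    )
    print(datetime_clustered)  # NONCLUSTERED: a clustered PK already claims the slot
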
@@ -665,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
@@ -792,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')

@@ -876,7 +877,6 @@ def get_pipe_data(

     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
@@ -889,7 +889,7 @@ def get_pipe_data(
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__

-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -904,17 +904,16 @@ def get_pipe_data(
     if self.flavor == 'sqlite':
         if not pipe.columns.get('datetime', None):
             _dt = pipe.guess_datetime()
-            dt = sql_item_name(_dt, self.flavor, None) if _dt else None
         else:
             _dt = pipe.get_columns('datetime')
-            dt = sql_item_name(_dt, self.flavor, None)

         if _dt:
             dt_type = dtypes.get(_dt, 'object').lower()
             if 'datetime' not in dt_type:
                 if 'int' not in dt_type:
                     dtypes[_dt] = 'datetime64[ns, UTC]'
-
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -928,7 +927,7 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
@@ -1109,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type

     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
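Both read paths now honor the pipe's `enforce` flag: with `enforce=False`, `get_columns_types()` is skipped and the column-existence check is bypassed. A minimal sketch of declaring such a pipe (connector, metric, and instance keys are placeholders):

    import meerschaum as mrsm

    pipe = mrsm.Pipe(
        'demo', 'weather',               # placeholder connector and metric keys
        instance='sql:main',
        enforce=False,                   # reads skip dtype lookups and column checks
        columns={'datetime': 'dt'},
    )
    df = pipe.get_data()                 # no get_columns_types() round-trip
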
@@ -1201,7 +1201,7 @@ def get_pipe_data_query(
                 number=begin_add_minutes,
                 begin=begin,
             )
-            where += f"{dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
+            where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
             is_dt_bound = True

     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1213,7 +1213,7 @@ def get_pipe_data_query(
                 number=end_add_minutes,
                 begin=end
             )
-            where += f"{dt} <
+            where += f"{dt} < {end_da}"
             is_dt_bound = True

     if params is not None:
@@ -1225,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', (' AND' if is_dt_bound else " ")
             )

     if len(where) > 0:
@@ -1280,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1599,6 +1598,7 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
@@ -1655,35 +1655,37 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            unseen_df,
-            _connection=connection,
-            **unseen_kw
-        )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."

-
-
-            f"SET IDENTITY_INSERT {pipe_name} OFF",
-            commit=False,
-            _connection=connection,
-
-            debug=debug,
-        )
-
-
+                stats = self.to_sql(
+                    unseen_df,
+                    _connection=connection,
+                    **unseen_kw
+                )
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."

     if is_new:
         if not self.create_indices(pipe, debug=debug):
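The rewrite scopes the MSSQL `SET IDENTITY_INSERT` toggles and the insert to a single connection and transaction, bailing out if either toggle fails. The same pattern in plain SQLAlchemy (the DSN and table are placeholders):

    import pandas as pd
    import sqlalchemy

    engine = sqlalchemy.create_engine('mssql+pyodbc://user:pass@dsn')  # placeholder
    df = pd.DataFrame({'id': [1, 2], 'val': ['a', 'b']})

    with engine.connect() as connection:
        with connection.begin():  # toggles and insert commit (or roll back) together
            connection.execute(sqlalchemy.text('SET IDENTITY_INSERT dbo.demo ON'))
            df.to_sql('demo', connection, schema='dbo', if_exists='append', index=False)
            connection.execute(sqlalchemy.text('SET IDENTITY_INSERT dbo.demo OFF'))
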
@@ -1722,11 +1724,12 @@ def sync_pipe(
             },
             target=temp_target,
             temporary=True,
+            enforce=False,
+            static=True,
+            autoincrement=False,
             parameters={
-                'static': True,
-                'schema': self.internal_schema,
+                'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                 'hypertable': False,
-                'autoincrement': False,
             },
         )
         temp_pipe.__dict__['_columns_types'] = {
@@ -1747,7 +1750,7 @@ def sync_pipe(
                 col
                 for col_key, col in pipe.columns.items()
                 if col and col in existing_cols
-            ] if not primary_key else (
+            ] if not primary_key or self.flavor == 'oracle' else (
                 [dt_col, primary_key]
                 if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
                 else [primary_key]
@@ -1764,9 +1767,13 @@ def sync_pipe(
             identity_insert=(autoincrement and primary_key in update_df.columns),
             debug=debug,
         )
-
-
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
         )
+        update_success = all(update_results)
         self._log_temporary_tables_creation(
             temp_target,
             ready_to_drop=True,
@@ -1775,6 +1782,8 @@ def sync_pipe(
         )
         if not update_success:
             warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()

     stop = time.perf_counter()
     success = stats['success']
@@ -1951,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-
-    if
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."

@@ -2109,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    } if not primary_key else {primary_key: new_cols.get(primary_key)}
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}

     null_replace_new_cols_str = (
         ', '.join([
@@ -3376,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
@@ -3500,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"

-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))

     create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.CursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
    """
    Execute a list of queries in a single transaction.

@@ -688,6 +688,7 @@ def exec_queries(
         if result is None and break_on_error:
             if rollback:
                 session.rollback()
+            results.append(result)
             break
         elif result is not None and hook is not None:
             hook_queries = hook(session)
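Because the failing `None` is now appended before the loop breaks, the returned list faithfully reports the failure, and callers can gate on `all(...)` exactly as `sync_pipe()` does above. A usage sketch, assuming a `SQLConnector` registered as `sql:main`:

    import meerschaum as mrsm

    conn = mrsm.get_connector('sql:main')  # placeholder instance keys
    queries = [
        "CREATE TABLE IF NOT EXISTS t (a INT)",
        "INSERT INTO t (a) VALUES (1)",
    ]
    results = conn.exec_queries(queries, break_on_error=True, rollback=True)
    if not all(results):  # a None entry marks the failed query
        print("Batch failed and was rolled back.")
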
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be
+        The DataFrame to be inserted.

     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.

+    safe_copy: bool, defaul True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
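`safe_copy` exists because `to_sql()` may now rewrite columns in place (hex-encoding bytes, casting numerics to text); copying first keeps the caller's DataFrame intact. A sketch, assuming a `SQLConnector` registered as `sql:main`:

    import meerschaum as mrsm
    import pandas as pd

    conn = mrsm.get_connector('sql:main')    # placeholder instance keys
    df = pd.DataFrame({'data': [b'\x00\x01', b'\x02']})
    conn.to_sql(df, name='blobs')            # safe_copy=True by default
    assert isinstance(df['data'][0], bytes)  # the caller's frame is untouched
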
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
@@ -796,9 +800,15 @@ def to_sql(
         get_uuid_cols,
         get_bytes_cols,
     )
-    from meerschaum.utils.dtypes import
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -808,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__

-
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)

     default_chunksize = self._sys_config.get('chunksize', None)
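The helper imported above hex-encodes raw bytes so they survive PostgreSQL's COPY path (which expects the `\x`-prefixed BYTEA text form) and Oracle's prefix-less hex literals. A sketch of equivalent logic; the real implementation lives in `meerschaum.utils.dtypes` and may differ:

    def encode_bytes_for_bytea(value, with_prefix: bool = True):
        """Hex-encode bytes for text-mode transport (sketch)."""
        if not isinstance(value, bytes):
            return value
        return ('\\x' if with_prefix else '') + value.hex()

    print(encode_bytes_for_bytea(b'\x01\x02'))                     # \x0102
    print(encode_bytes_for_bytea(b'\x01\x02', with_prefix=False))  # 0102
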
@@ -925,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
meerschaum/core/Pipe/_attributes.py
CHANGED
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
     If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
     """
     from meerschaum.config._patch import apply_patch_to_config
+    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
     configured_dtypes = self.parameters.get('dtypes', {})
     remote_dtypes = self.infer_dtypes(persist=False)
     patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
-    return patched_dtypes
+    return {
+        col: MRSM_ALIAS_DTYPES.get(typ, typ)
+        for col, typ in patched_dtypes.items()
+        if col and typ
+    }


 @dtypes.setter
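The property now funnels every configured type through `MRSM_ALIAS_DTYPES` so shorthand spellings resolve to canonical Meerschaum dtypes, and falsy keys or types are dropped. An illustration with hypothetical alias entries:

    MRSM_ALIAS_DTYPES = {'str': 'string', 'double': 'float64'}  # hypothetical subset

    patched_dtypes = {'city': 'str', 'temp': 'double', '': None}
    normalized = {
        col: MRSM_ALIAS_DTYPES.get(typ, typ)
        for col, typ in patched_dtypes.items()
        if col and typ
    }
    print(normalized)  # {'city': 'string', 'temp': 'float64'}
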
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     pd = mrsm.attempt_import('pandas')

+
 def enforce_dtypes(
     self,
     df: 'pd.DataFrame',
@@ -30,7 +31,7 @@ def enforce_dtypes(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
     from meerschaum.utils.packages import import_pandas
     pd = import_pandas(debug=debug)
     if df is None:
@@ -41,7 +42,11 @@ def enforce_dtypes(
         )
         return df

-    pipe_dtypes = self.dtypes if self.enforce else {}
+    pipe_dtypes = self.dtypes if self.enforce else {
+        col: typ
+        for col, typ in self.dtypes.items()
+        if typ in MRSM_PD_DTYPES
+    }

     try:
         if isinstance(df, str):
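So even with `enforce=False`, explicitly declared Meerschaum dtypes (keys of `MRSM_PD_DTYPES`, e.g. 'json' or 'numeric') are still coerced; only inferred pandas types are left alone. A sketch with an abridged, hypothetical dtype table:

    MRSM_PD_DTYPES = {'json': 'object', 'numeric': 'object', 'uuid': 'object'}  # abridged

    dtypes = {'payload': 'json', 'temp': 'float64'}
    pipe_dtypes = {col: typ for col, typ in dtypes.items() if typ in MRSM_PD_DTYPES}
    print(pipe_dtypes)  # {'payload': 'json'}; 'float64' is no longer enforced
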
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]:
     A dictionary of strings containing the pandas data types for this Pipe.
     """
     if not self.exists(debug=debug):
-
-        if not self.columns:
-            return {}
-        dt_col = self.columns.get('datetime', None)
-        if dt_col:
-            if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns, UTC]'
-        return dtypes
+        return {}

     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     from meerschaum.utils.dtypes import to_pandas_dtype
-    columns_types = self.get_columns_types(debug=debug)

     ### NOTE: get_columns_types() may return either the types as
     ### PostgreSQL- or Pandas-style.
-
+    columns_types = self.get_columns_types(debug=debug)
+
+    remote_pd_dtypes = {
         c: (
             get_pd_type_from_db_type(t, allow_custom_dtypes=True)
             if str(t).isupper()
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]:
         )
         for c, t in columns_types.items()
     } if columns_types else {}
-    if persist:
-
-
-
+    if not persist:
+        return remote_pd_dtypes
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({
+        col: typ
+        for col, typ in remote_pd_dtypes.items()
+        if col not in dtypes
+    })
+    self.dtypes = dtypes
+    self.edit(interactive=False, debug=debug)
+    return remote_pd_dtypes
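With `persist=True`, the inferred remote types are merged beneath any explicitly configured dtypes and saved back via `pipe.edit()`. A usage sketch (the pipe keys are placeholders):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'weather', instance='sql:main')  # placeholder keys
    remote_dtypes = pipe.infer_dtypes(persist=True)  # also writes parameters['dtypes']
    print(remote_dtypes)
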
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -161,7 +161,7 @@ def sync(
     self._exists = None

     def _sync(
-        p:
+        p: mrsm.Pipe,
         df: Union[
             'pd.DataFrame',
             Dict[str, List[Any]],
@@ -960,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'numeric' for col in numeric_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -988,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'uuid' for col in uuid_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -1016,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'json' for col in json_cols})
     self.parameters['dtypes'] = dtypes

@@ -1045,10 +1036,7 @@ def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'bytes' for col in bytes_cols})
     self.parameters['dtypes'] = dtypes

meerschaum/jobs/_Job.py
CHANGED
@@ -200,6 +200,8 @@ class Job:
         if root_dir is None:
             from meerschaum.config.paths import ROOT_DIR_PATH
             root_dir = ROOT_DIR_PATH
+        else:
+            root_dir = pathlib.Path(root_dir)
         jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
         daemon_dir = jobs_dir / daemon_id
         pid_file = daemon_dir / 'process.pid'
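Coercing `root_dir` to `pathlib.Path` means the `/` joins below no longer break when a caller passes a plain string. The same coercion in isolation (`resolve_jobs_dir` and the paths are hypothetical stand-ins):

    import pathlib

    def resolve_jobs_dir(root_dir=None):
        # Sketch of the coercion above: accept either str or Path.
        if root_dir is None:
            root_dir = pathlib.Path('/tmp/mrsm-root')  # stand-in for ROOT_DIR_PATH
        else:
            root_dir = pathlib.Path(root_dir)
        return root_dir / 'jobs'

    print(resolve_jobs_dir('/srv/meerschaum'))  # /srv/meerschaum/jobs
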
meerschaum/utils/daemon/Daemon.py
CHANGED
@@ -472,7 +472,7 @@ class Daemon:
             process.kill()
             process.wait(timeout=timeout)
         except Exception as e:
-            return False, f"Failed to kill job {self} with exception: {e}"
+            return False, f"Failed to kill job {self} ({process}) with exception: {e}"

         try:
             if process.status():
@@ -734,7 +734,7 @@ class Daemon:
             time.sleep(check_timeout_interval)

         return False, (
-            f"Failed to stop daemon '{self.daemon_id}' within {timeout} second"
+            f"Failed to stop daemon '{self.daemon_id}' (PID: {pid}) within {timeout} second"
             + ('s' if timeout != 1 else '') + '.'
         )

meerschaum/utils/dataframe.py
CHANGED
@@ -494,7 +494,7 @@ def parse_df_datetimes(
     ### skip parsing if DataFrame is empty
     if len(pdf) == 0:
         if debug:
-            dprint(
+            dprint("df is empty. Returning original DataFrame without casting datetime columns...")
         return df

     ignore_cols = set(
@@ -509,7 +509,7 @@ def parse_df_datetimes(
     if len(cols_to_inspect) == 0:
         if debug:
             dprint("All columns are ignored, skipping datetime detection...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)

     ### apply regex to columns to determine which are ISO datetimes
     iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
@@ -522,7 +522,7 @@ def parse_df_datetimes(
     if not datetime_cols:
         if debug:
             dprint("No columns detected as datetimes, returning...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)

     if debug:
         dprint("Converting columns to datetimes: " + str(datetime_cols))
|