meerschaum 2.7.0rc1__py3-none-any.whl → 2.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29)
  1. meerschaum/api/dash/callbacks/dashboard.py +46 -37
  2. meerschaum/api/dash/connectors.py +7 -9
  3. meerschaum/api/resources/templates/termpage.html +32 -24
  4. meerschaum/api/routes/_pipes.py +7 -8
  5. meerschaum/api/routes/_webterm.py +4 -3
  6. meerschaum/config/_version.py +1 -1
  7. meerschaum/connectors/api/_pipes.py +14 -18
  8. meerschaum/connectors/sql/_create_engine.py +6 -1
  9. meerschaum/connectors/sql/_instance.py +11 -12
  10. meerschaum/connectors/sql/_pipes.py +62 -56
  11. meerschaum/connectors/sql/_sql.py +37 -7
  12. meerschaum/core/Pipe/_attributes.py +6 -1
  13. meerschaum/core/Pipe/_dtypes.py +23 -16
  14. meerschaum/core/Pipe/_sync.py +1 -13
  15. meerschaum/jobs/_Job.py +2 -0
  16. meerschaum/utils/daemon/Daemon.py +2 -2
  17. meerschaum/utils/dataframe.py +3 -3
  18. meerschaum/utils/dtypes/__init__.py +48 -2
  19. meerschaum/utils/dtypes/sql.py +15 -7
  20. meerschaum/utils/sql.py +114 -57
  21. meerschaum/utils/venv/__init__.py +22 -9
  22. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
  23. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
  24. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
  25. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
  26. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
  27. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
  28. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
  29. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py CHANGED
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."
 
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values, flatten_list
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)
 
     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -467,7 +468,7 @@ def get_create_index_queries(
     primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
     datetime_clustered = (
         "CLUSTERED"
-        if not existing_primary_keys and _datetime is not None
+        if not existing_clustered_primary_keys and _datetime is not None
         else "NONCLUSTERED"
     )
 
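Note on the two hunks above: tracking clustered primary keys separately fixes the MSSQL case where a NONCLUSTERED primary key already exists, in which case the datetime index can still be CLUSTERED. A minimal, self-contained sketch of the decision (the function name is illustrative):

    def choose_clustering(existing_clustered_primary_keys, datetime_col):
        # Mirrors the logic above: the datetime index is CLUSTERED only
        # when no clustered primary key already exists on the table.
        primary_key_clustered = "CLUSTERED" if datetime_col is None else "NONCLUSTERED"
        datetime_clustered = (
            "CLUSTERED"
            if not existing_clustered_primary_keys and datetime_col is not None
            else "NONCLUSTERED"
        )
        return primary_key_clustered, datetime_clustered

    print(choose_clustering([], 'dt'))      # ('NONCLUSTERED', 'CLUSTERED')
    print(choose_clustering(['id'], 'dt'))  # ('NONCLUSTERED', 'NONCLUSTERED')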
@@ -665,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
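The `unique`/`upsert` skip above avoids building a redundant unique index, presumably because upsert pipes already enforce uniqueness through their upsert constraint. A pure-Python sketch of the loop's control flow:

    indices = {'unique': ['id', 'dt'], 'datetime': 'dt'}
    upsert = True
    for ix_key, cols in indices.items():
        if ix_key == 'unique' and upsert:
            continue  # uniqueness is already guaranteed by the upsert path
        print('would create index on', cols)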
@@ -792,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')
 
@@ -876,7 +877,6 @@ def get_pipe_data(
 
     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
@@ -889,7 +889,7 @@ def get_pipe_data(
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__
 
-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -904,17 +904,16 @@ def get_pipe_data(
     if self.flavor == 'sqlite':
         if not pipe.columns.get('datetime', None):
             _dt = pipe.guess_datetime()
-            dt = sql_item_name(_dt, self.flavor, None) if _dt else None
         else:
             _dt = pipe.get_columns('datetime')
-            dt = sql_item_name(_dt, self.flavor, None)
 
         if _dt:
             dt_type = dtypes.get(_dt, 'object').lower()
             if 'datetime' not in dt_type:
                 if 'int' not in dt_type:
                     dtypes[_dt] = 'datetime64[ns, UTC]'
-    existing_cols = pipe.get_columns_types(debug=debug)
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -928,7 +927,7 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
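These `pipe.enforce` guards mean a pipe built with `enforce=False` skips both the columns-types lookup and dtype coercion on read. A hedged usage sketch (the connector and pipe keys are illustrative):

    import meerschaum as mrsm

    # Illustrative keys; enforce=False is the same flag passed to the
    # temporary pipe in the sync_pipe() hunk further below.
    pipe = mrsm.Pipe('demo', 'weather', instance='sql:main', enforce=False)
    df = pipe.get_data(begin='2024-01-01', end='2024-02-01')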
@@ -1109,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 
     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
@@ -1201,7 +1201,7 @@ def get_pipe_data_query(
             number=begin_add_minutes,
             begin=begin,
         )
-        where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
+        where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
         is_dt_bound = True
 
     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1213,7 +1213,7 @@ def get_pipe_data_query(
             number=end_add_minutes,
             begin=end
         )
-        where += f"{dt} < {end_da}"
+        where += f"\n    {dt} < {end_da}"
         is_dt_bound = True
 
     if params is not None:
@@ -1225,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', ('    AND' if is_dt_bound else "    ")
             )
 
     if len(where) > 0:
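With the added newlines and indentation in the three hunks above, the rendered WHERE clause now spans multiple lines. A rough illustration of the resulting shape (identifier quoting and datetime literals vary by flavor):

    dt = '"dt"'
    begin_da, end_da = "'2024-01-01'", "'2024-02-01'"
    where = f"\n    {dt} >= {begin_da}" + "\n    AND\n    " + f"{dt} < {end_da}"
    print("WHERE" + where)
    # WHERE
    #     "dt" >= '2024-01-01'
    #     AND
    #     "dt" < '2024-02-01'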
@@ -1280,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1599,6 +1598,7 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
@@ -1655,35 +1655,37 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    with self.engine.connect() as connection:
-        with connection.begin():
-            if do_identity_insert:
-                identity_on_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} ON",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_on_result is None:
-                    return False, f"Could not enable identity inserts on {pipe}."
-
-            stats = self.to_sql(
-                unseen_df,
-                _connection=connection,
-                **unseen_kw
-            )
-
-            if do_identity_insert:
-                identity_off_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} OFF",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_off_result is None:
-                    return False, f"Could not disable identity inserts on {pipe}."
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."
+
+                stats = self.to_sql(
+                    unseen_df,
+                    _connection=connection,
+                    **unseen_kw
+                )
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."
 
     if is_new:
         if not self.create_indices(pipe, debug=debug):
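Two behavioral notes on the hunk above: `stats` now defaults to a successful no-op so empty chunks skip opening a connection entirely, and the MSSQL identity toggles still bracket the insert inside a single transaction. An illustrative statement order (the table name is hypothetical):

    statements = [
        "SET IDENTITY_INSERT [dbo].[weather] ON",
        "-- INSERT statements issued by to_sql() run here --",
        "SET IDENTITY_INSERT [dbo].[weather] OFF",
    ]
    for sql in statements:
        print(sql)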
@@ -1722,11 +1724,12 @@ def sync_pipe(
         },
         target=temp_target,
         temporary=True,
+        enforce=False,
+        static=True,
+        autoincrement=False,
         parameters={
-            'static': True,
-            'schema': self.internal_schema,
+            'schema': (self.internal_schema if self.flavor != 'mssql' else None),
             'hypertable': False,
-            'autoincrement': False,
         },
     )
     temp_pipe.__dict__['_columns_types'] = {
@@ -1747,7 +1750,7 @@ def sync_pipe(
             col
             for col_key, col in pipe.columns.items()
             if col and col in existing_cols
-        ] if not primary_key else (
+        ] if not primary_key or self.flavor == 'oracle' else (
             [dt_col, primary_key]
             if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
             else [primary_key]
@@ -1764,9 +1767,13 @@ def sync_pipe(
         identity_insert=(autoincrement and primary_key in update_df.columns),
         debug=debug,
     )
-    update_success = all(
-        self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
+    update_results = self.exec_queries(
+        update_queries,
+        break_on_error=True,
+        rollback=True,
+        debug=debug,
     )
+    update_success = all(update_results)
     self._log_temporary_tables_creation(
         temp_target,
         ready_to_drop=True,
@@ -1775,6 +1782,8 @@ def sync_pipe(
     )
     if not update_success:
         warn(f"Failed to apply update to {pipe}.")
+        stats['success'] = stats['success'] and update_success
+        stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
 
     stop = time.perf_counter()
     success = stats['success']
@@ -1951,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-    result = self.exec_queries(create_pipe_queries, debug=debug)
-    if result is None:
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."
 
@@ -2109,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    } if not primary_key else {primary_key: new_cols.get(primary_key)}
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
 
     null_replace_new_cols_str = (
         ', '.join([
@@ -3376,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
@@ -3500,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"
 
-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
 
     create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py CHANGED
@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
     """
     Execute a list of queries in a single transaction.
 
@@ -688,6 +688,7 @@ def exec_queries(
         if result is None and break_on_error:
             if rollback:
                 session.rollback()
+            results.append(result)
             break
         elif result is not None and hook is not None:
             hook_queries = hook(session)
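Appending the `None` result before breaking means the returned list now records the failed query, so `all(results)` correctly reports failure; this is exactly what the `sync_pipe()` and `sync_pipe_inplace()` hunks above rely on. A pure-Python illustration:

    # One entry per executed query; None marks the failure that
    # triggered the rollback.
    results = ['<CursorResult>', '<CursorResult>', None]
    print(all(results))  # False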
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be uploaded.
+        The DataFrame to be inserted.
 
     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.
 
+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
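`safe_copy` defaults to `True` here, while `sync_pipe()` forwards `False` (see the `'safe_copy': kw.get('safe_copy', False)` hunk above) since its dataframes are already internal copies. A hedged usage sketch (the connector keys and table name are illustrative):

    import pandas as pd
    import meerschaum as mrsm

    conn = mrsm.get_connector('sql:main')
    df = pd.DataFrame({'id': [1, 2], 'val': [1.5, 2.5]})

    # safe_copy=False lets the in-place serialization below (bytes, numeric)
    # mutate the caller's dataframe rather than a fresh copy.
    conn.to_sql(df, name='demo_table', if_exists='replace', safe_copy=False)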
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import decimal
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
@@ -796,9 +800,15 @@ def to_sql(
         get_uuid_cols,
         get_bytes_cols,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -808,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__
 
-    stats = {'target': name, }
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 
     default_chunksize = self._sys_config.get('chunksize', None)
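The new pre-serialization passes encode bytes columns for the PostgreSQL COPY path (Oracle gets the un-prefixed form) and stringify `Decimal` columns for flavors that transport numerics as text. A sketch of the equivalent per-column transforms; the `\x`-prefixed hex form for BYTEA is an assumption here:

    from decimal import Decimal
    import pandas as pd

    df = pd.DataFrame({'data': [b'\x00\x01'], 'amount': [Decimal('1.10')]})

    # Assumed stand-in for encode_bytes_for_bytea(with_prefix=True):
    df['data'] = df['data'].apply(lambda b: '\\x' + b.hex())

    # NUMERIC_AS_TEXT_FLAVORS path: Decimals are stringified before insert.
    df['amount'] = df['amount'].astype(str)
    print(df)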
@@ -925,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
meerschaum/core/Pipe/_attributes.py CHANGED
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
     If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
     """
     from meerschaum.config._patch import apply_patch_to_config
+    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
     configured_dtypes = self.parameters.get('dtypes', {})
     remote_dtypes = self.infer_dtypes(persist=False)
     patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
-    return patched_dtypes
+    return {
+        col: MRSM_ALIAS_DTYPES.get(typ, typ)
+        for col, typ in patched_dtypes.items()
+        if col and typ
+    }
 
 
 @dtypes.setter
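`Pipe.dtypes` now normalizes alias type names via `MRSM_ALIAS_DTYPES` (added in `meerschaum/utils/dtypes/__init__.py`, +48 -2 above) and drops empty keys or values. A sketch with an assumed alias table:

    # Assumed mapping for illustration; see MRSM_ALIAS_DTYPES for the real table.
    MRSM_ALIAS_DTYPES = {'decimal': 'numeric'}
    patched_dtypes = {'amount': 'decimal', 'dt': 'datetime64[ns, UTC]', '': None}

    normalized = {
        col: MRSM_ALIAS_DTYPES.get(typ, typ)
        for col, typ in patched_dtypes.items()
        if col and typ
    }
    print(normalized)  # {'amount': 'numeric', 'dt': 'datetime64[ns, UTC]'}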
meerschaum/core/Pipe/_dtypes.py CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     pd = mrsm.attempt_import('pandas')
 
+
 def enforce_dtypes(
     self,
     df: 'pd.DataFrame',
@@ -30,7 +31,7 @@ def enforce_dtypes(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
     from meerschaum.utils.packages import import_pandas
     pd = import_pandas(debug=debug)
     if df is None:
@@ -41,7 +42,11 @@ def enforce_dtypes(
         )
         return df
 
-    pipe_dtypes = self.dtypes if self.enforce else {}
+    pipe_dtypes = self.dtypes if self.enforce else {
+        col: typ
+        for col, typ in self.dtypes.items()
+        if typ in MRSM_PD_DTYPES
+    }
 
     try:
         if isinstance(df, str):
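Instead of skipping coercion entirely when `enforce=False`, columns whose configured type is a first-class Meerschaum dtype (a key of `MRSM_PD_DTYPES`) are still enforced, so special serializations like `json` and `numeric` survive. A sketch with an assumed key set:

    # Assumed key set for illustration; MRSM_PD_DTYPES maps Meerschaum
    # dtypes to pandas dtypes (e.g. 'json' -> 'object').
    MRSM_PD_DTYPES = {'json': 'object', 'numeric': 'object', 'uuid': 'object', 'bytes': 'object'}
    dtypes = {'meta': 'json', 'amount': 'numeric', 'name': 'string[python]'}
    enforce = False

    pipe_dtypes = dtypes if enforce else {
        col: typ for col, typ in dtypes.items() if typ in MRSM_PD_DTYPES
    }
    print(pipe_dtypes)  # {'meta': 'json', 'amount': 'numeric'}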
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
         A dictionary of strings containing the pandas data types for this Pipe.
     """
     if not self.exists(debug=debug):
-        dtypes = {}
-        if not self.columns:
-            return {}
-        dt_col = self.columns.get('datetime', None)
-        if dt_col:
-            if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns, UTC]'
-        return dtypes
+        return {}
 
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     from meerschaum.utils.dtypes import to_pandas_dtype
-    columns_types = self.get_columns_types(debug=debug)
 
     ### NOTE: get_columns_types() may return either the types as
     ### PostgreSQL- or Pandas-style.
-    dtypes = {
+    columns_types = self.get_columns_types(debug=debug)
+
+    remote_pd_dtypes = {
         c: (
             get_pd_type_from_db_type(t, allow_custom_dtypes=True)
             if str(t).isupper()
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
         )
         for c, t in columns_types.items()
     } if columns_types else {}
-    if persist:
-        self.dtypes = dtypes
-        self.edit(interactive=False, debug=debug)
-    return dtypes
+    if not persist:
+        return remote_pd_dtypes
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({
+        col: typ
+        for col, typ in remote_pd_dtypes.items()
+        if col not in dtypes
+    })
+    self.dtypes = dtypes
+    self.edit(interactive=False, debug=debug)
+    return remote_pd_dtypes
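`persist=True` previously overwrote any configured dtypes with the inferred ones; it now merges, letting explicit configuration win and only filling in missing columns. A pure-Python sketch of the merge:

    configured = {'amount': 'numeric'}  # user-defined, preserved
    remote = {'amount': 'float64', 'dt': 'datetime64[ns, UTC]'}  # inferred

    dtypes = dict(configured)
    dtypes.update({col: typ for col, typ in remote.items() if col not in dtypes})
    print(dtypes)  # {'amount': 'numeric', 'dt': 'datetime64[ns, UTC]'}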
meerschaum/core/Pipe/_sync.py CHANGED
@@ -161,7 +161,7 @@ def sync(
         self._exists = None
 
     def _sync(
-        p: 'meerschaum.Pipe',
+        p: mrsm.Pipe,
         df: Union[
             'pd.DataFrame',
             Dict[str, List[Any]],
@@ -960,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'numeric' for col in numeric_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -988,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'uuid' for col in uuid_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -1016,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'json' for col in json_cols})
     self.parameters['dtypes'] = dtypes
 
@@ -1045,10 +1036,7 @@ def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'bytes' for col in bytes_cols})
     self.parameters['dtypes'] = dtypes
 
meerschaum/jobs/_Job.py CHANGED
@@ -200,6 +200,8 @@ class Job:
         if root_dir is None:
             from meerschaum.config.paths import ROOT_DIR_PATH
             root_dir = ROOT_DIR_PATH
+        else:
+            root_dir = pathlib.Path(root_dir)
         jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
         daemon_dir = jobs_dir / daemon_id
         pid_file = daemon_dir / 'process.pid'
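Coercing a string `root_dir` to `pathlib.Path` keeps the `/` joins below working for callers that pass a plain string; a minimal sketch:

    import pathlib

    root_dir = '/tmp/mrsm-root'  # hypothetical path passed as a str
    root_dir = pathlib.Path(root_dir)
    print(root_dir / 'jobs')  # '/' join works; a bare str would raise TypeError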
meerschaum/utils/daemon/Daemon.py CHANGED
@@ -472,7 +472,7 @@ class Daemon:
             process.kill()
             process.wait(timeout=timeout)
         except Exception as e:
-            return False, f"Failed to kill job {self} with exception: {e}"
+            return False, f"Failed to kill job {self} ({process}) with exception: {e}"
 
         try:
             if process.status():
@@ -734,7 +734,7 @@ class Daemon:
             time.sleep(check_timeout_interval)
 
         return False, (
-            f"Failed to stop daemon '{self.daemon_id}' within {timeout} second"
+            f"Failed to stop daemon '{self.daemon_id}' (PID: {pid}) within {timeout} second"
             + ('s' if timeout != 1 else '') + '.'
         )
 
meerschaum/utils/dataframe.py CHANGED
@@ -494,7 +494,7 @@ def parse_df_datetimes(
     ### skip parsing if DataFrame is empty
     if len(pdf) == 0:
         if debug:
-            dprint(f"df is empty. Returning original DataFrame without casting datetime columns...")
+            dprint("df is empty. Returning original DataFrame without casting datetime columns...")
         return df
 
     ignore_cols = set(
@@ -509,7 +509,7 @@ def parse_df_datetimes(
     if len(cols_to_inspect) == 0:
         if debug:
             dprint("All columns are ignored, skipping datetime detection...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)
 
     ### apply regex to columns to determine which are ISO datetimes
     iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
@@ -522,7 +522,7 @@ def parse_df_datetimes(
     if not datetime_cols:
         if debug:
             dprint("No columns detected as datetimes, returning...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)
 
     if debug:
         dprint("Converting columns to datetimes: " + str(datetime_cols))
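`infer_objects(copy=False)` first downcasts object columns to concrete dtypes, avoiding pandas' deprecated implicit downcasting inside `fillna`. A minimal pandas sketch:

    import pandas as pd

    df = pd.DataFrame({'a': ['x', None], 'b': [1, 2]}, dtype=object)
    out = df.infer_objects(copy=False).fillna(pd.NA)
    print(out.dtypes)  # 'b' resolves to int64; 'a' stays object with pd.NA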