meerschaum 2.7.0rc1__py3-none-any.whl → 2.7.2__py3-none-any.whl

Files changed (29)
  1. meerschaum/api/dash/callbacks/dashboard.py +46 -37
  2. meerschaum/api/dash/connectors.py +7 -9
  3. meerschaum/api/resources/templates/termpage.html +32 -24
  4. meerschaum/api/routes/_pipes.py +7 -8
  5. meerschaum/api/routes/_webterm.py +4 -3
  6. meerschaum/config/_version.py +1 -1
  7. meerschaum/connectors/api/_pipes.py +14 -18
  8. meerschaum/connectors/sql/_create_engine.py +6 -1
  9. meerschaum/connectors/sql/_instance.py +11 -12
  10. meerschaum/connectors/sql/_pipes.py +62 -56
  11. meerschaum/connectors/sql/_sql.py +37 -7
  12. meerschaum/core/Pipe/_attributes.py +6 -1
  13. meerschaum/core/Pipe/_dtypes.py +23 -16
  14. meerschaum/core/Pipe/_sync.py +1 -13
  15. meerschaum/jobs/_Job.py +2 -0
  16. meerschaum/utils/daemon/Daemon.py +2 -2
  17. meerschaum/utils/dataframe.py +3 -3
  18. meerschaum/utils/dtypes/__init__.py +48 -2
  19. meerschaum/utils/dtypes/sql.py +15 -7
  20. meerschaum/utils/sql.py +114 -57
  21. meerschaum/utils/venv/__init__.py +22 -9
  22. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
  23. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
  24. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
  25. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
  26. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
  27. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
  28. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
  29. {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py CHANGED
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
         return False, f"{pipe} is not registered and cannot be edited."
 
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values, flatten_list
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)
 
     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -467,7 +468,7 @@ def get_create_index_queries(
     primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
     datetime_clustered = (
         "CLUSTERED"
-        if not existing_primary_keys and _datetime is not None
+        if not existing_clustered_primary_keys and _datetime is not None
         else "NONCLUSTERED"
     )
 
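For illustration, a minimal sketch of the clustering decision above, with hypothetical values standing in for what get_pipe_columns_indices() and pipe.get_columns('datetime') return: the datetime index is only created CLUSTERED when no clustered primary key already exists on the table.

    # Hypothetical inputs; the real method gathers these from the pipe's indices.
    existing_clustered_primary_keys = []   # no clustered PK found on the table
    _datetime = 'timestamp'                # hypothetical datetime column

    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_primary_keys and _datetime is not None
        else "NONCLUSTERED"
    )
    print(datetime_clustered)  # CLUSTERED
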
@@ -665,6 +666,8 @@ def get_create_index_queries(
     cols = indices[ix_key]
     if not isinstance(cols, (list, tuple)):
         cols = [cols]
+    if ix_key == 'unique' and upsert:
+        continue
     cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
     if not cols_names:
         continue
@@ -792,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')
 
@@ -876,7 +877,6 @@ def get_pipe_data(
 
     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
@@ -889,7 +889,7 @@ def get_pipe_data(
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__
 
-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
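A short usage sketch of the enforce behavior (pipe keys are hypothetical): with enforce=False, get_pipe_data() now skips the get_columns_types() lookup and leaves column selection unfiltered.

    import meerschaum as mrsm

    # Hypothetical pipe with dtype enforcement disabled.
    pipe = mrsm.Pipe('sql:local', 'demo', enforce=False)
    df = pipe.get_data()  # no information-schema lookup; columns returned as-is
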
@@ -904,17 +904,16 @@ def get_pipe_data(
     if self.flavor == 'sqlite':
         if not pipe.columns.get('datetime', None):
             _dt = pipe.guess_datetime()
-            dt = sql_item_name(_dt, self.flavor, None) if _dt else None
         else:
             _dt = pipe.get_columns('datetime')
-            dt = sql_item_name(_dt, self.flavor, None)
 
         if _dt:
             dt_type = dtypes.get(_dt, 'object').lower()
             if 'datetime' not in dt_type:
                 if 'int' not in dt_type:
                     dtypes[_dt] = 'datetime64[ns, UTC]'
-    existing_cols = pipe.get_columns_types(debug=debug)
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -928,7 +927,7 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
@@ -1109,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
 
     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols or skip_existing_cols_check]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
@@ -1201,7 +1201,7 @@ def get_pipe_data_query(
                 number=begin_add_minutes,
                 begin=begin,
             )
-            where += f"{dt} >= {begin_da}" + (" AND " if end is not None else "")
+            where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
             is_dt_bound = True
 
         if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1213,7 +1213,7 @@ def get_pipe_data_query(
                 number=end_add_minutes,
                 begin=end
             )
-            where += f"{dt} < {end_da}"
+            where += f"{dt} < {end_da}"
             is_dt_bound = True
 
         if params is not None:
@@ -1225,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', (' AND' if is_dt_bound else " ")
             )
 
     if len(where) > 0:
@@ -1280,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1599,6 +1598,7 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
@@ -1655,35 +1655,37 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    with self.engine.connect() as connection:
-        with connection.begin():
-            if do_identity_insert:
-                identity_on_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} ON",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_on_result is None:
-                    return False, f"Could not enable identity inserts on {pipe}."
-
-            stats = self.to_sql(
-                unseen_df,
-                _connection=connection,
-                **unseen_kw
-            )
-
-            if do_identity_insert:
-                identity_off_result = self.exec(
-                    f"SET IDENTITY_INSERT {pipe_name} OFF",
-                    commit=False,
-                    _connection=connection,
-                    close=False,
-                    debug=debug,
-                )
-                if identity_off_result is None:
-                    return False, f"Could not disable identity inserts on {pipe}."
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."
+
+                stats = self.to_sql(
+                    unseen_df,
+                    _connection=connection,
+                    **unseen_kw
+                )
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."
 
     if is_new:
         if not self.create_indices(pipe, debug=debug):
@@ -1722,11 +1724,12 @@ def sync_pipe(
         },
         target=temp_target,
         temporary=True,
+        enforce=False,
+        static=True,
+        autoincrement=False,
         parameters={
-            'static': True,
-            'schema': self.internal_schema,
+            'schema': (self.internal_schema if self.flavor != 'mssql' else None),
             'hypertable': False,
-            'autoincrement': False,
         },
     )
     temp_pipe.__dict__['_columns_types'] = {
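The update path now disables dtype enforcement on the temporary pipe via constructor kwargs rather than parameters. A sketch of the new construction (keys and target are hypothetical):

    import meerschaum as mrsm

    temp_pipe = mrsm.Pipe(
        'sql:local', 'temp', 'update',      # hypothetical pipe keys
        target='mrsm_temp_update',          # hypothetical target table
        temporary=True,
        enforce=False,                      # skip dtype enforcement entirely
        static=True,                        # freeze columns after creation
        autoincrement=False,
        parameters={'hypertable': False},
    )
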
@@ -1747,7 +1750,7 @@ def sync_pipe(
             col
             for col_key, col in pipe.columns.items()
             if col and col in existing_cols
-        ] if not primary_key else (
+        ] if not primary_key or self.flavor == 'oracle' else (
             [dt_col, primary_key]
             if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
             else [primary_key]
@@ -1764,9 +1767,13 @@ def sync_pipe(
         identity_insert=(autoincrement and primary_key in update_df.columns),
         debug=debug,
     )
-    update_success = all(
-        self.exec_queries(update_queries, break_on_error=True, rollback=True, debug=debug)
+    update_results = self.exec_queries(
+        update_queries,
+        break_on_error=True,
+        rollback=True,
+        debug=debug,
     )
+    update_success = all(update_results)
     self._log_temporary_tables_creation(
         temp_target,
         ready_to_drop=True,
@@ -1775,6 +1782,8 @@ def sync_pipe(
     )
     if not update_success:
         warn(f"Failed to apply update to {pipe}.")
+        stats['success'] = stats['success'] and update_success
+        stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
 
     stop = time.perf_counter()
     success = stats['success']
@@ -1951,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-    result = self.exec_queries(create_pipe_queries, debug=debug)
-    if result is None:
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."
 
@@ -2109,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    } if not primary_key else {primary_key: new_cols.get(primary_key)}
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
 
     null_replace_new_cols_str = (
         ', '.join([
@@ -3376,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
@@ -3500,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"
 
-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
 
     create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py CHANGED
@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.LegacyCursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
     """
     Execute a list of queries in a single transaction.
 
@@ -688,6 +688,7 @@ def exec_queries(
     if result is None and break_on_error:
         if rollback:
             session.rollback()
+        results.append(result)
         break
     elif result is not None and hook is not None:
         hook_queries = hook(session)
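Together, these two hunks make failures observable to callers: a failed query now appends its None result before breaking, so all(results) is falsy. A minimal sketch (connector keys hypothetical):

    import meerschaum as mrsm

    conn = mrsm.get_connector('sql:local')  # hypothetical connector keys
    queries = [
        "CREATE TABLE IF NOT EXISTS t (a INT)",
        "INSERT INTO t (a) VALUES (1)",
    ]
    results = conn.exec_queries(queries, break_on_error=True, rollback=True)
    if not all(results):
        print("A query failed; the transaction was rolled back.")
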
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be uploaded.
+        The DataFrame to be inserted.
 
     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.
 
+    safe_copy: bool, default True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
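A usage sketch for the new flag (table name hypothetical): sync_pipe() passes safe_copy=False because it owns its intermediate DataFrames, but external callers should keep the default True if they reuse df afterwards, since to_sql() may now cast bytes and numeric columns in place.

    import pandas as pd
    import meerschaum as mrsm

    conn = mrsm.get_connector('sql:local')  # hypothetical connector keys
    df = pd.DataFrame({'id': [1, 2], 'val': [1.5, 2.5]})

    # safe_copy=True (the default) copies df before any in-place casting.
    conn.to_sql(df, name='my_table', if_exists='append', safe_copy=True)
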
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import decimal
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
@@ -796,9 +800,15 @@ def to_sql(
         get_uuid_cols,
         get_bytes_cols,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -808,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__
 
-    stats = {'target': name, }
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
 
     default_chunksize = self._sys_config.get('chunksize', None)
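The new pre-casting step encodes bytes columns so PostgreSQL COPY (and Oracle inserts) receive text. A sketch of the helper's behavior, hedged since the exact output format is an assumption inferred from the BYTEA naming and the with_prefix flag:

    from meerschaum.utils.dtypes import encode_bytes_for_bytea

    raw = b'\x00\xff'
    # with_prefix=True targets PostgreSQL's BYTEA hex format (assumed '\x…' prefix);
    # with_prefix=False is used for Oracle, which expects bare hex.
    print(encode_bytes_for_bytea(raw, with_prefix=True))
    print(encode_bytes_for_bytea(raw, with_prefix=False))
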
@@ -925,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
meerschaum/core/Pipe/_attributes.py CHANGED
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
     If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
     """
     from meerschaum.config._patch import apply_patch_to_config
+    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
     configured_dtypes = self.parameters.get('dtypes', {})
     remote_dtypes = self.infer_dtypes(persist=False)
     patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
-    return patched_dtypes
+    return {
+        col: MRSM_ALIAS_DTYPES.get(typ, typ)
+        for col, typ in patched_dtypes.items()
+        if col and typ
+    }
 
 
 @dtypes.setter
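The getter now normalizes dtype aliases and drops falsy keys or values. A sketch under the assumption that MRSM_ALIAS_DTYPES maps aliases such as 'decimal' to 'numeric':

    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES

    patched_dtypes = {'amount': 'decimal', 'doc': 'json', '': 'int64'}
    normalized = {
        col: MRSM_ALIAS_DTYPES.get(typ, typ)  # resolve aliases to canonical names
        for col, typ in patched_dtypes.items()
        if col and typ                        # drop falsy columns/types
    }
    # e.g. {'amount': 'numeric', 'doc': 'json'} if 'decimal' aliases to 'numeric'
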
meerschaum/core/Pipe/_dtypes.py CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     pd = mrsm.attempt_import('pandas')
 
+
 def enforce_dtypes(
     self,
     df: 'pd.DataFrame',
@@ -30,7 +31,7 @@ def enforce_dtypes(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
     from meerschaum.utils.packages import import_pandas
     pd = import_pandas(debug=debug)
     if df is None:
@@ -41,7 +42,11 @@ def enforce_dtypes(
         )
         return df
 
-    pipe_dtypes = self.dtypes if self.enforce else {}
+    pipe_dtypes = self.dtypes if self.enforce else {
+        col: typ
+        for col, typ in self.dtypes.items()
+        if typ in MRSM_PD_DTYPES
+    }
 
     try:
         if isinstance(df, str):
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
         A dictionary of strings containing the pandas data types for this Pipe.
     """
     if not self.exists(debug=debug):
-        dtypes = {}
-        if not self.columns:
-            return {}
-        dt_col = self.columns.get('datetime', None)
-        if dt_col:
-            if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns, UTC]'
-        return dtypes
+        return {}
 
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     from meerschaum.utils.dtypes import to_pandas_dtype
-    columns_types = self.get_columns_types(debug=debug)
 
     ### NOTE: get_columns_types() may return either the types as
     ### PostgreSQL- or Pandas-style.
-    dtypes = {
+    columns_types = self.get_columns_types(debug=debug)
+
+    remote_pd_dtypes = {
         c: (
             get_pd_type_from_db_type(t, allow_custom_dtypes=True)
             if str(t).isupper()
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
         )
         for c, t in columns_types.items()
     } if columns_types else {}
-    if persist:
-        self.dtypes = dtypes
-        self.edit(interactive=False, debug=debug)
-    return dtypes
+    if not persist:
+        return remote_pd_dtypes
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({
+        col: typ
+        for col, typ in remote_pd_dtypes.items()
+        if col not in dtypes
+    })
+    self.dtypes = dtypes
+    self.edit(interactive=False, debug=debug)
+    return remote_pd_dtypes
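The persist path now merges inferred remote types beneath any explicitly configured dtypes instead of overwriting them. A standalone sketch of the merge (sample values hypothetical):

    remote_pd_dtypes = {'id': 'int64', 'ts': 'datetime64[ns, UTC]'}
    configured = {'id': 'numeric'}          # user-configured dtype wins

    dtypes = dict(configured)
    dtypes.update({
        col: typ
        for col, typ in remote_pd_dtypes.items()
        if col not in dtypes                # only fill unconfigured columns
    })
    print(dtypes)  # {'id': 'numeric', 'ts': 'datetime64[ns, UTC]'}
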
meerschaum/core/Pipe/_sync.py CHANGED
@@ -161,7 +161,7 @@ def sync(
         self._exists = None
 
     def _sync(
-        p: 'meerschaum.Pipe',
+        p: mrsm.Pipe,
         df: Union[
             'pd.DataFrame',
             Dict[str, List[Any]],
@@ -960,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'numeric' for col in numeric_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -988,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'uuid' for col in uuid_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -1016,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'json' for col in json_cols})
     self.parameters['dtypes'] = dtypes
 
@@ -1045,10 +1036,7 @@ def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"
 
     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'bytes' for col in bytes_cols})
     self.parameters['dtypes'] = dtypes
 
meerschaum/jobs/_Job.py CHANGED
@@ -200,6 +200,8 @@ class Job:
     if root_dir is None:
         from meerschaum.config.paths import ROOT_DIR_PATH
         root_dir = ROOT_DIR_PATH
+    else:
+        root_dir = pathlib.Path(root_dir)
     jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
     daemon_dir = jobs_dir / daemon_id
     pid_file = daemon_dir / 'process.pid'
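With this change, root_dir may be passed as a plain string and is coerced to a pathlib.Path before the `/` joins. The fix in plain terms (paths hypothetical):

    import pathlib

    root_dir = '/tmp/mrsm-root'            # a str is now accepted
    root_dir = pathlib.Path(root_dir)      # coerced before the joins below
    jobs_dir = root_dir / 'jobs'           # 'jobs' stands in for DAEMON_RESOURCES_PATH.name
    pid_file = jobs_dir / 'my-daemon' / 'process.pid'
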
meerschaum/utils/daemon/Daemon.py CHANGED
@@ -472,7 +472,7 @@ class Daemon:
             process.kill()
             process.wait(timeout=timeout)
         except Exception as e:
-            return False, f"Failed to kill job {self} with exception: {e}"
+            return False, f"Failed to kill job {self} ({process}) with exception: {e}"
 
         try:
             if process.status():
@@ -734,7 +734,7 @@ class Daemon:
             time.sleep(check_timeout_interval)
 
         return False, (
-            f"Failed to stop daemon '{self.daemon_id}' within {timeout} second"
+            f"Failed to stop daemon '{self.daemon_id}' (PID: {pid}) within {timeout} second"
             + ('s' if timeout != 1 else '') + '.'
         )
 
meerschaum/utils/dataframe.py CHANGED
@@ -494,7 +494,7 @@ def parse_df_datetimes(
     ### skip parsing if DataFrame is empty
     if len(pdf) == 0:
         if debug:
-            dprint(f"df is empty. Returning original DataFrame without casting datetime columns...")
+            dprint("df is empty. Returning original DataFrame without casting datetime columns...")
         return df
 
     ignore_cols = set(
@@ -509,7 +509,7 @@ def parse_df_datetimes(
     if len(cols_to_inspect) == 0:
         if debug:
             dprint("All columns are ignored, skipping datetime detection...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)
 
     ### apply regex to columns to determine which are ISO datetimes
     iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
@@ -522,7 +522,7 @@ def parse_df_datetimes(
     if not datetime_cols:
         if debug:
             dprint("No columns detected as datetimes, returning...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)
 
     if debug:
         dprint("Converting columns to datetimes: " + str(datetime_cols))