meerschaum 2.1.6__py3-none-any.whl → 2.2.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. meerschaum/_internal/arguments/_parser.py +3 -0
  2. meerschaum/_internal/entry.py +2 -1
  3. meerschaum/_internal/shell/Shell.py +1 -6
  4. meerschaum/actions/api.py +1 -1
  5. meerschaum/actions/install.py +7 -3
  6. meerschaum/actions/sync.py +7 -3
  7. meerschaum/api/dash/callbacks/dashboard.py +77 -8
  8. meerschaum/api/dash/callbacks/jobs.py +55 -3
  9. meerschaum/api/dash/jobs.py +34 -8
  10. meerschaum/api/dash/pipes.py +79 -11
  11. meerschaum/api/resources/static/js/xterm.js +1 -1
  12. meerschaum/config/_shell.py +0 -1
  13. meerschaum/config/_version.py +1 -1
  14. meerschaum/connectors/api/_plugins.py +2 -1
  15. meerschaum/connectors/sql/_create_engine.py +5 -5
  16. meerschaum/connectors/sql/_fetch.py +8 -11
  17. meerschaum/connectors/sql/_pipes.py +7 -1
  18. meerschaum/core/Pipe/_dtypes.py +2 -1
  19. meerschaum/core/Pipe/_sync.py +26 -13
  20. meerschaum/plugins/_Plugin.py +11 -2
  21. meerschaum/utils/daemon/Daemon.py +11 -3
  22. meerschaum/utils/dataframe.py +183 -8
  23. meerschaum/utils/dtypes/__init__.py +9 -5
  24. meerschaum/utils/formatting/_pipes.py +44 -10
  25. meerschaum/utils/misc.py +34 -2
  26. meerschaum/utils/packages/__init__.py +4 -3
  27. meerschaum/utils/packages/_packages.py +1 -1
  28. meerschaum/utils/typing.py +1 -1
  29. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/METADATA +5 -5
  30. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/RECORD +36 -36
  31. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/LICENSE +0 -0
  32. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/NOTICE +0 -0
  33. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/WHEEL +0 -0
  34. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/entry_points.txt +0 -0
  35. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/top_level.txt +0 -0
  36. {meerschaum-2.1.6.dist-info → meerschaum-2.2.0.dev1.dist-info}/zip-safe +0 -0
meerschaum/config/_shell.py
@@ -126,7 +126,6 @@ default_shell_config = {
     'timeout' : 60,
     'max_history' : 1000,
     'clear_screen' : True,
-    'cmd' : default_cmd,
     'bottom_toolbar' : {
         'enabled' : True,
     },
meerschaum/config/_version.py
@@ -2,4 +2,4 @@
 Specify the Meerschaum release version.
 """
 
-__version__ = "2.1.6"
+__version__ = "2.2.0.dev1"
meerschaum/connectors/api/_plugins.py
@@ -49,6 +49,7 @@ def register_plugin(
 def install_plugin(
     self,
     name: str,
+    skip_deps: bool = False,
     force: bool = False,
     debug: bool = False
 ) -> SuccessTuple:
@@ -78,7 +79,7 @@ def install_plugin(
         success, msg = False, fail_msg
         return success, msg
     plugin = Plugin(name, archive_path=archive_path, repo_connector=self)
-    return plugin.install(force=force, debug=debug)
+    return plugin.install(skip_deps=skip_deps, force=force, debug=debug)
 
 def get_plugins(
     self,
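A hedged sketch of the new `skip_deps` flag in use through a repository connector (the `api:mrsm` label and plugin name are placeholders):

    import meerschaum as mrsm

    # Install a plugin from a repository without pulling in its Python dependencies.
    repo = mrsm.get_connector('api:mrsm')
    success, msg = repo.install_plugin('example', skip_deps=True)
    print(success, msg)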
meerschaum/connectors/sql/_create_engine.py
@@ -154,10 +154,10 @@ install_flavor_drivers = {
    'duckdb': ['duckdb', 'duckdb_engine'],
    'mysql': ['pymysql'],
    'mariadb': ['pymysql'],
-   'timescaledb': ['psycopg2'],
-   'postgresql': ['psycopg2'],
-   'citus': ['psycopg2'],
-   'cockroachdb': ['psycopg2', 'sqlalchemy_cockroachdb', 'sqlalchemy_cockroachdb.psycopg2'],
+   'timescaledb': ['psycopg'],
+   'postgresql': ['psycopg'],
+   'citus': ['psycopg'],
+   'cockroachdb': ['psycopg', 'sqlalchemy_cockroachdb', 'sqlalchemy_cockroachdb.psycopg'],
    'mssql': ['pyodbc'],
    'oracle': ['cx_Oracle'],
 }
@@ -165,7 +165,7 @@ require_patching_flavors = {'cockroachdb': [('sqlalchemy-cockroachdb', 'sqlalche
 
 flavor_dialects = {
     'cockroachdb': (
-        'cockroachdb', 'sqlalchemy_cockroachdb.psycopg2', 'CockroachDBDialect_psycopg2'
+        'cockroachdb', 'sqlalchemy_cockroachdb.psycopg', 'CockroachDBDialect_psycopg'
     ),
     'duckdb': ('duckdb', 'duckdb_engine', 'Dialect'),
 }
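The Postgres-family flavors now install psycopg (psycopg 3) rather than the legacy psycopg2 driver. For downstream context (connection details below are placeholders), SQLAlchemy 2.x addresses the new driver via the `postgresql+psycopg` dialect name:

    from sqlalchemy import create_engine

    # psycopg 3 uses the 'psycopg' dialect suffix; the legacy driver used 'psycopg2'.
    engine = create_engine('postgresql+psycopg://user:secret@localhost:5432/meerschaum')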
meerschaum/connectors/sql/_fetch.py
@@ -174,9 +174,6 @@ def get_pipe_metadef(
     )
 
 
-    if 'order by' in definition.lower() and 'over' not in definition.lower():
-        error("Cannot fetch with an ORDER clause in the definition")
-
     apply_backtrack = begin == '' and check_existing
     backtrack_interval = pipe.get_backtrack_interval(check_existing=check_existing, debug=debug)
     btm = (
@@ -308,9 +305,9 @@ def _simple_fetch_query(pipe, debug: bool=False, **kw) -> str:
     def_name = 'definition'
     definition = get_pipe_query(pipe)
     return (
-        f"WITH {def_name} AS ({definition}) SELECT * FROM {def_name}"
+        f"WITH {def_name} AS (\n{definition}\n) SELECT * FROM {def_name}"
         if pipe.connector.flavor not in ('mysql', 'mariadb')
-        else f"SELECT * FROM ({definition}) AS {def_name}"
+        else f"SELECT * FROM (\n{definition}\n) AS {def_name}"
     )
 
 def _join_fetch_query(
@@ -363,10 +360,10 @@ def _join_fetch_query(
             )
             + f") AS {id_remote_name}, "
             + dateadd_str(
-                flavor=pipe.connector.flavor,
-                begin=_st,
-                datepart='minute',
-                number=pipe.parameters.get('fetch', {}).get('backtrack_minutes', 0)
+                flavor = pipe.connector.flavor,
+                begin = _st,
+                datepart = 'minute',
+                number = pipe.parameters.get('fetch', {}).get('backtrack_minutes', 0)
             ) + " AS " + dt_remote_name + "\nUNION ALL\n"
         )
     _sync_times_q = _sync_times_q[:(-1 * len('UNION ALL\n'))] + ")"
@@ -374,13 +371,13 @@ def _join_fetch_query(
     definition = get_pipe_query(pipe)
     query = (
         f"""
-        WITH definition AS ({definition}){_sync_times_q}
+        WITH definition AS (\n{definition}\n){_sync_times_q}
         SELECT definition.*
         FROM definition"""
         if pipe.connector.flavor not in ('mysql', 'mariadb')
         else (
         f"""
-        SELECT * FROM ({definition}) AS definition"""
+        SELECT * FROM (\n{definition}\n) AS definition"""
         )
     ) + f"""
     LEFT OUTER JOIN {sync_times_remote_name} AS st
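A plausible motivation for wrapping `{definition}` in newlines, sketched with a hypothetical definition: if a definition ends in a SQL line comment, inlining it on one line would comment out the closing parenthesis.

    definition = "SELECT * FROM remote_table -- trailing comment"

    old_query = f"WITH definition AS ({definition}) SELECT * FROM definition"
    new_query = f"WITH definition AS (\n{definition}\n) SELECT * FROM definition"

    print(old_query)  # the comment swallows ') SELECT ...': invalid SQL
    print(new_query)  # the newline terminates the comment, keeping the query valid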
meerschaum/connectors/sql/_pipes.py
@@ -1182,7 +1182,12 @@ def sync_pipe(
         dprint("Fetched data:\n" + str(df))
 
     if not isinstance(df, pd.DataFrame):
-        df = pipe.enforce_dtypes(df, chunksize=chunksize, debug=debug)
+        df = pipe.enforce_dtypes(
+            df,
+            chunksize = chunksize,
+            safe_copy = kw.get('safe_copy', False),
+            debug = debug,
+        )
 
     ### if table does not exist, create it with indices
     is_new = False
@@ -1226,6 +1231,7 @@ def sync_pipe(
     upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in update_queries
     if upsert:
         check_existing = False
+    kw['safe_copy'] = kw.get('safe_copy', False)
 
     unseen_df, update_df, delta_df = (
         pipe.filter_existing(
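`sync_pipe` now defaults `safe_copy` to `False` on this path, skipping a defensive `DataFrame.copy()` per chunk. A minimal plain-pandas sketch of the trade-off:

    import pandas as pd

    df = pd.DataFrame({'a': ['1', '2']})

    # safe_copy=True semantics: enforce dtypes on a copy; the caller's frame is untouched.
    safe = df.copy()
    safe['a'] = safe['a'].astype('int64')

    # safe_copy=False semantics: casts apply to the caller's frame, saving one copy
    # per chunk at the cost of mutating the input.
    df['a'] = df['a'].astype('int64')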
meerschaum/core/Pipe/_dtypes.py
@@ -14,6 +14,7 @@ def enforce_dtypes(
     self,
     df: 'pd.DataFrame',
     chunksize: Optional[int] = -1,
+    safe_copy: bool = True,
     debug: bool = False,
 ) -> 'pd.DataFrame':
     """
@@ -71,7 +72,7 @@ def enforce_dtypes(
         )
         return df
 
-    return _enforce_dtypes(df, pipe_dtypes, debug=debug)
+    return _enforce_dtypes(df, pipe_dtypes, safe_copy=safe_copy, debug=debug)
 
 
 def infer_dtypes(self, persist: bool=False, debug: bool=False) -> Dict[str, Any]:
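A hedged usage sketch of the pipe-level `safe_copy` passthrough (the pipe keys are placeholders):

    import pandas as pd
    import meerschaum as mrsm

    pipe = mrsm.Pipe('plugin:example', 'demo', instance='sql:main')
    df = pd.DataFrame({'id': ['1', '2']})

    # Enforce the pipe's registered dtypes in place rather than on a copy.
    df = pipe.enforce_dtypes(df, safe_copy=False)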
meerschaum/core/Pipe/_sync.py
@@ -12,6 +12,7 @@ import json
 import time
 import threading
 import multiprocessing
+import functools
 from datetime import datetime, timedelta
 
 from meerschaum.utils.typing import (
@@ -518,6 +519,8 @@ def exists(
 def filter_existing(
     self,
     df: 'pd.DataFrame',
+    safe_copy: bool = True,
+    date_bound_only: bool = False,
     chunksize: Optional[int] = -1,
     debug: bool = False,
     **kw
@@ -530,6 +533,14 @@ def filter_existing(
     df: 'pd.DataFrame'
         The dataframe to inspect and filter.
 
+    safe_copy: bool, default True
+        If `True`, create a copy before comparing and modifying the dataframes.
+        Setting to `False` may mutate the DataFrames.
+        See `meerschaum.utils.dataframe.filter_unseen_df`.
+
+    date_bound_only: bool, default False
+        If `True`, only use the datetime index to fetch the sample dataframe.
+
     chunksize: Optional[int], default -1
         The `chunksize` used when fetching existing data.
 
@@ -567,7 +578,8 @@ def filter_existing(
     else:
         merge = pd.merge
         NA = pd.NA
-
+    if df is None:
+        return df, df, df
     if (df.empty if not is_dask else len(df) == 0):
         return df, df, df
 
@@ -617,7 +629,7 @@ def filter_existing(
             traceback.print_exc()
             max_dt = None
 
-    if not ('datetime' in str(type(max_dt))) or str(min_dt) == 'NaT':
+    if ('datetime' not in str(type(max_dt))) or str(min_dt) == 'NaT':
         if 'int' not in str(type(max_dt)).lower():
             max_dt = None
 
@@ -645,7 +657,7 @@ def filter_existing(
         col: df[col].unique()
        for col in self.columns
        if col in df.columns and col != dt_col
-    }
+    } if not date_bound_only else {}
     filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
     _ = kw.pop('params', None)
     params = {
@@ -655,7 +667,7 @@ def filter_existing(
         ]
         for col, unique_vals in unique_index_vals.items()
         if len(unique_vals) <= filter_params_index_limit
-    }
+    } if not date_bound_only else {}
 
     if debug:
         dprint(f"Looking at data between '{begin}' and '{end}':", **kw)
@@ -698,18 +710,23 @@ def filter_existing(
                 col: to_pandas_dtype(typ)
                 for col, typ in self_dtypes.items()
             },
+            safe_copy = safe_copy,
             debug = debug
         ),
         on_cols_dtypes,
     )
 
     ### Cast dicts or lists to strings so we can merge.
+    serializer = functools.partial(json.dumps, sort_keys=True, separators=(',', ':'), default=str)
+    def deserializer(x):
+        return json.loads(x) if isinstance(x, str) else x
+
     unhashable_delta_cols = get_unhashable_cols(delta_df)
     unhashable_backtrack_cols = get_unhashable_cols(backtrack_df)
     for col in unhashable_delta_cols:
-        delta_df[col] = delta_df[col].apply(json.dumps)
+        delta_df[col] = delta_df[col].apply(serializer)
     for col in unhashable_backtrack_cols:
-        backtrack_df[col] = backtrack_df[col].apply(json.dumps)
+        backtrack_df[col] = backtrack_df[col].apply(serializer)
     casted_cols = set(unhashable_delta_cols + unhashable_backtrack_cols)
 
     joined_df = merge(
@@ -722,13 +739,9 @@ def filter_existing(
     ) if on_cols else delta_df
     for col in casted_cols:
         if col in joined_df.columns:
-            joined_df[col] = joined_df[col].apply(
-                lambda x: (
-                    json.loads(x)
-                    if isinstance(x, str)
-                    else x
-                )
-            )
+            joined_df[col] = joined_df[col].apply(deserializer)
+        if col in delta_df.columns:
+            delta_df[col] = delta_df[col].apply(deserializer)
 
     ### Determine which rows are completely new.
     new_rows_mask = (joined_df['_merge'] == 'left_only') if on_cols else None
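A hedged sketch of the new `filter_existing` knobs together (the pipe keys and data are illustrative):

    import pandas as pd
    import meerschaum as mrsm

    pipe = mrsm.Pipe('plugin:example', 'demo', instance='sql:main')
    df = pd.DataFrame({'dt': pd.to_datetime(['2024-01-01']), 'id': [1]})

    # Skip the defensive copy and bound the existing-data query by the datetime axis only.
    unseen_df, update_df, delta_df = pipe.filter_existing(
        df,
        safe_copy = False,
        date_bound_only = True,
    )

Note that the shared `serializer` sorts keys (`sort_keys=True`), so two dict cells with the same keys in different orders now serialize identically and no longer register as a spurious delta.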
meerschaum/plugins/_Plugin.py
@@ -252,6 +252,7 @@ class Plugin:
 
     def install(
         self,
+        skip_deps: bool = False,
         force: bool = False,
         debug: bool = False,
     ) -> SuccessTuple:
@@ -263,6 +264,9 @@ class Plugin:
 
         Parameters
         ----------
+        skip_deps: bool, default False
+            If `True`, do not install dependencies.
+
         force: bool, default False
             If `True`, continue with installation, even if required packages fail to install.
 
@@ -366,7 +370,11 @@ class Plugin:
             plugin_installation_dir_path = path
             break
 
-        success_msg = f"Successfully installed plugin '{self}'."
+        success_msg = (
+            f"Successfully installed plugin '{self}'"
+            + ("\n (skipped dependencies)" if skip_deps else "")
+            + "."
+        )
         success, abort = None, None
 
         if is_same_version and not force:
@@ -423,7 +431,8 @@ class Plugin:
             return success, msg
 
         ### attempt to install dependencies
-        if not self.install_dependencies(force=force, debug=debug):
+        dependencies_installed = skip_deps or self.install_dependencies(force=force, debug=debug)
+        if not dependencies_installed:
            _ongoing_installations.remove(self.full_name)
            return False, f"Failed to install dependencies for plugin '{self}'."
 
meerschaum/utils/daemon/Daemon.py
@@ -865,21 +865,29 @@ class Daemon:
             error(_write_pickle_success_tuple[1])
 
 
-    def cleanup(self, keep_logs: bool = False) -> None:
-        """Remove a daemon's directory after execution.
+    def cleanup(self, keep_logs: bool = False) -> SuccessTuple:
+        """
+        Remove a daemon's directory after execution.
 
         Parameters
         ----------
         keep_logs: bool, default False
             If `True`, skip deleting the daemon's log files.
+
+        Returns
+        -------
+        A `SuccessTuple` indicating success.
         """
         if self.path.exists():
             try:
                 shutil.rmtree(self.path)
             except Exception as e:
-                warn(e)
+                msg = f"Failed to clean up '{self.daemon_id}':\n{e}"
+                warn(msg)
+                return False, msg
         if not keep_logs:
             self.rotating_log.delete()
+        return True, "Success"
 
 
     def get_timeout_seconds(self, timeout: Union[int, float, None] = None) -> Union[int, float]:
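Because `cleanup()` now returns a `SuccessTuple` instead of `None`, callers can branch on failure. A sketch, assuming an existing daemon (the `daemon_id` is hypothetical):

    from meerschaum.utils.daemon import Daemon

    daemon = Daemon(daemon_id='example-daemon')
    success, msg = daemon.cleanup(keep_logs=True)
    if not success:
        print(f"Cleanup failed: {msg}")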
meerschaum/utils/dataframe.py
@@ -7,9 +7,10 @@ Utility functions for working with DataFrames.
 """
 
 from __future__ import annotations
+from datetime import datetime
 from meerschaum.utils.typing import (
     Optional, Dict, Any, List, Hashable, Generator,
-    Iterator, Iterable, Union,
+    Iterator, Iterable, Union, Tuple,
 )
@@ -71,6 +72,7 @@ def add_missing_cols_to_df(df: 'pd.DataFrame', dtypes: Dict[str, Any]) -> pd.Dat
 def filter_unseen_df(
     old_df: 'pd.DataFrame',
     new_df: 'pd.DataFrame',
+    safe_copy: bool = True,
     dtypes: Optional[Dict[str, Any]] = None,
     debug: bool = False,
 ) -> 'pd.DataFrame':
@@ -84,6 +86,10 @@ def filter_unseen_df(
 
     new_df: 'pd.DataFrame'
         The fetched (source) dataframe. Rows that are contained in `old_df` are removed.
+
+    safe_copy: bool, default True
+        If `True`, create a copy before comparing and modifying the dataframes.
+        Setting to `False` may mutate the DataFrames.
 
     dtypes: Optional[Dict[str, Any]], default None
         Optionally specify the datatypes of the dataframe.
@@ -111,6 +117,10 @@ def filter_unseen_df(
     if old_df is None:
         return new_df
 
+    if safe_copy:
+        old_df = old_df.copy()
+        new_df = new_df.copy()
+
     import json
     import functools
     import traceback
@@ -118,6 +128,7 @@ def filter_unseen_df(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.packages import import_pandas, attempt_import
     from meerschaum.utils.dtypes import to_pandas_dtype, are_dtypes_equal, attempt_cast_to_numeric
+    from meerschaum.utils.debug import dprint
     pd = import_pandas(debug=debug)
     is_dask = 'dask' in new_df.__module__
     if is_dask:
@@ -243,12 +254,7 @@ def filter_unseen_df(
         indicator = True,
     )
     changed_rows_mask = (joined_df['_merge'] == 'left_only')
-
-    delta_df = joined_df[
-        list(new_df_dtypes.keys())
-    ][
-        changed_rows_mask
-    ].reset_index(drop=True)
+    delta_df = joined_df[list(new_df_dtypes.keys())][changed_rows_mask].reset_index(drop=True)
 
     for json_col in json_cols:
         if json_col not in delta_df.columns:
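A minimal, runnable sketch of `filter_unseen_df` with the new `safe_copy` flag:

    import pandas as pd
    from meerschaum.utils.dataframe import filter_unseen_df

    old_df = pd.DataFrame({'id': [1, 2], 'val': [10, 20]})
    new_df = pd.DataFrame({'id': [1, 2, 3], 'val': [10, 25, 30]})

    # Rows of new_df already present in old_df are dropped:
    # expect ids 2 (changed) and 3 (new) in the delta.
    delta = filter_unseen_df(old_df, new_df, safe_copy=True)
    print(delta)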
meerschaum/utils/dataframe.py
@@ -535,6 +541,8 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
 def enforce_dtypes(
     df: 'pd.DataFrame',
     dtypes: Dict[str, str],
+    safe_copy: bool = True,
+    coerce_numeric: bool = True,
     debug: bool = False,
 ) -> 'pd.DataFrame':
     """
@@ -548,6 +556,14 @@ def enforce_dtypes(
     dtypes: Dict[str, str]
         The data types to attempt to enforce on the DataFrame.
 
+    safe_copy: bool, default True
+        If `True`, create a copy before comparing and modifying the dataframes.
+        Setting to `False` may mutate the DataFrames.
+        See `meerschaum.utils.dataframe.filter_unseen_df`.
+
+    coerce_numeric: bool, default True
+        If `True`, convert float and int collisions to numeric.
+
     debug: bool, default False
         Verbosity toggle.
 
@@ -569,6 +585,8 @@ def enforce_dtypes(
         is_dtype_numeric,
         attempt_cast_to_numeric,
     )
+    if safe_copy:
+        df = df.copy()
     df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
     if len(df_dtypes) == 0:
         if debug:
@@ -674,7 +692,7 @@ def enforce_dtypes(
         explicitly_numeric
         or col in df_numeric_cols
         or (mixed_numeric_types and not explicitly_float)
-    )
+    ) and coerce_numeric
     if cast_to_numeric:
         common_dtypes[col] = attempt_cast_to_numeric
         common_diff_dtypes[col] = attempt_cast_to_numeric
860
878
  if len(pdf) > 0:
861
879
  return pdf
862
880
  return ddf.compute()
881
+
882
+
883
+ def query_df(
884
+ df: 'pd.DataFrame',
885
+ params: Optional[Dict[str, Any]] = None,
886
+ begin: Union[datetime, int, None] = None,
887
+ end: Union[datetime, int, None] = None,
888
+ datetime_column: Optional[str] = None,
889
+ select_columns: Optional[List[str]] = None,
890
+ omit_columns: Optional[List[str]] = None,
891
+ inplace: bool = False,
892
+ reset_index: bool = False,
893
+ debug: bool = False,
894
+ ) -> 'pd.DataFrame':
895
+ """
896
+ Query the dataframe with the params dictionary.
897
+
898
+ Parameters
899
+ ----------
900
+ df: pd.DataFrame
901
+ The DataFrame to query against.
902
+
903
+ params: Optional[Dict[str, Any]], default None
904
+ The parameters dictionary to use for the query.
905
+
906
+ begin: Union[datetime, int, None], default None
907
+ If `begin` and `datetime_column` are provided, only return rows with a timestamp
908
+ greater than or equal to this value.
909
+
910
+ end: Union[datetime, int, None], default None
911
+ If `begin` and `datetime_column` are provided, only return rows with a timestamp
912
+ less than this value.
913
+
914
+ datetime_column: Optional[str], default None
915
+ A `datetime_column` must be provided to use `begin` and `end`.
916
+
917
+ select_columns: Optional[List[str]], default None
918
+ If provided, only return these columns.
919
+
920
+ omit_columns: Optional[List[str]], default None
921
+ If provided, do not include these columns in the result.
922
+
923
+ inplace: bool, default False
924
+ If `True`, modify the DataFrame inplace rather than creating a new DataFrame.
925
+
926
+ reset_index: bool, default True
927
+ If `True`, reset the index in the resulting DataFrame.
928
+
929
+ Returns
930
+ -------
931
+ A Pandas DataFrame query result.
932
+ """
933
+ if not params and not begin and not end:
934
+ return df
935
+
936
+ import json
937
+ import meerschaum as mrsm
938
+ from meerschaum.utils.debug import dprint
939
+ from meerschaum.utils.misc import get_in_ex_params
940
+ from meerschaum.utils.warnings import warn
941
+
942
+ dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
943
+
944
+ if begin or end:
945
+ if not datetime_column or datetime_column not in df.columns:
946
+ warn(
947
+ f"The datetime column '{datetime_column}' is not present in the Dataframe, "
948
+ + "ignoring begin and end...",
949
+ )
950
+ begin, end = None, None
951
+
952
+ if debug:
953
+ dprint(f"Querying dataframe:\n{params=} {begin=} {end=} {datetime_column=}")
954
+
955
+ in_ex_params = get_in_ex_params(params)
956
+
957
+ def serialize(x: Any) -> str:
958
+ if isinstance(x, (dict, list, tuple)):
959
+ return json.dumps(x, sort_keys=True, separators=(',', ':'), default=str)
960
+ if hasattr(x, 'isoformat'):
961
+ return x.isoformat()
962
+ return str(x)
963
+
964
+ masks = [
965
+ (
966
+ (df[datetime_column] >= begin)
967
+ if begin is not None and datetime_column
968
+ else True
969
+ ) & (
970
+ (df[datetime_column] < end)
971
+ if end is not None and datetime_column
972
+ else True
973
+ )
974
+ ]
975
+
976
+ masks.extend([
977
+ (
978
+ (
979
+ df[col].apply(serialize).isin(
980
+ [
981
+ serialize(_in_val)
982
+ for _in_val in in_vals
983
+ ]
984
+ ) if in_vals else True
985
+ ) & (
986
+ ~df[col].apply(serialize).isin(
987
+ [
988
+ serialize(_ex_val)
989
+ for _ex_val in ex_vals
990
+ ]
991
+ ) if ex_vals else True
992
+ )
993
+ )
994
+ for col, (in_vals, ex_vals) in in_ex_params.items()
995
+ if col in df.columns
996
+ ])
997
+ query_mask = masks[0]
998
+ for mask in masks:
999
+ query_mask = query_mask & mask
1000
+
1001
+ if inplace:
1002
+ df.where(query_mask, inplace=inplace)
1003
+ df.dropna(how='all', inplace=inplace)
1004
+ result_df = df
1005
+ else:
1006
+ result_df = df.where(query_mask).dropna(how='all')
1007
+
1008
+ if reset_index:
1009
+ result_df.reset_index(drop=True, inplace=True)
1010
+
1011
+ result_df = enforce_dtypes(
1012
+ result_df,
1013
+ dtypes,
1014
+ safe_copy = (not inplace),
1015
+ debug = debug,
1016
+ coerce_numeric = False,
1017
+ )
1018
+
1019
+ if select_columns == ['*']:
1020
+ select_columns = None
1021
+
1022
+ if not select_columns and not omit_columns:
1023
+ return result_df
1024
+
1025
+ if select_columns:
1026
+ for col in list(result_df.columns):
1027
+ if col not in select_columns:
1028
+ del result_df[col]
1029
+ return result_df
1030
+
1031
+ if omit_columns:
1032
+ for col in list(result_df.columns):
1033
+ if col in omit_columns:
1034
+ del result_df[col]
1035
+ if debug:
1036
+ dprint(f"{dtypes=}")
1037
+ return result_df
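A hedged usage sketch of the new `query_df` helper (the data is illustrative):

    from datetime import datetime
    import pandas as pd
    from meerschaum.utils.dataframe import query_df

    df = pd.DataFrame({
        'dt': pd.to_datetime(['2024-01-01', '2024-01-02', '2024-01-03']),
        'id': [1, 2, 3],
    })

    # Rows with id in (1, 2) and dt >= 2024-01-02: expect only id 2.
    result = query_df(
        df,
        params = {'id': [1, 2]},
        begin = datetime(2024, 1, 2),
        datetime_column = 'dt',
        reset_index = True,
    )
    print(result)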
meerschaum/utils/dtypes/__init__.py
@@ -6,8 +6,10 @@
 Utility functions for working with data types.
 """
 
+import traceback
 from decimal import Decimal, Context, InvalidOperation
 from meerschaum.utils.typing import Dict, Union, Any
+from meerschaum.utils.warnings import warn
 
 MRSM_PD_DTYPES: Dict[str, str] = {
     'json': 'object',
@@ -37,9 +39,7 @@ def to_pandas_dtype(dtype: str) -> str:
         from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
         return get_pd_type_from_db_type(dtype)
 
-    import traceback
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.warnings import warn
     pandas = attempt_import('pandas', lazy=False)
 
     try:
@@ -88,8 +88,12 @@ def are_dtypes_equal(
             return False
         return True
 
-    if ldtype == rdtype:
-        return True
+    try:
+        if ldtype == rdtype:
+            return True
+    except Exception as e:
+        warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
+        return False
 
     ### Sometimes pandas dtype objects are passed.
     ldtype = str(ldtype)
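The new guard matters when a dtype-like object raises on comparison; a contrived sketch:

    from meerschaum.utils.dtypes import are_dtypes_equal

    class BadDtype:
        def __eq__(self, other):
            raise TypeError("cannot compare")

    # Previously this exception would propagate; now it warns and returns False.
    print(are_dtypes_equal(BadDtype(), 'int64'))  # False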
@@ -177,7 +181,7 @@ def attempt_cast_to_numeric(value: Any) -> Any:
     return value
 
 
-def value_is_null(value: Any) -> Any:
+def value_is_null(value: Any) -> bool:
     """
     Determine if a value is a null-like string.
    """