meerschaum 3.0.0rc1__py3-none-any.whl → 3.0.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. meerschaum/_internal/arguments/_parser.py +2 -1
  2. meerschaum/_internal/docs/index.py +49 -2
  3. meerschaum/_internal/shell/Shell.py +5 -4
  4. meerschaum/_internal/static.py +8 -24
  5. meerschaum/actions/bootstrap.py +1 -1
  6. meerschaum/actions/edit.py +6 -3
  7. meerschaum/actions/start.py +1 -1
  8. meerschaum/actions/verify.py +5 -8
  9. meerschaum/api/__init__.py +2 -1
  10. meerschaum/api/dash/__init__.py +0 -2
  11. meerschaum/api/dash/callbacks/__init__.py +1 -0
  12. meerschaum/api/dash/callbacks/dashboard.py +20 -19
  13. meerschaum/api/dash/callbacks/jobs.py +11 -5
  14. meerschaum/api/dash/callbacks/pipes.py +106 -5
  15. meerschaum/api/dash/callbacks/settings/__init__.py +0 -1
  16. meerschaum/api/dash/callbacks/{settings/tokens.py → tokens.py} +1 -1
  17. meerschaum/api/dash/jobs.py +1 -1
  18. meerschaum/api/dash/pages/__init__.py +2 -1
  19. meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
  20. meerschaum/api/dash/pages/pipes.py +4 -3
  21. meerschaum/api/dash/pages/settings/__init__.py +0 -1
  22. meerschaum/api/dash/pages/{settings/tokens.py → tokens.py} +6 -8
  23. meerschaum/api/dash/pipes.py +131 -0
  24. meerschaum/api/dash/tokens.py +28 -31
  25. meerschaum/api/routes/_pipes.py +47 -37
  26. meerschaum/config/_default.py +13 -2
  27. meerschaum/config/_paths.py +1 -0
  28. meerschaum/config/_version.py +1 -1
  29. meerschaum/config/stack/__init__.py +9 -8
  30. meerschaum/connectors/api/_pipes.py +2 -18
  31. meerschaum/connectors/api/_tokens.py +2 -2
  32. meerschaum/connectors/instance/_tokens.py +10 -6
  33. meerschaum/connectors/sql/_SQLConnector.py +14 -0
  34. meerschaum/connectors/sql/_create_engine.py +3 -14
  35. meerschaum/connectors/sql/_pipes.py +175 -185
  36. meerschaum/connectors/sql/_sql.py +38 -20
  37. meerschaum/connectors/sql/tables/__init__.py +237 -122
  38. meerschaum/connectors/valkey/_pipes.py +44 -16
  39. meerschaum/core/Pipe/__init__.py +28 -5
  40. meerschaum/core/Pipe/_attributes.py +273 -46
  41. meerschaum/core/Pipe/_data.py +55 -17
  42. meerschaum/core/Pipe/_dtypes.py +19 -4
  43. meerschaum/core/Pipe/_edit.py +2 -0
  44. meerschaum/core/Pipe/_fetch.py +1 -1
  45. meerschaum/core/Pipe/_sync.py +90 -160
  46. meerschaum/core/Pipe/_verify.py +3 -3
  47. meerschaum/core/Token/_Token.py +4 -5
  48. meerschaum/plugins/bootstrap.py +508 -3
  49. meerschaum/utils/_get_pipes.py +1 -1
  50. meerschaum/utils/dataframe.py +385 -68
  51. meerschaum/utils/debug.py +15 -15
  52. meerschaum/utils/dtypes/__init__.py +387 -22
  53. meerschaum/utils/dtypes/sql.py +327 -31
  54. meerschaum/utils/misc.py +9 -68
  55. meerschaum/utils/packages/__init__.py +7 -21
  56. meerschaum/utils/packages/_packages.py +7 -2
  57. meerschaum/utils/schedule.py +1 -1
  58. meerschaum/utils/sql.py +8 -8
  59. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/METADATA +5 -17
  60. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/RECORD +66 -65
  61. meerschaum-3.0.0rc3.dist-info/licenses/NOTICE +2 -0
  62. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/WHEEL +0 -0
  63. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/entry_points.txt +0 -0
  64. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/licenses/LICENSE +0 -0
  65. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/top_level.txt +0 -0
  66. {meerschaum-3.0.0rc1.dist-info → meerschaum-3.0.0rc3.dist-info}/zip-safe +0 -0
@@ -29,6 +29,7 @@ def get_data(
29
29
  as_iterator: bool = False,
30
30
  as_chunks: bool = False,
31
31
  as_dask: bool = False,
32
+ add_missing_columns: bool = False,
32
33
  chunk_interval: Union[timedelta, int, None] = None,
33
34
  order: Optional[str] = 'asc',
34
35
  limit: Optional[int] = None,
@@ -72,6 +73,9 @@ def get_data(
72
73
  If `True`, return a `dask.DataFrame`
73
74
  (which may be loaded into a Pandas DataFrame with `df.compute()`).
74
75
 
76
+ add_missing_columns: bool, default False
77
+ If `True`, add any missing columns from `Pipe.dtypes` to the dataframe.
78
+
75
79
  chunk_interval: Union[timedelta, int, None], default None
76
80
  If `as_iterator`, then return chunks with `begin` and `end` separated by this interval.
77
81
  This may be set under `pipe.parameters['chunk_minutes']`.
@@ -103,12 +107,13 @@ def get_data(
103
107
  from meerschaum.utils.warnings import warn
104
108
  from meerschaum.utils.venv import Venv
105
109
  from meerschaum.connectors import get_connector_plugin
106
- from meerschaum.utils.misc import iterate_chunks, items_str
107
- from meerschaum.utils.dtypes import to_pandas_dtype, coerce_timezone
110
+ from meerschaum.utils.dtypes import to_pandas_dtype
108
111
  from meerschaum.utils.dataframe import add_missing_cols_to_df, df_is_chunk_generator
109
112
  from meerschaum.utils.packages import attempt_import
113
+ from meerschaum.utils.warnings import dprint
110
114
  dd = attempt_import('dask.dataframe') if as_dask else None
111
115
  dask = attempt_import('dask') if as_dask else None
116
+ _ = attempt_import('partd', lazy=False) if as_dask else None
112
117
 
113
118
  if select_columns == '*':
114
119
  select_columns = None
@@ -187,14 +192,17 @@ def get_data(
187
192
  order=order,
188
193
  limit=limit,
189
194
  fresh=fresh,
195
+ add_missing_columns=True,
190
196
  debug=debug,
191
197
  )
192
198
  for (chunk_begin, chunk_end) in bounds
193
199
  ]
194
200
  dask_meta = {
195
201
  col: to_pandas_dtype(typ)
196
- for col, typ in self.dtypes.items()
202
+ for col, typ in self.get_dtypes(refresh=True, infer=True, debug=debug).items()
197
203
  }
204
+ if debug:
205
+ dprint(f"Dask meta:\n{dask_meta}")
198
206
  return _sort_df(dd.from_delayed(dask_chunks, meta=dask_meta))
199
207
 
200
208
  if not self.exists(debug=debug):
@@ -248,6 +256,7 @@ def get_data(
248
256
  if not select_columns:
249
257
  select_columns = [col for col in df.columns]
250
258
 
259
+ pipe_dtypes = self.get_dtypes(refresh=False, debug=debug)
251
260
  cols_to_omit = [
252
261
  col
253
262
  for col in df.columns
@@ -261,7 +270,11 @@ def get_data(
261
270
  col
262
271
  for col in select_columns
263
272
  if col not in df.columns
264
- ]
273
+ ] + ([
274
+ col
275
+ for col in pipe_dtypes
276
+ if col not in df.columns
277
+ ] if add_missing_columns else [])
265
278
  if cols_to_omit:
266
279
  warn(
267
280
  (
@@ -277,16 +290,26 @@ def get_data(
277
290
  df = df[_cols_to_select]
278
291
 
279
292
  if cols_to_add:
280
- warn(
281
- (
282
- f"Specified columns {items_str(cols_to_add)} were not found on {self}. "
283
- + "Adding these to the DataFrame as null columns."
284
- ),
285
- stack=False,
293
+ if not add_missing_columns:
294
+ from meerschaum.utils.misc import items_str
295
+ warn(
296
+ f"Will add columns {items_str(cols_to_add)} as nulls to dataframe.",
297
+ stack=False,
298
+ )
299
+
300
+ df = add_missing_cols_to_df(
301
+ df,
302
+ {
303
+ col: pipe_dtypes.get(col, 'string')
304
+ for col in cols_to_add
305
+ },
286
306
  )
287
- df = add_missing_cols_to_df(df, {col: 'string' for col in cols_to_add})
288
307
 
289
- enforced_df = self.enforce_dtypes(df, debug=debug)
308
+ enforced_df = self.enforce_dtypes(
309
+ df,
310
+ dtypes=pipe_dtypes,
311
+ debug=debug,
312
+ )
290
313
 
291
314
  if order:
292
315
  return _sort_df(enforced_df)
@@ -310,7 +333,7 @@ def _get_data_as_iterator(
310
333
  """
311
334
  Return a pipe's data as a generator.
312
335
  """
313
- from meerschaum.utils.misc import round_time
336
+ from meerschaum.utils.dtypes import round_time
314
337
  begin, end = self.parse_date_bounds(begin, end)
315
338
  if not self.exists(debug=debug):
316
339
  return
@@ -623,7 +646,7 @@ def get_chunk_interval(
623
646
  if dt_col is None:
624
647
  return timedelta(minutes=chunk_minutes)
625
648
 
626
- dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
649
+ dt_dtype = self.dtypes.get(dt_col, 'datetime')
627
650
  if 'int' in dt_dtype.lower():
628
651
  return chunk_minutes
629
652
  return timedelta(minutes=chunk_minutes)
@@ -687,11 +710,26 @@ def get_chunk_bounds(
687
710
  elif are_dtypes_equal(str(type(end)), 'int'):
688
711
  end += 1
689
712
  consolidate_end_chunk = True
713
+
690
714
  if begin is None and end is None:
691
715
  return [(None, None)]
692
716
 
693
717
  begin, end = self.parse_date_bounds(begin, end)
694
718
 
719
+ if begin and end:
720
+ if begin >= end:
721
+ return (
722
+ [(begin, begin)]
723
+ if bounded
724
+ else [(begin, None)]
725
+ )
726
+ if end <= begin:
727
+ return (
728
+ [(end, end)]
729
+ if bounded
730
+ else [(None, begin)]
731
+ )
732
+
695
733
  ### Set the chunk interval under `pipe.parameters['verify']['chunk_minutes']`.
696
734
  chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
697
735
 
@@ -798,7 +836,7 @@ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
798
836
  Given a date bound (begin, end), coerce a timezone if necessary.
799
837
  """
800
838
  from meerschaum.utils.misc import is_int
801
- from meerschaum.utils.dtypes import coerce_timezone
839
+ from meerschaum.utils.dtypes import coerce_timezone, MRSM_PD_DTYPES
802
840
  from meerschaum.utils.warnings import warn
803
841
  dateutil_parser = mrsm.attempt_import('dateutil.parser')
804
842
 
@@ -823,9 +861,9 @@ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
823
861
  return None
824
862
 
825
863
  dt_col = self.columns.get('datetime', None)
826
- dt_typ = str(self.dtypes.get(dt_col, 'datetime64[ns, UTC]'))
864
+ dt_typ = str(self.dtypes.get(dt_col, 'datetime'))
827
865
  if dt_typ == 'datetime':
828
- dt_typ = 'datetime64[ns, UTC]'
866
+ dt_typ = MRSM_PD_DTYPES['datetime']
829
867
  return coerce_timezone(dt_val, strip_utc=('utc' not in dt_typ.lower()))
830
868
 
831
869
  bounds = tuple(_parse_date_bound(dt_val) for dt_val in dt_vals)
@@ -22,6 +22,7 @@ def enforce_dtypes(
22
22
  chunksize: Optional[int] = -1,
23
23
  enforce: bool = True,
24
24
  safe_copy: bool = True,
25
+ dtypes: Optional[Dict[str, str]] = None,
25
26
  debug: bool = False,
26
27
  ) -> 'pd.DataFrame':
27
28
  """
@@ -50,7 +51,8 @@ def enforce_dtypes(
50
51
  if not self.enforce:
51
52
  enforce = False
52
53
 
53
- pipe_dtypes = self.dtypes if enforce else {}
54
+ explicit_dtypes = self.get_dtypes(infer=False, debug=debug) if enforce else {}
55
+ pipe_dtypes = self.get_dtypes(infer=True, debug=debug) if not dtypes else dtypes
54
56
 
55
57
  try:
56
58
  if isinstance(df, str):
@@ -103,14 +105,21 @@ def enforce_dtypes(
103
105
  return _enforce_dtypes(
104
106
  df,
105
107
  pipe_dtypes,
108
+ explicit_dtypes=explicit_dtypes,
106
109
  safe_copy=safe_copy,
107
110
  strip_timezone=(self.tzinfo is None),
111
+ coerce_numeric=self.mixed_numerics,
108
112
  coerce_timezone=enforce,
109
113
  debug=debug,
110
114
  )
111
115
 
112
116
 
113
- def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]:
117
+ def infer_dtypes(
118
+ self,
119
+ persist: bool = False,
120
+ refresh: bool = False,
121
+ debug: bool = False,
122
+ ) -> Dict[str, Any]:
114
123
  """
115
124
  If `dtypes` is not set in `meerschaum.Pipe.parameters`,
116
125
  infer the data types from the underlying table if it exists.
@@ -119,6 +128,11 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
119
128
  ----------
120
129
  persist: bool, default False
121
130
  If `True`, persist the inferred data types to `meerschaum.Pipe.parameters`.
131
+ NOTE: Use with caution! Generally `dtypes` is meant to be user-configurable only.
132
+
133
+ refresh: bool, default False
134
+ If `True`, retrieve the latest column types for the pipe.
135
+ See `Pipe.get_columns_types()`.
122
136
 
123
137
  Returns
124
138
  -------
@@ -132,7 +146,7 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
132
146
 
133
147
  ### NOTE: get_columns_types() may return either the types as
134
148
  ### PostgreSQL- or Pandas-style.
135
- columns_types = self.get_columns_types(debug=debug)
149
+ columns_types = self.get_columns_types(refresh=refresh, debug=debug)
136
150
 
137
151
  remote_pd_dtypes = {
138
152
  c: (
@@ -145,7 +159,8 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
145
159
  if not persist:
146
160
  return remote_pd_dtypes
147
161
 
148
- dtypes = self.parameters.get('dtypes', {})
162
+ parameters = self.get_parameters(refresh=refresh, debug=debug)
163
+ dtypes = parameters.get('dtypes', {})
149
164
  dtypes.update({
150
165
  col: typ
151
166
  for col, typ in remote_pd_dtypes.items()
@@ -47,6 +47,8 @@ def edit(
47
47
  if self.temporary:
48
48
  return False, "Cannot edit pipes created with `temporary=True` (read-only)."
49
49
 
50
+ self._invalidate_cache(hard=True, debug=debug)
51
+
50
52
  if hasattr(self, '_symlinks'):
51
53
  from meerschaum.utils.misc import get_val_from_dict_path, set_val_in_dict_path
52
54
  for path, vals in self._symlinks.items():
@@ -127,7 +127,7 @@ def get_backtrack_interval(
127
127
  if dt_col is None:
128
128
  return backtrack_interval
129
129
 
130
- dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
130
+ dt_dtype = self.dtypes.get(dt_col, 'datetime')
131
131
  if 'int' in dt_dtype.lower():
132
132
  return backtrack_minutes
133
133