meerschaum 2.7.6__py3-none-any.whl → 2.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. meerschaum/actions/copy.py +1 -0
  2. meerschaum/actions/drop.py +100 -22
  3. meerschaum/actions/index.py +71 -0
  4. meerschaum/actions/register.py +8 -12
  5. meerschaum/actions/sql.py +1 -1
  6. meerschaum/api/routes/_pipes.py +18 -0
  7. meerschaum/api/routes/_plugins.py +1 -1
  8. meerschaum/api/routes/_users.py +62 -61
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/connectors/api/_pipes.py +20 -0
  11. meerschaum/connectors/sql/_SQLConnector.py +8 -12
  12. meerschaum/connectors/sql/_create_engine.py +1 -1
  13. meerschaum/connectors/sql/_fetch.py +9 -39
  14. meerschaum/connectors/sql/_instance.py +3 -3
  15. meerschaum/connectors/sql/_pipes.py +262 -70
  16. meerschaum/connectors/sql/_plugins.py +11 -16
  17. meerschaum/connectors/sql/_sql.py +60 -39
  18. meerschaum/connectors/sql/_uri.py +9 -9
  19. meerschaum/connectors/sql/_users.py +10 -12
  20. meerschaum/connectors/sql/tables/__init__.py +13 -14
  21. meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
  22. meerschaum/core/Pipe/__init__.py +12 -2
  23. meerschaum/core/Pipe/_attributes.py +32 -38
  24. meerschaum/core/Pipe/_drop.py +73 -2
  25. meerschaum/core/Pipe/_fetch.py +4 -0
  26. meerschaum/core/Pipe/_index.py +68 -0
  27. meerschaum/core/Pipe/_sync.py +16 -9
  28. meerschaum/utils/daemon/Daemon.py +9 -2
  29. meerschaum/utils/daemon/RotatingFile.py +3 -3
  30. meerschaum/utils/dataframe.py +42 -12
  31. meerschaum/utils/dtypes/__init__.py +144 -24
  32. meerschaum/utils/dtypes/sql.py +52 -9
  33. meerschaum/utils/formatting/__init__.py +2 -2
  34. meerschaum/utils/formatting/_pprint.py +12 -11
  35. meerschaum/utils/misc.py +16 -18
  36. meerschaum/utils/prompt.py +1 -1
  37. meerschaum/utils/sql.py +106 -42
  38. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/METADATA +14 -2
  39. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/RECORD +45 -43
  40. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/zip-safe +0 -0
@@ -7,7 +7,7 @@ Drop a Pipe's table but keep its registration
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- from meerschaum.utils.typing import SuccessTuple, Any
10
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, List
11
11
 
12
12
 
13
13
  def drop(
@@ -39,9 +39,80 @@ def drop(
39
39
  warn(_drop_cache_tuple[1])
40
40
 
41
41
  with Venv(get_connector_plugin(self.instance_connector)):
42
- result = self.instance_connector.drop_pipe(self, debug=debug, **kw)
42
+ if hasattr(self.instance_connector, 'drop_pipe'):
43
+ result = self.instance_connector.drop_pipe(self, debug=debug, **kw)
44
+ else:
45
+ result = (
46
+ False,
47
+ (
48
+ "Cannot drop pipes for instance connectors of type "
49
+ f"'{self.instance_connector.type}'."
50
+ )
51
+ )
52
+
43
53
 
44
54
  _ = self.__dict__.pop('_exists', None)
45
55
  _ = self.__dict__.pop('_exists_timestamp', None)
46
56
 
47
57
  return result
58
+
59
+
60
def drop_indices(
    self,
    columns: Optional[List[str]] = None,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Call the Pipe's instance connector's `drop_pipe_indices()` method.

    Parameters
    ----------
    columns: Optional[List[str]], default None
        If provided, only drop indices in the given list.

    debug: bool, default False:
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    ### Invalidate cached index / dtype metadata before touching the table.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    ### Best-effort: a cache-pipe failure is warned about, not fatal.
    if self.cache_pipe is not None:
        _drop_cache_tuple = self.cache_pipe.drop_indices(columns=columns, debug=debug, **kw)
        if not _drop_cache_tuple[0]:
            warn(_drop_cache_tuple[1])

    with Venv(get_connector_plugin(self.instance_connector)):
        if hasattr(self.instance_connector, 'drop_pipe_indices'):
            result = self.instance_connector.drop_pipe_indices(
                self,
                columns=columns,
                debug=debug,
                **kw
            )
        else:
            result = (
                False,
                (
                    "Cannot drop indices for instance connectors of type "
                    f"'{self.instance_connector.type}'."
                )
            )

    ### Pop again: the connector call may have repopulated these attributes.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    return result
@@ -84,6 +84,7 @@ def fetch(
84
84
  begin=_determine_begin(
85
85
  self,
86
86
  begin,
87
+ end,
87
88
  check_existing=check_existing,
88
89
  debug=debug,
89
90
  ),
@@ -136,6 +137,7 @@ def get_backtrack_interval(
136
137
  def _determine_begin(
137
138
  pipe: mrsm.Pipe,
138
139
  begin: Union[datetime, int, str, None] = '',
140
+ end: Union[datetime, int, None] = None,
139
141
  check_existing: bool = True,
140
142
  debug: bool = False,
141
143
  ) -> Union[datetime, int, None]:
@@ -157,6 +159,8 @@ def _determine_begin(
157
159
  """
158
160
  if begin != '':
159
161
  return begin
162
+ if end is not None:
163
+ return None
160
164
  sync_time = pipe.get_sync_time(debug=debug)
161
165
  if sync_time is None:
162
166
  return sync_time
@@ -0,0 +1,68 @@
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # vim:fenc=utf-8
4
+
5
+ """
6
+ Index a pipe's table.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, List
11
+
12
+
13
def create_indices(
    self,
    columns: Optional[List[str]] = None,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Call the Pipe's instance connector's `create_pipe_indices()` method.

    Parameters
    ----------
    columns: Optional[List[str]], default None
        If provided, only create indices for the columns in the given list.

    debug: bool, default False:
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    ### Invalidate cached index / dtype metadata before touching the table.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    ### Best-effort: a cache-pipe failure is warned about, not fatal.
    ### NOTE(review): this calls `cache_pipe.index(...)` — confirm Pipe
    ### exposes an `index` alias for `create_indices`.
    if self.cache_pipe is not None:
        cache_success, cache_msg = self.cache_pipe.index(columns=columns, debug=debug, **kw)
        if not cache_success:
            warn(cache_msg)

    with Venv(get_connector_plugin(self.instance_connector)):
        if hasattr(self.instance_connector, 'create_pipe_indices'):
            result = self.instance_connector.create_pipe_indices(
                self,
                columns=columns,
                debug=debug,
                **kw
            )
        else:
            result = (
                False,
                (
                    "Cannot create indices for instance connectors of type "
                    f"'{self.instance_connector.type}'."
                )
            )

    ### Pop again: the connector call may have repopulated these attributes.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    return result
@@ -292,7 +292,6 @@ def sync(
292
292
  message = '\n'.join([_message for _, _message in df])
293
293
  return success, message
294
294
 
295
- ### TODO: Depreciate async?
296
295
  if df is True:
297
296
  p._exists = None
298
297
  return True, f"{p} is being synced in parallel."
@@ -331,8 +330,7 @@ def sync(
331
330
  return (
332
331
  _chunk_success,
333
332
  (
334
- '\n'
335
- + self._get_chunk_label(_chunk, dt_col)
333
+ self._get_chunk_label(_chunk, dt_col)
336
334
  + '\n'
337
335
  + _chunk_msg
338
336
  )
@@ -341,17 +339,25 @@ def sync(
341
339
  results = sorted(
342
340
  [(chunk_success, chunk_msg)] + (
343
341
  list(pool.imap(_process_chunk, df))
344
- if not df_is_chunk_generator(chunk)
345
- else [
342
+ if (
343
+ not df_is_chunk_generator(chunk) # Handle nested generators.
344
+ and kw.get('workers', 1) != 1
345
+ )
346
+ else list(
346
347
  _process_chunk(_child_chunks)
347
348
  for _child_chunks in df
348
- ]
349
+ )
349
350
  )
350
351
  )
351
352
  chunk_messages = [chunk_msg for _, chunk_msg in results]
352
353
  success_bools = [chunk_success for chunk_success, _ in results]
353
354
  success = all(success_bools)
354
- msg = '\n'.join(chunk_messages)
355
+ msg = (
356
+ f'Synced {len(chunk_messages)} chunk'
357
+ + ('s' if len(chunk_messages) != 1 else '')
358
+ + f' to {p}:\n\n'
359
+ + '\n\n'.join(chunk_messages).lstrip().rstrip()
360
+ ).lstrip().rstrip()
355
361
 
356
362
  ### If some chunks succeeded, retry the failures.
357
363
  retry_success = True
@@ -432,7 +438,7 @@ def sync(
432
438
 
433
439
  if blocking:
434
440
  self._exists = None
435
- return _sync(self, df = df)
441
+ return _sync(self, df=df)
436
442
 
437
443
  from meerschaum.utils.threading import Thread
438
444
  def default_callback(result_tuple: SuccessTuple):
@@ -821,6 +827,7 @@ def filter_existing(
821
827
  for col, typ in self_dtypes.items()
822
828
  },
823
829
  safe_copy=safe_copy,
830
+ coerce_mixed_numerics=(not self.static),
824
831
  debug=debug
825
832
  ),
826
833
  on_cols_dtypes,
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
962
969
  """
963
970
  from meerschaum.utils.dataframe import get_numeric_cols
964
971
  numeric_cols = get_numeric_cols(df)
965
- existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ == 'numeric']
972
+ existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
966
973
  new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
967
974
  if not new_numeric_cols:
968
975
  return True, "Success"
@@ -774,9 +774,16 @@ class Daemon:
774
774
  if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
775
775
  try:
776
776
  self._process = psutil.Process(int(pid))
777
+ process_exists = True
777
778
  except Exception:
778
- if self.pid_path.exists():
779
- self.pid_path.unlink()
779
+ process_exists = False
780
+ if not process_exists:
781
+ _ = self.__dict__.pop('_process', None)
782
+ try:
783
+ if self.pid_path.exists():
784
+ self.pid_path.unlink()
785
+ except Exception:
786
+ pass
780
787
  return None
781
788
  return self._process
782
789
 
@@ -13,11 +13,10 @@ import pathlib
13
13
  import traceback
14
14
  import sys
15
15
  import atexit
16
- from datetime import datetime, timezone, timedelta
17
- from typing import List, Union, Optional, Tuple
16
+ from datetime import datetime, timezone
17
+ from typing import List, Optional, Tuple
18
18
  from meerschaum.config import get_config
19
19
  from meerschaum.utils.warnings import warn
20
- from meerschaum.utils.misc import round_time
21
20
  from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
22
21
  from meerschaum.utils.threading import Thread
23
22
  import meerschaum as mrsm
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
517
516
  else 0
518
517
  )
519
518
 
519
+ subfile_lines = []
520
520
  if (
521
521
  subfile_index in self.subfile_objects
522
522
  and
@@ -85,6 +85,7 @@ def filter_unseen_df(
85
85
  safe_copy: bool = True,
86
86
  dtypes: Optional[Dict[str, Any]] = None,
87
87
  include_unchanged_columns: bool = False,
88
+ coerce_mixed_numerics: bool = True,
88
89
  debug: bool = False,
89
90
  ) -> 'pd.DataFrame':
90
91
  """
@@ -108,6 +109,10 @@ def filter_unseen_df(
108
109
  include_unchanged_columns: bool, default False
109
110
  If `True`, include columns which haven't changed on rows which have changed.
110
111
 
112
+ coerce_mixed_numerics: bool, default True
113
+ If `True`, cast mixed integer and float columns between the old and new dataframes into
114
+ numeric values (`decimal.Decimal`).
115
+
111
116
  debug: bool, default False
112
117
  Verbosity toggle.
113
118
 
@@ -138,7 +143,6 @@ def filter_unseen_df(
138
143
  import json
139
144
  import functools
140
145
  import traceback
141
- from decimal import Decimal
142
146
  from meerschaum.utils.warnings import warn
143
147
  from meerschaum.utils.packages import import_pandas, attempt_import
144
148
  from meerschaum.utils.dtypes import (
@@ -148,7 +152,9 @@ def filter_unseen_df(
148
152
  attempt_cast_to_uuid,
149
153
  attempt_cast_to_bytes,
150
154
  coerce_timezone,
155
+ serialize_decimal,
151
156
  )
157
+ from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
152
158
  pd = import_pandas(debug=debug)
153
159
  is_dask = 'dask' in new_df.__module__
154
160
  if is_dask:
@@ -211,6 +217,12 @@ def filter_unseen_df(
211
217
  if col not in dtypes:
212
218
  dtypes[col] = typ
213
219
 
220
+ numeric_cols_precisions_scales = {
221
+ col: get_numeric_precision_scale(None, typ)
222
+ for col, typ in dtypes.items()
223
+ if col and typ and typ.startswith('numeric')
224
+ }
225
+
214
226
  dt_dtypes = {
215
227
  col: typ
216
228
  for col, typ in dtypes.items()
@@ -259,6 +271,8 @@ def filter_unseen_df(
259
271
  old_is_numeric = col in old_numeric_cols
260
272
 
261
273
  if (
274
+ coerce_mixed_numerics
275
+ and
262
276
  (new_is_float or new_is_int or new_is_numeric)
263
277
  and
264
278
  (old_is_float or old_is_int or old_is_numeric)
@@ -300,13 +314,9 @@ def filter_unseen_df(
300
314
  new_numeric_cols = get_numeric_cols(new_df)
301
315
  numeric_cols = set(new_numeric_cols + old_numeric_cols)
302
316
  for numeric_col in old_numeric_cols:
303
- old_df[numeric_col] = old_df[numeric_col].apply(
304
- lambda x: f'{x:f}' if isinstance(x, Decimal) else x
305
- )
317
+ old_df[numeric_col] = old_df[numeric_col].apply(serialize_decimal)
306
318
  for numeric_col in new_numeric_cols:
307
- new_df[numeric_col] = new_df[numeric_col].apply(
308
- lambda x: f'{x:f}' if isinstance(x, Decimal) else x
309
- )
319
+ new_df[numeric_col] = new_df[numeric_col].apply(serialize_decimal)
310
320
 
311
321
  old_dt_cols = [
312
322
  col
@@ -361,7 +371,14 @@ def filter_unseen_df(
361
371
  if numeric_col not in delta_df.columns:
362
372
  continue
363
373
  try:
364
- delta_df[numeric_col] = delta_df[numeric_col].apply(attempt_cast_to_numeric)
374
+ delta_df[numeric_col] = delta_df[numeric_col].apply(
375
+ functools.partial(
376
+ attempt_cast_to_numeric,
377
+ quantize=True,
378
+ precision=numeric_cols_precisions_scales.get(numeric_col, (None, None))[0],
379
+ scale=numeric_cols_precisions_scales.get(numeric_col, (None, None))[1],
380
+ )
381
+ )
365
382
  except Exception:
366
383
  warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
367
384
 
@@ -882,6 +899,7 @@ def enforce_dtypes(
882
899
  The Pandas DataFrame with the types enforced.
883
900
  """
884
901
  import json
902
+ import functools
885
903
  from meerschaum.utils.debug import dprint
886
904
  from meerschaum.utils.formatting import pprint
887
905
  from meerschaum.utils.dtypes import (
@@ -893,6 +911,7 @@ def enforce_dtypes(
893
911
  attempt_cast_to_bytes,
894
912
  coerce_timezone as _coerce_timezone,
895
913
  )
914
+ from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
896
915
  pandas = mrsm.attempt_import('pandas')
897
916
  is_dask = 'dask' in df.__module__
898
917
  if safe_copy:
@@ -914,7 +933,7 @@ def enforce_dtypes(
914
933
  numeric_cols = [
915
934
  col
916
935
  for col, typ in dtypes.items()
917
- if typ == 'numeric'
936
+ if typ.startswith('numeric')
918
937
  ]
919
938
  uuid_cols = [
920
939
  col
@@ -961,9 +980,17 @@ def enforce_dtypes(
961
980
  if debug:
962
981
  dprint(f"Checking for numerics: {numeric_cols}")
963
982
  for col in numeric_cols:
983
+ precision, scale = get_numeric_precision_scale(None, dtypes.get(col, ''))
964
984
  if col in df.columns:
965
985
  try:
966
- df[col] = df[col].apply(attempt_cast_to_numeric)
986
+ df[col] = df[col].apply(
987
+ functools.partial(
988
+ attempt_cast_to_numeric,
989
+ quantize=True,
990
+ precision=precision,
991
+ scale=scale,
992
+ )
993
+ )
967
994
  except Exception as e:
968
995
  if debug:
969
996
  dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
@@ -1040,7 +1067,7 @@ def enforce_dtypes(
1040
1067
  previous_typ = common_dtypes[col]
1041
1068
  mixed_numeric_types = (is_dtype_numeric(typ) and is_dtype_numeric(previous_typ))
1042
1069
  explicitly_float = are_dtypes_equal(dtypes.get(col, 'object'), 'float')
1043
- explicitly_numeric = dtypes.get(col, 'numeric') == 'numeric'
1070
+ explicitly_numeric = dtypes.get(col, 'numeric').startswith('numeric')
1044
1071
  cast_to_numeric = (
1045
1072
  explicitly_numeric
1046
1073
  or col in df_numeric_cols
@@ -1574,16 +1601,19 @@ def to_json(
1574
1601
  A JSON string.
1575
1602
  """
1576
1603
  from meerschaum.utils.packages import import_pandas
1577
- from meerschaum.utils.dtypes import serialize_bytes
1604
+ from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
1578
1605
  pd = import_pandas()
1579
1606
  uuid_cols = get_uuid_cols(df)
1580
1607
  bytes_cols = get_bytes_cols(df)
1608
+ numeric_cols = get_numeric_cols(df)
1581
1609
  if safe_copy and bool(uuid_cols or bytes_cols):
1582
1610
  df = df.copy()
1583
1611
  for col in uuid_cols:
1584
1612
  df[col] = df[col].astype(str)
1585
1613
  for col in bytes_cols:
1586
1614
  df[col] = df[col].apply(serialize_bytes)
1615
+ for col in numeric_cols:
1616
+ df[col] = df[col].apply(serialize_decimal)
1587
1617
  return df.infer_objects(copy=False).fillna(pd.NA).to_json(
1588
1618
  date_format=date_format,
1589
1619
  date_unit=date_unit,