meerschaum-2.7.6-py3-none-any.whl → meerschaum-2.7.8-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45)
  1. meerschaum/actions/copy.py +1 -0
  2. meerschaum/actions/drop.py +100 -22
  3. meerschaum/actions/index.py +71 -0
  4. meerschaum/actions/register.py +8 -12
  5. meerschaum/actions/sql.py +1 -1
  6. meerschaum/api/routes/_pipes.py +18 -0
  7. meerschaum/api/routes/_plugins.py +1 -1
  8. meerschaum/api/routes/_users.py +62 -61
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/connectors/api/_pipes.py +20 -0
  11. meerschaum/connectors/sql/_SQLConnector.py +8 -12
  12. meerschaum/connectors/sql/_create_engine.py +1 -1
  13. meerschaum/connectors/sql/_fetch.py +9 -39
  14. meerschaum/connectors/sql/_instance.py +3 -3
  15. meerschaum/connectors/sql/_pipes.py +262 -70
  16. meerschaum/connectors/sql/_plugins.py +11 -16
  17. meerschaum/connectors/sql/_sql.py +60 -39
  18. meerschaum/connectors/sql/_uri.py +9 -9
  19. meerschaum/connectors/sql/_users.py +10 -12
  20. meerschaum/connectors/sql/tables/__init__.py +13 -14
  21. meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
  22. meerschaum/core/Pipe/__init__.py +12 -2
  23. meerschaum/core/Pipe/_attributes.py +32 -38
  24. meerschaum/core/Pipe/_drop.py +73 -2
  25. meerschaum/core/Pipe/_fetch.py +4 -0
  26. meerschaum/core/Pipe/_index.py +68 -0
  27. meerschaum/core/Pipe/_sync.py +16 -9
  28. meerschaum/utils/daemon/Daemon.py +9 -2
  29. meerschaum/utils/daemon/RotatingFile.py +3 -3
  30. meerschaum/utils/dataframe.py +42 -12
  31. meerschaum/utils/dtypes/__init__.py +144 -24
  32. meerschaum/utils/dtypes/sql.py +52 -9
  33. meerschaum/utils/formatting/__init__.py +2 -2
  34. meerschaum/utils/formatting/_pprint.py +12 -11
  35. meerschaum/utils/misc.py +16 -18
  36. meerschaum/utils/prompt.py +1 -1
  37. meerschaum/utils/sql.py +106 -42
  38. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/METADATA +14 -2
  39. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/RECORD +45 -43
  40. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/zip-safe +0 -0
@@ -7,7 +7,7 @@ Drop a Pipe's table but keep its registration
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- from meerschaum.utils.typing import SuccessTuple, Any
10
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, List
11
11
 
12
12
 
13
13
  def drop(
@@ -39,9 +39,80 @@ def drop(
39
39
  warn(_drop_cache_tuple[1])
40
40
 
41
41
  with Venv(get_connector_plugin(self.instance_connector)):
42
- result = self.instance_connector.drop_pipe(self, debug=debug, **kw)
42
+ if hasattr(self.instance_connector, 'drop_pipe'):
43
+ result = self.instance_connector.drop_pipe(self, debug=debug, **kw)
44
+ else:
45
+ result = (
46
+ False,
47
+ (
48
+ "Cannot drop pipes for instance connectors of type "
49
+ f"'{self.instance_connector.type}'."
50
+ )
51
+ )
52
+
43
53
 
44
54
  _ = self.__dict__.pop('_exists', None)
45
55
  _ = self.__dict__.pop('_exists_timestamp', None)
46
56
 
47
57
  return result
58
+
59
+
60
+ def drop_indices(
61
+ self,
62
+ columns: Optional[List[str]] = None,
63
+ debug: bool = False,
64
+ **kw: Any
65
+ ) -> SuccessTuple:
66
+ """
67
+ Call the Pipe's instance connector's `drop_indices()` method.
68
+
69
+ Parameters
70
+ ----------
71
+ columns: Optional[List[str]] = None
72
+ If provided, only drop indices in the given list.
73
+
74
+ debug: bool, default False:
75
+ Verbosity toggle.
76
+
77
+ Returns
78
+ -------
79
+ A `SuccessTuple` of success, message.
80
+
81
+ """
82
+ from meerschaum.utils.warnings import warn
83
+ from meerschaum.utils.venv import Venv
84
+ from meerschaum.connectors import get_connector_plugin
85
+
86
+ _ = self.__dict__.pop('_columns_indices', None)
87
+ _ = self.__dict__.pop('_columns_indices_timestamp', None)
88
+ _ = self.__dict__.pop('_columns_types_timestamp', None)
89
+ _ = self.__dict__.pop('_columns_types', None)
90
+
91
+ if self.cache_pipe is not None:
92
+ _drop_cache_tuple = self.cache_pipe.drop_indices(columns=columns, debug=debug, **kw)
93
+ if not _drop_cache_tuple[0]:
94
+ warn(_drop_cache_tuple[1])
95
+
96
+ with Venv(get_connector_plugin(self.instance_connector)):
97
+ if hasattr(self.instance_connector, 'drop_pipe_indices'):
98
+ result = self.instance_connector.drop_pipe_indices(
99
+ self,
100
+ columns=columns,
101
+ debug=debug,
102
+ **kw
103
+ )
104
+ else:
105
+ result = (
106
+ False,
107
+ (
108
+ "Cannot drop indices for instance connectors of type "
109
+ f"'{self.instance_connector.type}'."
110
+ )
111
+ )
112
+
113
+ _ = self.__dict__.pop('_columns_indices', None)
114
+ _ = self.__dict__.pop('_columns_indices_timestamp', None)
115
+ _ = self.__dict__.pop('_columns_types_timestamp', None)
116
+ _ = self.__dict__.pop('_columns_types', None)
117
+
118
+ return result
@@ -84,6 +84,7 @@ def fetch(
84
84
  begin=_determine_begin(
85
85
  self,
86
86
  begin,
87
+ end,
87
88
  check_existing=check_existing,
88
89
  debug=debug,
89
90
  ),
@@ -136,6 +137,7 @@ def get_backtrack_interval(
136
137
  def _determine_begin(
137
138
  pipe: mrsm.Pipe,
138
139
  begin: Union[datetime, int, str, None] = '',
140
+ end: Union[datetime, int, None] = None,
139
141
  check_existing: bool = True,
140
142
  debug: bool = False,
141
143
  ) -> Union[datetime, int, None]:
@@ -157,6 +159,8 @@ def _determine_begin(
157
159
  """
158
160
  if begin != '':
159
161
  return begin
162
+ if end is not None:
163
+ return None
160
164
  sync_time = pipe.get_sync_time(debug=debug)
161
165
  if sync_time is None:
162
166
  return sync_time
@@ -0,0 +1,68 @@
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # vim:fenc=utf-8
4
+
5
+ """
6
+ Index a pipe's table.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, List
11
+
12
+
13
+ def create_indices(
14
+ self,
15
+ columns: Optional[List[str]] = None,
16
+ debug: bool = False,
17
+ **kw: Any
18
+ ) -> SuccessTuple:
19
+ """
20
+ Call the Pipe's instance connector's `create_pipe_indices()` method.
21
+
22
+ Parameters
23
+ ----------
24
+ debug: bool, default False:
25
+ Verbosity toggle.
26
+
27
+ Returns
28
+ -------
29
+ A `SuccessTuple` of success, message.
30
+
31
+ """
32
+ from meerschaum.utils.warnings import warn
33
+ from meerschaum.utils.venv import Venv
34
+ from meerschaum.connectors import get_connector_plugin
35
+
36
+ _ = self.__dict__.pop('_columns_indices', None)
37
+ _ = self.__dict__.pop('_columns_indices_timestamp', None)
38
+ _ = self.__dict__.pop('_columns_types_timestamp', None)
39
+ _ = self.__dict__.pop('_columns_types', None)
40
+
41
+ if self.cache_pipe is not None:
42
+ cache_success, cache_msg = self.cache_pipe.index(columns=columns, debug=debug, **kw)
43
+ if not cache_success:
44
+ warn(cache_msg)
45
+
46
+ with Venv(get_connector_plugin(self.instance_connector)):
47
+ if hasattr(self.instance_connector, 'create_pipe_indices'):
48
+ result = self.instance_connector.create_pipe_indices(
49
+ self,
50
+ columns=columns,
51
+ debug=debug,
52
+ **kw
53
+ )
54
+ else:
55
+ result = (
56
+ False,
57
+ (
58
+ "Cannot create indices for instance connectors of type "
59
+ f"'{self.instance_connector.type}'."
60
+ )
61
+ )
62
+
63
+ _ = self.__dict__.pop('_columns_indices', None)
64
+ _ = self.__dict__.pop('_columns_indices_timestamp', None)
65
+ _ = self.__dict__.pop('_columns_types_timestamp', None)
66
+ _ = self.__dict__.pop('_columns_types', None)
67
+
68
+ return result
@@ -292,7 +292,6 @@ def sync(
292
292
  message = '\n'.join([_message for _, _message in df])
293
293
  return success, message
294
294
 
295
- ### TODO: Depreciate async?
296
295
  if df is True:
297
296
  p._exists = None
298
297
  return True, f"{p} is being synced in parallel."
@@ -331,8 +330,7 @@ def sync(
331
330
  return (
332
331
  _chunk_success,
333
332
  (
334
- '\n'
335
- + self._get_chunk_label(_chunk, dt_col)
333
+ self._get_chunk_label(_chunk, dt_col)
336
334
  + '\n'
337
335
  + _chunk_msg
338
336
  )
@@ -341,17 +339,25 @@ def sync(
341
339
  results = sorted(
342
340
  [(chunk_success, chunk_msg)] + (
343
341
  list(pool.imap(_process_chunk, df))
344
- if not df_is_chunk_generator(chunk)
345
- else [
342
+ if (
343
+ not df_is_chunk_generator(chunk) # Handle nested generators.
344
+ and kw.get('workers', 1) != 1
345
+ )
346
+ else list(
346
347
  _process_chunk(_child_chunks)
347
348
  for _child_chunks in df
348
- ]
349
+ )
349
350
  )
350
351
  )
351
352
  chunk_messages = [chunk_msg for _, chunk_msg in results]
352
353
  success_bools = [chunk_success for chunk_success, _ in results]
353
354
  success = all(success_bools)
354
- msg = '\n'.join(chunk_messages)
355
+ msg = (
356
+ f'Synced {len(chunk_messages)} chunk'
357
+ + ('s' if len(chunk_messages) != 1 else '')
358
+ + f' to {p}:\n\n'
359
+ + '\n\n'.join(chunk_messages).lstrip().rstrip()
360
+ ).lstrip().rstrip()
355
361
 
356
362
  ### If some chunks succeeded, retry the failures.
357
363
  retry_success = True
@@ -432,7 +438,7 @@ def sync(
432
438
 
433
439
  if blocking:
434
440
  self._exists = None
435
- return _sync(self, df = df)
441
+ return _sync(self, df=df)
436
442
 
437
443
  from meerschaum.utils.threading import Thread
438
444
  def default_callback(result_tuple: SuccessTuple):
@@ -821,6 +827,7 @@ def filter_existing(
821
827
  for col, typ in self_dtypes.items()
822
828
  },
823
829
  safe_copy=safe_copy,
830
+ coerce_mixed_numerics=(not self.static),
824
831
  debug=debug
825
832
  ),
826
833
  on_cols_dtypes,
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
962
969
  """
963
970
  from meerschaum.utils.dataframe import get_numeric_cols
964
971
  numeric_cols = get_numeric_cols(df)
965
- existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ == 'numeric']
972
+ existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
966
973
  new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
967
974
  if not new_numeric_cols:
968
975
  return True, "Success"
@@ -774,9 +774,16 @@ class Daemon:
774
774
  if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
775
775
  try:
776
776
  self._process = psutil.Process(int(pid))
777
+ process_exists = True
777
778
  except Exception:
778
- if self.pid_path.exists():
779
- self.pid_path.unlink()
779
+ process_exists = False
780
+ if not process_exists:
781
+ _ = self.__dict__.pop('_process', None)
782
+ try:
783
+ if self.pid_path.exists():
784
+ self.pid_path.unlink()
785
+ except Exception:
786
+ pass
780
787
  return None
781
788
  return self._process
782
789
 
@@ -13,11 +13,10 @@ import pathlib
13
13
  import traceback
14
14
  import sys
15
15
  import atexit
16
- from datetime import datetime, timezone, timedelta
17
- from typing import List, Union, Optional, Tuple
16
+ from datetime import datetime, timezone
17
+ from typing import List, Optional, Tuple
18
18
  from meerschaum.config import get_config
19
19
  from meerschaum.utils.warnings import warn
20
- from meerschaum.utils.misc import round_time
21
20
  from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
22
21
  from meerschaum.utils.threading import Thread
23
22
  import meerschaum as mrsm
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
517
516
  else 0
518
517
  )
519
518
 
519
+ subfile_lines = []
520
520
  if (
521
521
  subfile_index in self.subfile_objects
522
522
  and
@@ -85,6 +85,7 @@ def filter_unseen_df(
85
85
  safe_copy: bool = True,
86
86
  dtypes: Optional[Dict[str, Any]] = None,
87
87
  include_unchanged_columns: bool = False,
88
+ coerce_mixed_numerics: bool = True,
88
89
  debug: bool = False,
89
90
  ) -> 'pd.DataFrame':
90
91
  """
@@ -108,6 +109,10 @@ def filter_unseen_df(
108
109
  include_unchanged_columns: bool, default False
109
110
  If `True`, include columns which haven't changed on rows which have changed.
110
111
 
112
+ coerce_mixed_numerics: bool, default True
113
+ If `True`, cast mixed integer and float columns between the old and new dataframes into
114
+ numeric values (`decimal.Decimal`).
115
+
111
116
  debug: bool, default False
112
117
  Verbosity toggle.
113
118
 
@@ -138,7 +143,6 @@ def filter_unseen_df(
138
143
  import json
139
144
  import functools
140
145
  import traceback
141
- from decimal import Decimal
142
146
  from meerschaum.utils.warnings import warn
143
147
  from meerschaum.utils.packages import import_pandas, attempt_import
144
148
  from meerschaum.utils.dtypes import (
@@ -148,7 +152,9 @@ def filter_unseen_df(
148
152
  attempt_cast_to_uuid,
149
153
  attempt_cast_to_bytes,
150
154
  coerce_timezone,
155
+ serialize_decimal,
151
156
  )
157
+ from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
152
158
  pd = import_pandas(debug=debug)
153
159
  is_dask = 'dask' in new_df.__module__
154
160
  if is_dask:
@@ -211,6 +217,12 @@ def filter_unseen_df(
211
217
  if col not in dtypes:
212
218
  dtypes[col] = typ
213
219
 
220
+ numeric_cols_precisions_scales = {
221
+ col: get_numeric_precision_scale(None, typ)
222
+ for col, typ in dtypes.items()
223
+ if col and typ and typ.startswith('numeric')
224
+ }
225
+
214
226
  dt_dtypes = {
215
227
  col: typ
216
228
  for col, typ in dtypes.items()
@@ -259,6 +271,8 @@ def filter_unseen_df(
259
271
  old_is_numeric = col in old_numeric_cols
260
272
 
261
273
  if (
274
+ coerce_mixed_numerics
275
+ and
262
276
  (new_is_float or new_is_int or new_is_numeric)
263
277
  and
264
278
  (old_is_float or old_is_int or old_is_numeric)
@@ -300,13 +314,9 @@ def filter_unseen_df(
300
314
  new_numeric_cols = get_numeric_cols(new_df)
301
315
  numeric_cols = set(new_numeric_cols + old_numeric_cols)
302
316
  for numeric_col in old_numeric_cols:
303
- old_df[numeric_col] = old_df[numeric_col].apply(
304
- lambda x: f'{x:f}' if isinstance(x, Decimal) else x
305
- )
317
+ old_df[numeric_col] = old_df[numeric_col].apply(serialize_decimal)
306
318
  for numeric_col in new_numeric_cols:
307
- new_df[numeric_col] = new_df[numeric_col].apply(
308
- lambda x: f'{x:f}' if isinstance(x, Decimal) else x
309
- )
319
+ new_df[numeric_col] = new_df[numeric_col].apply(serialize_decimal)
310
320
 
311
321
  old_dt_cols = [
312
322
  col
@@ -361,7 +371,14 @@ def filter_unseen_df(
361
371
  if numeric_col not in delta_df.columns:
362
372
  continue
363
373
  try:
364
- delta_df[numeric_col] = delta_df[numeric_col].apply(attempt_cast_to_numeric)
374
+ delta_df[numeric_col] = delta_df[numeric_col].apply(
375
+ functools.partial(
376
+ attempt_cast_to_numeric,
377
+ quantize=True,
378
+ precision=numeric_cols_precisions_scales.get(numeric_col, (None, None)[0]),
379
+ scale=numeric_cols_precisions_scales.get(numeric_col, (None, None)[1]),
380
+ )
381
+ )
365
382
  except Exception:
366
383
  warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
367
384
 
@@ -882,6 +899,7 @@ def enforce_dtypes(
882
899
  The Pandas DataFrame with the types enforced.
883
900
  """
884
901
  import json
902
+ import functools
885
903
  from meerschaum.utils.debug import dprint
886
904
  from meerschaum.utils.formatting import pprint
887
905
  from meerschaum.utils.dtypes import (
@@ -893,6 +911,7 @@ def enforce_dtypes(
893
911
  attempt_cast_to_bytes,
894
912
  coerce_timezone as _coerce_timezone,
895
913
  )
914
+ from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
896
915
  pandas = mrsm.attempt_import('pandas')
897
916
  is_dask = 'dask' in df.__module__
898
917
  if safe_copy:
@@ -914,7 +933,7 @@ def enforce_dtypes(
914
933
  numeric_cols = [
915
934
  col
916
935
  for col, typ in dtypes.items()
917
- if typ == 'numeric'
936
+ if typ.startswith('numeric')
918
937
  ]
919
938
  uuid_cols = [
920
939
  col
@@ -961,9 +980,17 @@ def enforce_dtypes(
961
980
  if debug:
962
981
  dprint(f"Checking for numerics: {numeric_cols}")
963
982
  for col in numeric_cols:
983
+ precision, scale = get_numeric_precision_scale(None, dtypes.get(col, ''))
964
984
  if col in df.columns:
965
985
  try:
966
- df[col] = df[col].apply(attempt_cast_to_numeric)
986
+ df[col] = df[col].apply(
987
+ functools.partial(
988
+ attempt_cast_to_numeric,
989
+ quantize=True,
990
+ precision=precision,
991
+ scale=scale,
992
+ )
993
+ )
967
994
  except Exception as e:
968
995
  if debug:
969
996
  dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
@@ -1040,7 +1067,7 @@ def enforce_dtypes(
1040
1067
  previous_typ = common_dtypes[col]
1041
1068
  mixed_numeric_types = (is_dtype_numeric(typ) and is_dtype_numeric(previous_typ))
1042
1069
  explicitly_float = are_dtypes_equal(dtypes.get(col, 'object'), 'float')
1043
- explicitly_numeric = dtypes.get(col, 'numeric') == 'numeric'
1070
+ explicitly_numeric = dtypes.get(col, 'numeric').startswith('numeric')
1044
1071
  cast_to_numeric = (
1045
1072
  explicitly_numeric
1046
1073
  or col in df_numeric_cols
@@ -1574,16 +1601,19 @@ def to_json(
1574
1601
  A JSON string.
1575
1602
  """
1576
1603
  from meerschaum.utils.packages import import_pandas
1577
- from meerschaum.utils.dtypes import serialize_bytes
1604
+ from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
1578
1605
  pd = import_pandas()
1579
1606
  uuid_cols = get_uuid_cols(df)
1580
1607
  bytes_cols = get_bytes_cols(df)
1608
+ numeric_cols = get_numeric_cols(df)
1581
1609
  if safe_copy and bool(uuid_cols or bytes_cols):
1582
1610
  df = df.copy()
1583
1611
  for col in uuid_cols:
1584
1612
  df[col] = df[col].astype(str)
1585
1613
  for col in bytes_cols:
1586
1614
  df[col] = df[col].apply(serialize_bytes)
1615
+ for col in numeric_cols:
1616
+ df[col] = df[col].apply(serialize_decimal)
1587
1617
  return df.infer_objects(copy=False).fillna(pd.NA).to_json(
1588
1618
  date_format=date_format,
1589
1619
  date_unit=date_unit,