meerschaum 2.0.0rc7__py3-none-any.whl → 2.0.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. meerschaum/actions/__init__.py +97 -48
  2. meerschaum/actions/bootstrap.py +1 -1
  3. meerschaum/actions/clear.py +1 -1
  4. meerschaum/actions/deduplicate.py +1 -1
  5. meerschaum/actions/delete.py +8 -7
  6. meerschaum/actions/drop.py +1 -10
  7. meerschaum/actions/edit.py +1 -1
  8. meerschaum/actions/install.py +1 -1
  9. meerschaum/actions/pause.py +1 -1
  10. meerschaum/actions/register.py +1 -1
  11. meerschaum/actions/setup.py +1 -1
  12. meerschaum/actions/show.py +1 -1
  13. meerschaum/actions/start.py +18 -7
  14. meerschaum/actions/stop.py +5 -4
  15. meerschaum/actions/sync.py +3 -1
  16. meerschaum/actions/uninstall.py +1 -1
  17. meerschaum/actions/upgrade.py +1 -1
  18. meerschaum/actions/verify.py +54 -3
  19. meerschaum/config/_formatting.py +26 -0
  20. meerschaum/config/_jobs.py +28 -5
  21. meerschaum/config/_paths.py +21 -5
  22. meerschaum/config/_version.py +1 -1
  23. meerschaum/connectors/api/_fetch.py +1 -1
  24. meerschaum/connectors/api/_pipes.py +6 -11
  25. meerschaum/connectors/sql/_fetch.py +29 -11
  26. meerschaum/core/Pipe/_deduplicate.py +39 -23
  27. meerschaum/core/Pipe/_dtypes.py +2 -1
  28. meerschaum/core/Pipe/_verify.py +59 -24
  29. meerschaum/plugins/__init__.py +3 -0
  30. meerschaum/utils/daemon/Daemon.py +108 -27
  31. meerschaum/utils/daemon/__init__.py +35 -1
  32. meerschaum/utils/formatting/__init__.py +144 -1
  33. meerschaum/utils/formatting/_pipes.py +28 -5
  34. meerschaum/utils/misc.py +183 -187
  35. meerschaum/utils/packages/__init__.py +1 -1
  36. meerschaum/utils/packages/_packages.py +1 -0
  37. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/METADATA +4 -1
  38. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/RECORD +44 -44
  39. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/LICENSE +0 -0
  40. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/NOTICE +0 -0
  41. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/WHEEL +0 -0
  42. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/entry_points.txt +0 -0
  43. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/top_level.txt +0 -0
  44. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/zip-safe +0 -0
meerschaum/connectors/api/_pipes.py

@@ -7,6 +7,9 @@ Register or fetch Pipes from the API
 """
 
 from __future__ import annotations
+import time
+import json
+from io import StringIO
 from datetime import datetime
 from meerschaum.utils.debug import dprint
 from meerschaum.utils.warnings import warn, error
@@ -123,7 +126,6 @@ def fetch_pipes_keys(
     A list of tuples containing pipes' keys.
     """
     from meerschaum.config.static import STATIC_CONFIG
-    import json
     if connector_keys is None:
         connector_keys = []
     if metric_keys is None:
@@ -169,7 +171,6 @@ def sync_pipe(
     from meerschaum.utils.misc import json_serialize_datetime
     from meerschaum.config import get_config
     from meerschaum.utils.packages import attempt_import
-    import json, time
     begin = time.time()
     more_itertools = attempt_import('more_itertools')
     if df is None:
@@ -310,7 +311,6 @@ def get_pipe_data(
         **kw: Any
     ) -> Union[pandas.DataFrame, None]:
     """Fetch data from the API."""
-    import json
     r_url = pipe_r_url(pipe)
     chunks_list = []
     while True:
@@ -340,7 +340,7 @@ def get_pipe_data(
     from meerschaum.utils.dataframe import parse_df_datetimes
     pd = import_pandas()
     try:
-        df = pd.read_json(response.text)
+        df = pd.read_json(StringIO(response.text))
     except Exception as e:
         warn(f"Failed to parse response for {pipe}:\n{e}")
         return None
@@ -367,7 +367,6 @@ def get_backtrack_data(
         **kw: Any,
     ) -> pandas.DataFrame:
     """Get a Pipe's backtrack data from the API."""
-    import json
     r_url = pipe_r_url(pipe)
     try:
         response = self.get(
@@ -389,12 +388,12 @@ def get_backtrack_data(
         dprint(response.text)
     pd = import_pandas()
     try:
-        df = pd.read_json(response.text)
+        df = pd.read_json(StringIO(response.text))
     except Exception as e:
         warn(f"Failed to read response into a dataframe:\n{e}")
         return None
 
-    df = parse_df_datetimes(pd.read_json(response.text), debug=debug)
+    df = parse_df_datetimes(pd.read_json(StringIO(response.text)), debug=debug)
     return df
 
 def get_pipe_id(
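The `StringIO` wrappings above (and the matching one in meerschaum/core/Pipe/_dtypes.py further down) track pandas' deprecation of passing literal JSON strings to `read_json`: recent pandas releases emit a FutureWarning and ask for a file-like object instead. A minimal sketch of the pattern, independent of meerschaum:

    from io import StringIO
    import pandas as pd

    payload = '[{"id": 1, "value": 10}, {"id": 2, "value": 20}]'

    # df = pd.read_json(payload)          # deprecated: literal JSON string
    df = pd.read_json(StringIO(payload))  # preferred: file-like object
    print(df)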
@@ -438,7 +437,6 @@ def get_pipe_attributes(
     """
     r_url = pipe_r_url(pipe)
     response = self.get(r_url + '/attributes', debug=debug)
-    import json
     try:
         return json.loads(response.text)
     except Exception as e:
@@ -474,7 +472,6 @@ def get_sync_time(
     """
     from meerschaum.utils.misc import is_int
     from meerschaum.utils.warnings import warn
-    import datetime, json
     r_url = pipe_r_url(pipe)
     response = self.get(
         r_url + '/sync_time',
@@ -545,7 +542,6 @@ def create_metadata(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.config.static import STATIC_CONFIG
-    import json
     r_url = STATIC_CONFIG['api']['endpoints']['metadata']
     response = self.post(r_url, debug=debug)
     if debug:
@@ -590,7 +586,6 @@ def get_pipe_rowcount(
     The number of rows in the pipe's table, bound the given parameters.
     If the table does not exist, return 0.
     """
-    import json
     r_url = pipe_r_url(pipe)
     response = self.get(
         r_url + "/rowcount",
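The hunks above all make the same mechanical change: each function-local `import json` (and `import time`, `import datetime`) is deleted in favor of the module-level imports added in this file's first hunk. Behavior is unchanged; presumably the point is to state the file's dependencies once rather than re-run the import statement per call. A sketch of the before/after shape (illustrative function names only):

    # Before: the import statement re-executes (a sys.modules lookup) per call.
    def parse_attributes_before(text: str) -> dict:
        import json
        return json.loads(text)

    # After: imported once at module scope, as in the first hunk of this file.
    import json

    def parse_attributes_after(text: str) -> dict:
        return json.loads(text)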
meerschaum/connectors/sql/_fetch.py

@@ -148,7 +148,7 @@ def get_pipe_metadef(
     dt_name = sql_item_name(_dt, self.flavor)
     is_guess = False
 
-    if begin is not None or end is not None:
+    if begin not in (None, '') or end is not None:
         if is_guess:
             if _dt is None:
                 warn(
@@ -168,20 +168,38 @@ def get_pipe_metadef(
     if 'order by' in definition.lower() and 'over' not in definition.lower():
         error("Cannot fetch with an ORDER clause in the definition")
 
+    apply_backtrack = begin == ''
     begin = (
-        begin if not (isinstance(begin, str) and begin == '')
-        else pipe.get_sync_time(debug=debug)
+        pipe.get_sync_time(debug=debug)
+        if begin == ''
+        else begin
     )
-
+
+    if begin and end and begin >= end:
+        begin = None
+
     da = None
     if dt_name:
-        ### default: do not backtrack
-        begin_da = dateadd_str(
-            flavor=self.flavor, datepart='minute', number=(-1 * btm), begin=begin,
-        ) if begin else None
-        end_da = dateadd_str(
-            flavor=self.flavor, datepart='minute', number=1, begin=end,
-        ) if end else None
+        begin_da = (
+            dateadd_str(
+                flavor = self.flavor,
+                datepart = 'minute',
+                number = ((-1 * btm) if apply_backtrack else 0),
+                begin = begin,
+            )
+            if begin
+            else None
+        )
+        end_da = (
+            dateadd_str(
+                flavor = self.flavor,
+                datepart = 'minute',
+                number = 0,
+                begin = end,
+            )
+            if end
+            else None
+        )
 
     meta_def = (
         _simple_fetch_query(pipe) if (
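The rewrite above separates two concerns the old one-liner mixed together: only a resumed fetch (the empty-string `begin` sentinel) backtracks by `btm` minutes, an explicit `begin` is used verbatim, and a window whose `begin` has caught up to `end` drops its lower bound. A simplified sketch of those rules with a hypothetical helper (not the actual meerschaum API):

    from datetime import datetime, timedelta
    from typing import Optional, Tuple, Union

    def resolve_window(
        begin: Union[datetime, str, None],
        end: Optional[datetime],
        last_sync: Optional[datetime],
        backtrack_minutes: int,
    ) -> Tuple[Optional[datetime], Optional[datetime]]:
        """Mirror the begin/end rules from the hunk above."""
        apply_backtrack = begin == ''       # only the sentinel backtracks
        if begin == '':
            begin = last_sync               # resume from the last sync time
        if begin and end and begin >= end:
            begin = None                    # degenerate window: unbound the start
        if begin is not None and apply_backtrack:
            begin -= timedelta(minutes=backtrack_minutes)
        return begin, end

    # Resuming with a 5-minute backtrack from a last sync of 12:00 yields 11:55.
    print(resolve_window('', None, datetime(2023, 1, 1, 12, 0), 5))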
meerschaum/core/Pipe/_deduplicate.py

@@ -65,6 +65,7 @@ def deduplicate(
     A `SuccessTuple` corresponding to whether all of the chunks were successfully deduplicated.
     """
     from meerschaum.utils.warnings import warn, info
+    from meerschaum.utils.misc import interval_str, items_str
     from meerschaum.utils.venv import Venv
     from meerschaum.connectors import get_connector_plugin
     from meerschaum.utils.pool import get_pool
@@ -74,6 +75,7 @@ def deduplicate(
         begin = begin,
         end = end,
         params = params,
+        bounded = bounded,
         debug = debug,
         **kwargs
     )
@@ -90,6 +92,7 @@ def deduplicate(
         begin = begin,
         end = end,
         params = params,
+        bounded = bounded,
         debug = debug,
         **kwargs
     )
@@ -104,8 +107,18 @@ def deduplicate(
     begin = (
         bound_time
         if bound_time is not None
-        else self.get_sync_time(debug=debug)
+        else self.get_sync_time(newest=False, debug=debug)
     )
+    if bounded and end is None:
+        end = self.get_sync_time(newest=True, debug=debug)
+
+    if bounded and end is not None:
+        end += (
+            timedelta(minutes=1)
+            if isinstance(end, datetime)
+            else 1
+        )
+
     chunk_bounds = self.get_chunk_bounds(
         bounded = bounded,
         begin = begin,
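`deduplicate()` now seeds a missing bounded `end` from the newest sync time and nudges it one unit forward, since the chunk bounds treat `end` as exclusive; without the nudge the newest row would never be scanned. The bump, extracted as a tiny sketch:

    from datetime import datetime, timedelta
    from typing import Union

    def bump_end(end: Union[datetime, int]) -> Union[datetime, int]:
        """Push an exclusive upper bound just past the newest row."""
        return end + (timedelta(minutes=1) if isinstance(end, datetime) else 1)

    print(bump_end(datetime(2023, 1, 1, 12, 30)))  # 2023-01-01 12:31:00
    print(bump_end(100))                           # 101 (integer datetime axis)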
@@ -115,6 +128,8 @@ def deduplicate(
     )
 
     indices = [col for col in self.columns.values() if col]
+    if not indices:
+        return False, f"Cannot deduplicate without index columns."
     dt_col = self.columns.get('datetime', None)
 
     def process_chunk_bounds(bounds) -> Tuple[
@@ -155,7 +170,15 @@ def deduplicate(
             return bounds, (True, f"{chunk_msg_header}\nChunk is empty, skipping...")
 
         chunk_indices = [ix for ix in indices if ix in full_chunk.columns]
-        full_chunk = full_chunk.drop_duplicates(subset=chunk_indices, keep='last')
+        if not chunk_indices:
+            return bounds, (False, f"None of {items_str(indices)} were present in chunk.")
+        try:
+            full_chunk = full_chunk.drop_duplicates(subset=chunk_indices, keep='last')
+        except Exception as e:
+            return (
+                bounds,
+                (False, f"Failed to deduplicate chunk on {items_str(chunk_indices)}:\n({e})")
+            )
 
         clear_success, clear_msg = self.clear(
             begin = chunk_begin,
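With the new guards in place, the core operation is unchanged: pandas' `drop_duplicates` over whichever index columns the chunk actually contains, keeping the last (most recently synced) copy of each key. For example:

    import pandas as pd

    chunk = pd.DataFrame({
        'dt': ['2023-01-01', '2023-01-01', '2023-01-02'],
        'id': [1, 1, 2],
        'val': [10, 20, 30],
    })

    indices = ['dt', 'id', 'station']                              # 'station' is absent
    chunk_indices = [ix for ix in indices if ix in chunk.columns]  # ['dt', 'id']

    # keep='last' retains the most recently appended duplicate per key.
    deduped = chunk.drop_duplicates(subset=chunk_indices, keep='last')
    print(deduped)  # rows ('2023-01-01', 1, 20) and ('2023-01-02', 2, 30)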
@@ -192,19 +215,16 @@ def deduplicate(
             True, (
                 chunk_msg_header + "\n"
                 + chunk_msg_body + ("\n" if chunk_msg_body else '')
-                + f"Chunk succesfully deduplicated to {chunk_rowcount} rows."
+                + f"Deduplicated chunk from {existing_chunk_len} to {chunk_rowcount} rows."
             )
         )
 
-    _start = chunk_bounds[0][(0 if bounded else 1)]
-    _end = chunk_bounds[-1][(0 if not bounded else 1)]
-    message_header = f"{_start} - {_end}"
     info(
         f"Deduplicating {len(chunk_bounds)} chunk"
         + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
-        + f" of size '{chunk_interval}'"
-        + f" from '{_start}' to '{_end}'..."
+        + f" of size '{interval_str(chunk_interval)}'"
+        + f" on {self}."
     )
     bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds))
     bounds_successes = {
@@ -223,11 +243,10 @@ def deduplicate(
     return (
         False,
         (
-            message_header + "\n"
-            + f"Failed to deduplicate {len(bounds_failures)} chunk"
+            f"Failed to deduplicate {len(bounds_failures)} chunk"
             + ('s' if len(bounds_failures) != 1 else '')
-            + ":\n"
-            + "\n".join([msg for _, (_, msg) in bounds_failures.items()])
+            + ".\n"
+            + "\n".join([msg for _, (_, msg) in bounds_failures.items() if msg])
         )
     )
 
@@ -236,11 +255,10 @@ def deduplicate(
     return (
         True,
         (
-            message_header + "\n"
-            + f"Successfully deduplicated {len(bounds_successes)} chunk"
+            f"Successfully deduplicated {len(bounds_successes)} chunk"
             + ('s' if len(bounds_successes) != 1 else '')
             + ".\n"
-            + "\n".join([msg for _, (_, msg) in bounds_successes.items()])
+            + "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg])
         ).rstrip('\n')
     )
 
@@ -262,21 +280,19 @@ def deduplicate(
     return (
         True,
         (
-            message_header + "\n"
-            + f"Successfully deduplicated {len(bounds_successes)} chunk"
+            f"Successfully deduplicated {len(bounds_successes)} chunk"
             + ('s' if len(bounds_successes) != 1 else '')
-            + f" ({len(retry_bounds_successes)} retried):\n"
-            + "\n".join([msg for _, (_, msg) in bounds_successes.items()])
+            + f"({len(retry_bounds_successes)} retried):\n"
+            + "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg])
         ).rstrip('\n')
     )
 
     return (
         False,
         (
-            message_header + "\n"
-            + f"Failed to deduplicate {len(bounds_failures)} chunk"
+            f"Failed to deduplicate {len(bounds_failures)} chunk"
             + ('s' if len(retry_bounds_failures) != 1 else '')
-            + ":\n"
-            + "\n".join([msg for _, (_, msg) in retry_bounds_failures.items()])
+            + ".\n"
+            + "\n".join([msg for _, (_, msg) in retry_bounds_failures.items() if msg])
         ).rstrip('\n')
     )
meerschaum/core/Pipe/_dtypes.py

@@ -7,6 +7,7 @@ Enforce data types for a pipe's underlying table.
 """
 
 from __future__ import annotations
+from io import StringIO
 from meerschaum.utils.typing import Dict, Any, Optional
 
 def enforce_dtypes(
@@ -38,7 +39,7 @@ def enforce_dtypes(
     try:
         if isinstance(df, str):
             df = parse_df_datetimes(
-                pd.read_json(df),
+                pd.read_json(StringIO(df)),
                 ignore_cols = [
                     col
                     for col, dtype in pipe_dtypes.items()
meerschaum/core/Pipe/_verify.py

@@ -62,6 +62,7 @@ def verify(
         A SuccessTuple indicating whether the pipe was successfully resynced.
     """
     from meerschaum.utils.pool import get_pool
+    from meerschaum.utils.misc import interval_str
     workers = self.get_num_workers(workers)
 
     ### Skip configured bounding in parameters
@@ -74,16 +75,16 @@ def verify(
     if bounded is None:
         bounded = bound_time is not None
 
-    if begin is None:
+    if bounded and begin is None:
         begin = (
             bound_time
             if bound_time is not None
             else self.get_sync_time(newest=False, debug=debug)
         )
-    if end is None:
+    if bounded and end is None:
         end = self.get_sync_time(newest=True, debug=debug)
 
-    if bounded:
+    if bounded and end is not None:
         end += (
             timedelta(minutes=1)
             if isinstance(end, datetime)
@@ -93,13 +94,7 @@ def verify(
     sync_less_than_begin = not bounded and begin is None
     sync_greater_than_end = not bounded and end is None
 
-    cannot_determine_bounds = (
-        begin is None
-        or
-        end is None
-        or
-        not self.exists(debug=debug)
-    )
+    cannot_determine_bounds = not self.exists(debug=debug)
 
     if cannot_determine_bounds:
         sync_success, sync_msg = self.sync(
@@ -146,21 +141,48 @@ def verify(
         )
         return True, f"Could not determine chunks between '{begin}' and '{end}'; nothing to do."
 
+    begin_to_print = (
+        begin
+        if begin is not None
+        else (
+            chunk_bounds[0][0]
+            if bounded
+            else chunk_bounds[0][1]
+        )
+    )
+    end_to_print = (
+        end
+        if end is not None
+        else (
+            chunk_bounds[-1][1]
+            if bounded
+            else chunk_bounds[-1][0]
+        )
+    )
+
     info(
         f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
-        + f" of size '{chunk_interval}'"
-        + f" between '{begin}' and '{end}'."
+        + f" of size '{interval_str(chunk_interval)}'"
+        + f" between '{begin_to_print}' and '{end_to_print}'."
     )
 
     pool = get_pool(workers=workers)
 
+    ### Dictionary of the form bounds -> success_tuple, e.g.:
+    ### {
+    ###     (2023-01-01, 2023-01-02): (True, "Success")
+    ### }
+    bounds_success_tuples = {}
     def process_chunk_bounds(
             chunk_begin_and_end: Tuple[
                 Union[int, datetime],
                 Union[int, datetime]
             ]
         ):
+        if chunk_begin_and_end in bounds_success_tuples:
+            return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
+
         chunk_begin, chunk_end = chunk_begin_and_end
         return chunk_begin_and_end, self.sync(
             begin = chunk_begin,
@@ -171,11 +193,22 @@ def verify(
             **kwargs
         )
 
-    ### Dictionary of the form bounds -> success_tuple, e.g.:
-    ### {
-    ###     (2023-01-01, 2023-01-02): (True, "Success")
-    ### }
-    bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds))
+    ### If we have more than one chunk, attempt to sync the first one and return if its fails.
+    if len(chunk_bounds) > 1:
+        first_chunk_bounds = chunk_bounds[0]
+        (
+            (first_begin, first_end),
+            (first_success, first_msg)
+        ) = process_chunk_bounds(first_chunk_bounds)
+        if not first_success:
+            return (
+                first_success,
+                f"\n{first_begin} - {first_end}\n"
+                + f"Failed to sync first chunk:\n{first_msg}"
+            )
+        bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
+
+    bounds_success_tuples.update(dict(pool.map(process_chunk_bounds, chunk_bounds)))
     bounds_success_bools = {bounds: tup[0] for bounds, tup in bounds_success_tuples.items()}
 
     message_header = f"{begin} - {end}"
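Syncing the first chunk serially before fanning out means a systemic failure (bad credentials, an unreachable instance, a missing table) surfaces once rather than once per chunk, and because the result is memoized, the pooled pass skips chunk one instead of re-syncing it. The shape of the pattern, with a stand-in `do_sync`:

    results = {}

    def do_sync(bounds):
        """Stand-in for pipe.sync(begin=..., end=...)."""
        return True, f"Synced {bounds[0]} - {bounds[1]}."

    def process(bounds):
        if bounds in results:              # memoized: already-synced chunks are free
            return bounds, results[bounds]
        return bounds, do_sync(bounds)

    chunks = [(0, 10), (10, 20), (20, 30)]
    first_bounds, first_result = process(chunks[0])   # serial fail-fast attempt
    if first_result[0]:
        results[first_bounds] = first_result
        results.update(dict(map(process, chunks)))    # pooled pass reuses chunk one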
@@ -195,18 +228,19 @@ def verify(
 
     chunk_bounds_to_resync = [
         bounds
-        for bounds, success in zip(chunk_bounds, chunk_success_bools)
+        for bounds, success in zip(chunk_bounds, bounds_success_bools)
        if not success
     ]
     bounds_to_print = [
         f"{bounds[0]} - {bounds[1]}"
         for bounds in chunk_bounds_to_resync
     ]
-    warn(
-        f"Will resync the following failed chunks:\n    "
-        + '\n    '.join(bounds_to_print),
-        stack = False,
-    )
+    if bounds_to_print:
+        warn(
+            f"Will resync the following failed chunks:\n    "
+            + '\n    '.join(bounds_to_print),
+            stack = False,
+        )
 
     retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
     bounds_success_tuples.update(retry_bounds_success_tuples)
@@ -289,7 +323,8 @@ def get_chunks_success_message(
         ''
         if num_fails == 0
         else (
-            f"\n\nFailed to sync {num_fails} chunks:\n"
+            f"\n\nFailed to sync {num_fails} chunk"
+            + ('s' if num_fails != 1 else '') + ":\n"
             + '\n'.join([
                 f"{fail_begin} - {fail_end}\n{msg}\n"
                 for (fail_begin, fail_end), (_, msg) in fail_chunk_bounds_tuples.items()
meerschaum/plugins/__init__.py

@@ -254,6 +254,9 @@ def sync_plugins_symlinks(debug: bool = False, warn: bool = True) -> None:
     try:
         if PLUGINS_INTERNAL_LOCK_PATH.exists():
             PLUGINS_INTERNAL_LOCK_PATH.unlink()
+    ### Sometimes competing threads will delete the lock file at the same time.
+    except FileNotFoundError:
+        pass
     except Exception as e:
         if warn:
             _warn(f"Error cleaning up lockfile {PLUGINS_INTERNAL_LOCK_PATH}:\n{e}")
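The new `except FileNotFoundError` closes a race where two threads both pass the `.exists()` check and then both call `.unlink()`. On Python 3.8+, `Path.unlink(missing_ok=True)` expresses the same intent in one call; a sketch with a hypothetical path:

    from pathlib import Path

    lock_path = Path('/tmp/mrsm_plugins_internal.lock')  # hypothetical path

    # Race-safe cleanup without the extra except clause (Python 3.8+):
    lock_path.unlink(missing_ok=True)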