meerschaum 2.0.0rc6__py3-none-any.whl → 2.0.0rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +12 -1
- meerschaum/_internal/arguments/_parser.py +23 -1
- meerschaum/actions/__init__.py +97 -48
- meerschaum/actions/bootstrap.py +1 -1
- meerschaum/actions/clear.py +1 -1
- meerschaum/actions/deduplicate.py +1 -1
- meerschaum/actions/delete.py +8 -7
- meerschaum/actions/drop.py +1 -10
- meerschaum/actions/edit.py +1 -1
- meerschaum/actions/install.py +1 -1
- meerschaum/actions/pause.py +1 -1
- meerschaum/actions/register.py +1 -1
- meerschaum/actions/setup.py +1 -1
- meerschaum/actions/show.py +1 -1
- meerschaum/actions/start.py +18 -7
- meerschaum/actions/stop.py +5 -4
- meerschaum/actions/sync.py +17 -2
- meerschaum/actions/uninstall.py +1 -1
- meerschaum/actions/upgrade.py +1 -1
- meerschaum/actions/verify.py +54 -3
- meerschaum/config/_default.py +71 -65
- meerschaum/config/_formatting.py +26 -0
- meerschaum/config/_jobs.py +28 -5
- meerschaum/config/_paths.py +21 -5
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_fetch.py +1 -1
- meerschaum/connectors/api/_pipes.py +6 -11
- meerschaum/connectors/sql/_fetch.py +29 -11
- meerschaum/connectors/sql/_pipes.py +11 -4
- meerschaum/connectors/sql/_sql.py +1 -6
- meerschaum/core/Pipe/__init__.py +5 -1
- meerschaum/core/Pipe/_data.py +58 -9
- meerschaum/core/Pipe/_deduplicate.py +61 -11
- meerschaum/core/Pipe/_dtypes.py +2 -1
- meerschaum/core/Pipe/_verify.py +174 -34
- meerschaum/plugins/__init__.py +3 -0
- meerschaum/utils/daemon/Daemon.py +108 -27
- meerschaum/utils/daemon/__init__.py +35 -1
- meerschaum/utils/dataframe.py +10 -5
- meerschaum/utils/formatting/__init__.py +144 -1
- meerschaum/utils/formatting/_pipes.py +28 -5
- meerschaum/utils/misc.py +183 -187
- meerschaum/utils/packages/__init__.py +1 -1
- meerschaum/utils/packages/_packages.py +1 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/METADATA +4 -1
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/RECORD +52 -52
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/LICENSE +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/NOTICE +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/WHEEL +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/top_level.txt +0 -0
- {meerschaum-2.0.0rc6.dist-info → meerschaum-2.0.0rc8.dist-info}/zip-safe +0 -0
@@ -23,6 +23,7 @@ def deduplicate(
|
|
23
23
|
end: Union[datetime, int, None] = None,
|
24
24
|
params: Optional[Dict[str, Any]] = None,
|
25
25
|
chunk_interval: Union[datetime, int, None] = None,
|
26
|
+
bounded: Optional[bool] = None,
|
26
27
|
workers: Optional[int] = None,
|
27
28
|
debug: bool = False,
|
28
29
|
**kwargs: Any
|
@@ -46,6 +47,9 @@ def deduplicate(
|
|
46
47
|
If provided, use this for the chunk bounds.
|
47
48
|
Defaults to the value set in `pipe.parameters['chunk_minutes']` (1440).
|
48
49
|
|
50
|
+
bounded: Optional[bool], default None
|
51
|
+
Only check outside the oldest and newest sync times if bounded is explicitly `False`.
|
52
|
+
|
49
53
|
workers: Optional[int], default None
|
50
54
|
If the instance connector is thread-safe, limit concurrent syncs to this many threads.
|
51
55
|
|
@@ -61,6 +65,7 @@ def deduplicate(
|
|
61
65
|
A `SuccessTuple` corresponding to whether all of the chunks were successfully deduplicated.
|
62
66
|
"""
|
63
67
|
from meerschaum.utils.warnings import warn, info
|
68
|
+
from meerschaum.utils.misc import interval_str, items_str
|
64
69
|
from meerschaum.utils.venv import Venv
|
65
70
|
from meerschaum.connectors import get_connector_plugin
|
66
71
|
from meerschaum.utils.pool import get_pool
|
@@ -70,6 +75,7 @@ def deduplicate(
|
|
70
75
|
begin = begin,
|
71
76
|
end = end,
|
72
77
|
params = params,
|
78
|
+
bounded = bounded,
|
73
79
|
debug = debug,
|
74
80
|
**kwargs
|
75
81
|
)
|
@@ -86,17 +92,44 @@ def deduplicate(
|
|
86
92
|
begin = begin,
|
87
93
|
end = end,
|
88
94
|
params = params,
|
95
|
+
bounded = bounded,
|
89
96
|
debug = debug,
|
90
97
|
**kwargs
|
91
98
|
)
|
92
99
|
|
100
|
+
### Only unbound if explicitly False.
|
101
|
+
if bounded is None:
|
102
|
+
bounded = True
|
103
|
+
chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
|
104
|
+
|
105
|
+
bound_time = self.get_bound_time(debug=debug)
|
106
|
+
if bounded and begin is None:
|
107
|
+
begin = (
|
108
|
+
bound_time
|
109
|
+
if bound_time is not None
|
110
|
+
else self.get_sync_time(newest=False, debug=debug)
|
111
|
+
)
|
112
|
+
if bounded and end is None:
|
113
|
+
end = self.get_sync_time(newest=True, debug=debug)
|
114
|
+
|
115
|
+
if bounded and end is not None:
|
116
|
+
end += (
|
117
|
+
timedelta(minutes=1)
|
118
|
+
if isinstance(end, datetime)
|
119
|
+
else 1
|
120
|
+
)
|
121
|
+
|
93
122
|
chunk_bounds = self.get_chunk_bounds(
|
94
|
-
bounded =
|
123
|
+
bounded = bounded,
|
124
|
+
begin = begin,
|
125
|
+
end = end,
|
95
126
|
chunk_interval = chunk_interval,
|
96
127
|
debug = debug,
|
97
128
|
)
|
98
129
|
|
99
130
|
indices = [col for col in self.columns.values() if col]
|
131
|
+
if not indices:
|
132
|
+
return False, f"Cannot deduplicate without index columns."
|
100
133
|
dt_col = self.columns.get('datetime', None)
|
101
134
|
|
102
135
|
def process_chunk_bounds(bounds) -> Tuple[
|
@@ -115,6 +148,8 @@ def deduplicate(
|
|
115
148
|
params = params,
|
116
149
|
debug = debug,
|
117
150
|
)
|
151
|
+
if chunk_df is None:
|
152
|
+
return bounds, (True, "")
|
118
153
|
existing_chunk_len = len(chunk_df)
|
119
154
|
deduped_chunk_df = chunk_df.drop_duplicates(keep='last')
|
120
155
|
deduped_chunk_len = len(deduped_chunk_df)
|
@@ -131,11 +166,19 @@ def deduplicate(
|
|
131
166
|
params = params,
|
132
167
|
debug = debug,
|
133
168
|
)
|
134
|
-
if len(full_chunk) == 0:
|
169
|
+
if full_chunk is None or len(full_chunk) == 0:
|
135
170
|
return bounds, (True, f"{chunk_msg_header}\nChunk is empty, skipping...")
|
136
171
|
|
137
172
|
chunk_indices = [ix for ix in indices if ix in full_chunk.columns]
|
138
|
-
|
173
|
+
if not chunk_indices:
|
174
|
+
return bounds, (False, f"None of {items_str(indices)} were present in chunk.")
|
175
|
+
try:
|
176
|
+
full_chunk = full_chunk.drop_duplicates(subset=chunk_indices, keep='last')
|
177
|
+
except Exception as e:
|
178
|
+
return (
|
179
|
+
bounds,
|
180
|
+
(False, f"Failed to deduplicate chunk on {items_str(chunk_indices)}:\n({e})")
|
181
|
+
)
|
139
182
|
|
140
183
|
clear_success, clear_msg = self.clear(
|
141
184
|
begin = chunk_begin,
|
@@ -172,10 +215,17 @@ def deduplicate(
|
|
172
215
|
True, (
|
173
216
|
chunk_msg_header + "\n"
|
174
217
|
+ chunk_msg_body + ("\n" if chunk_msg_body else '')
|
175
|
-
+ f"
|
218
|
+
+ f"Deduplicated chunk from {existing_chunk_len} to {chunk_rowcount} rows."
|
176
219
|
)
|
177
220
|
)
|
178
221
|
|
222
|
+
info(
|
223
|
+
f"Deduplicating {len(chunk_bounds)} chunk"
|
224
|
+
+ ('s' if len(chunk_bounds) != 1 else '')
|
225
|
+
+ f" ({'un' if not bounded else ''}bounded)"
|
226
|
+
+ f" of size '{interval_str(chunk_interval)}'"
|
227
|
+
+ f" on {self}."
|
228
|
+
)
|
179
229
|
bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds))
|
180
230
|
bounds_successes = {
|
181
231
|
bounds: success_tuple
|
@@ -195,8 +245,8 @@ def deduplicate(
|
|
195
245
|
(
|
196
246
|
f"Failed to deduplicate {len(bounds_failures)} chunk"
|
197
247
|
+ ('s' if len(bounds_failures) != 1 else '')
|
198
|
-
+ "
|
199
|
-
+ "\n".join([msg for _, (_, msg) in bounds_failures.items()])
|
248
|
+
+ ".\n"
|
249
|
+
+ "\n".join([msg for _, (_, msg) in bounds_failures.items() if msg])
|
200
250
|
)
|
201
251
|
)
|
202
252
|
|
@@ -208,7 +258,7 @@ def deduplicate(
|
|
208
258
|
f"Successfully deduplicated {len(bounds_successes)} chunk"
|
209
259
|
+ ('s' if len(bounds_successes) != 1 else '')
|
210
260
|
+ ".\n"
|
211
|
-
+ "\n".join([msg for _, (_, msg) in bounds_successes.items()])
|
261
|
+
+ "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg])
|
212
262
|
).rstrip('\n')
|
213
263
|
)
|
214
264
|
|
@@ -232,8 +282,8 @@ def deduplicate(
|
|
232
282
|
(
|
233
283
|
f"Successfully deduplicated {len(bounds_successes)} chunk"
|
234
284
|
+ ('s' if len(bounds_successes) != 1 else '')
|
235
|
-
+ f"
|
236
|
-
+ "\n".join([msg for _, (_, msg) in bounds_successes.items()])
|
285
|
+
+ f"({len(retry_bounds_successes)} retried):\n"
|
286
|
+
+ "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg])
|
237
287
|
).rstrip('\n')
|
238
288
|
)
|
239
289
|
|
@@ -242,7 +292,7 @@ def deduplicate(
|
|
242
292
|
(
|
243
293
|
f"Failed to deduplicate {len(bounds_failures)} chunk"
|
244
294
|
+ ('s' if len(retry_bounds_failures) != 1 else '')
|
245
|
-
+ "
|
246
|
-
+ "\n".join([msg for _, (_, msg) in retry_bounds_failures.items()])
|
295
|
+
+ ".\n"
|
296
|
+
+ "\n".join([msg for _, (_, msg) in retry_bounds_failures.items() if msg])
|
247
297
|
).rstrip('\n')
|
248
298
|
)
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -7,6 +7,7 @@ Enforce data types for a pipe's underlying table.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
+
from io import StringIO
|
10
11
|
from meerschaum.utils.typing import Dict, Any, Optional
|
11
12
|
|
12
13
|
def enforce_dtypes(
|
@@ -38,7 +39,7 @@ def enforce_dtypes(
|
|
38
39
|
try:
|
39
40
|
if isinstance(df, str):
|
40
41
|
df = parse_df_datetimes(
|
41
|
-
pd.read_json(df),
|
42
|
+
pd.read_json(StringIO(df)),
|
42
43
|
ignore_cols = [
|
43
44
|
col
|
44
45
|
for col, dtype in pipe_dtypes.items()
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -17,7 +17,7 @@ def verify(
|
|
17
17
|
end: Union[datetime, int, None] = None,
|
18
18
|
params: Optional[Dict[str, Any]] = None,
|
19
19
|
chunk_interval: Union[timedelta, int, None] = None,
|
20
|
-
bounded: bool =
|
20
|
+
bounded: Optional[bool] = None,
|
21
21
|
deduplicate: bool = False,
|
22
22
|
workers: Optional[int] = None,
|
23
23
|
debug: bool = False,
|
@@ -38,10 +38,11 @@ def verify(
|
|
38
38
|
If provided, use this as the size of the chunk boundaries.
|
39
39
|
Defaults to the value set in `pipe.parameters['chunk_minutes']` (1440).
|
40
40
|
|
41
|
-
bounded: bool, default
|
42
|
-
If `True`, do not verify older than the oldest
|
43
|
-
|
44
|
-
|
41
|
+
bounded: Optional[bool], default None
|
42
|
+
If `True`, do not verify older than the oldest sync time or newer than the newest.
|
43
|
+
If `False`, verify unbounded syncs outside of the newest and oldest sync times.
|
44
|
+
The default behavior (`None`) is to bound only if a bound interval is set
|
45
|
+
(e.g. `pipe.parameters['verify']['bound_days']`).
|
45
46
|
|
46
47
|
deduplicate: bool, default False
|
47
48
|
If `True`, deduplicate the pipe's table after the verification syncs.
|
@@ -61,29 +62,39 @@ def verify(
|
|
61
62
|
A SuccessTuple indicating whether the pipe was successfully resynced.
|
62
63
|
"""
|
63
64
|
from meerschaum.utils.pool import get_pool
|
65
|
+
from meerschaum.utils.misc import interval_str
|
64
66
|
workers = self.get_num_workers(workers)
|
65
|
-
sync_less_than_begin = not bounded and begin is None
|
66
|
-
sync_greater_than_end = not bounded and end is None
|
67
67
|
|
68
|
-
|
69
|
-
|
70
|
-
|
68
|
+
### Skip configured bounding in parameters
|
69
|
+
### if `bounded` is explicitly `False`.
|
70
|
+
bound_time = (
|
71
|
+
self.get_bound_time(debug=debug)
|
72
|
+
if bounded is not False
|
73
|
+
else None
|
74
|
+
)
|
75
|
+
if bounded is None:
|
76
|
+
bounded = bound_time is not None
|
77
|
+
|
78
|
+
if bounded and begin is None:
|
79
|
+
begin = (
|
80
|
+
bound_time
|
81
|
+
if bound_time is not None
|
82
|
+
else self.get_sync_time(newest=False, debug=debug)
|
83
|
+
)
|
84
|
+
if bounded and end is None:
|
71
85
|
end = self.get_sync_time(newest=True, debug=debug)
|
72
86
|
|
73
|
-
if bounded:
|
87
|
+
if bounded and end is not None:
|
74
88
|
end += (
|
75
89
|
timedelta(minutes=1)
|
76
90
|
if isinstance(end, datetime)
|
77
91
|
else 1
|
78
92
|
)
|
79
93
|
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
or
|
85
|
-
not self.exists(debug=debug)
|
86
|
-
)
|
94
|
+
sync_less_than_begin = not bounded and begin is None
|
95
|
+
sync_greater_than_end = not bounded and end is None
|
96
|
+
|
97
|
+
cannot_determine_bounds = not self.exists(debug=debug)
|
87
98
|
|
88
99
|
if cannot_determine_bounds:
|
89
100
|
sync_success, sync_msg = self.sync(
|
@@ -108,9 +119,10 @@ def verify(
|
|
108
119
|
return sync_success, sync_msg
|
109
120
|
|
110
121
|
|
111
|
-
|
112
|
-
chunk_interval = self.get_chunk_interval(debug=debug)
|
122
|
+
chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
|
113
123
|
chunk_bounds = self.get_chunk_bounds(
|
124
|
+
begin = begin,
|
125
|
+
end = end,
|
114
126
|
chunk_interval = chunk_interval,
|
115
127
|
bounded = bounded,
|
116
128
|
debug = debug,
|
@@ -129,20 +141,48 @@ def verify(
|
|
129
141
|
)
|
130
142
|
return True, f"Could not determine chunks between '{begin}' and '{end}'; nothing to do."
|
131
143
|
|
144
|
+
begin_to_print = (
|
145
|
+
begin
|
146
|
+
if begin is not None
|
147
|
+
else (
|
148
|
+
chunk_bounds[0][0]
|
149
|
+
if bounded
|
150
|
+
else chunk_bounds[0][1]
|
151
|
+
)
|
152
|
+
)
|
153
|
+
end_to_print = (
|
154
|
+
end
|
155
|
+
if end is not None
|
156
|
+
else (
|
157
|
+
chunk_bounds[-1][1]
|
158
|
+
if bounded
|
159
|
+
else chunk_bounds[-1][0]
|
160
|
+
)
|
161
|
+
)
|
162
|
+
|
132
163
|
info(
|
133
164
|
f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
|
134
|
-
+ f"
|
135
|
-
+ f"
|
165
|
+
+ f" ({'un' if not bounded else ''}bounded)"
|
166
|
+
+ f" of size '{interval_str(chunk_interval)}'"
|
167
|
+
+ f" between '{begin_to_print}' and '{end_to_print}'."
|
136
168
|
)
|
137
169
|
|
138
170
|
pool = get_pool(workers=workers)
|
139
171
|
|
172
|
+
### Dictionary of the form bounds -> success_tuple, e.g.:
|
173
|
+
### {
|
174
|
+
### (2023-01-01, 2023-01-02): (True, "Success")
|
175
|
+
### }
|
176
|
+
bounds_success_tuples = {}
|
140
177
|
def process_chunk_bounds(
|
141
178
|
chunk_begin_and_end: Tuple[
|
142
179
|
Union[int, datetime],
|
143
180
|
Union[int, datetime]
|
144
181
|
]
|
145
182
|
):
|
183
|
+
if chunk_begin_and_end in bounds_success_tuples:
|
184
|
+
return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
|
185
|
+
|
146
186
|
chunk_begin, chunk_end = chunk_begin_and_end
|
147
187
|
return chunk_begin_and_end, self.sync(
|
148
188
|
begin = chunk_begin,
|
@@ -153,11 +193,22 @@ def verify(
|
|
153
193
|
**kwargs
|
154
194
|
)
|
155
195
|
|
156
|
-
###
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
196
|
+
### If we have more than one chunk, attempt to sync the first one and return if it fails.
|
197
|
+
if len(chunk_bounds) > 1:
|
198
|
+
first_chunk_bounds = chunk_bounds[0]
|
199
|
+
(
|
200
|
+
(first_begin, first_end),
|
201
|
+
(first_success, first_msg)
|
202
|
+
) = process_chunk_bounds(first_chunk_bounds)
|
203
|
+
if not first_success:
|
204
|
+
return (
|
205
|
+
first_success,
|
206
|
+
f"\n{first_begin} - {first_end}\n"
|
207
|
+
+ f"Failed to sync first chunk:\n{first_msg}"
|
208
|
+
)
|
209
|
+
bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
|
210
|
+
|
211
|
+
bounds_success_tuples.update(dict(pool.map(process_chunk_bounds, chunk_bounds)))
|
161
212
|
bounds_success_bools = {bounds: tup[0] for bounds, tup in bounds_success_tuples.items()}
|
162
213
|
|
163
214
|
message_header = f"{begin} - {end}"
|
@@ -177,18 +228,19 @@ def verify(
|
|
177
228
|
|
178
229
|
chunk_bounds_to_resync = [
|
179
230
|
bounds
|
180
|
-
for bounds, success in zip(chunk_bounds,
|
231
|
+
for bounds, success in zip(chunk_bounds, bounds_success_bools)
|
181
232
|
if not success
|
182
233
|
]
|
183
234
|
bounds_to_print = [
|
184
235
|
f"{bounds[0]} - {bounds[1]}"
|
185
236
|
for bounds in chunk_bounds_to_resync
|
186
237
|
]
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
238
|
+
if bounds_to_print:
|
239
|
+
warn(
|
240
|
+
f"Will resync the following failed chunks:\n "
|
241
|
+
+ '\n '.join(bounds_to_print),
|
242
|
+
stack = False,
|
243
|
+
)
|
192
244
|
|
193
245
|
retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
|
194
246
|
bounds_success_tuples.update(retry_bounds_success_tuples)
|
@@ -271,7 +323,8 @@ def get_chunks_success_message(
|
|
271
323
|
''
|
272
324
|
if num_fails == 0
|
273
325
|
else (
|
274
|
-
f"\n\nFailed to sync {num_fails}
|
326
|
+
f"\n\nFailed to sync {num_fails} chunk"
|
327
|
+
+ ('s' if num_fails != 1 else '') + ":\n"
|
275
328
|
+ '\n'.join([
|
276
329
|
f"{fail_begin} - {fail_end}\n{msg}\n"
|
277
330
|
for (fail_begin, fail_end), (_, msg) in fail_chunk_bounds_tuples.items()
|
@@ -280,3 +333,90 @@ def get_chunks_success_message(
|
|
280
333
|
)
|
281
334
|
|
282
335
|
return header + success_msg + fail_msg
|
336
|
+
|
337
|
+
|
338
|
+
def get_bound_interval(self, debug: bool = False) -> Union[timedelta, int, None]:
|
339
|
+
"""
|
340
|
+
Return the interval used to determine the bound time (limit for verification syncs).
|
341
|
+
If the datetime axis is an integer, just return its value.
|
342
|
+
|
343
|
+
Below are the supported keys for the bound interval:
|
344
|
+
|
345
|
+
- `pipe.parameters['verify']['bound_minutes']`
|
346
|
+
- `pipe.parameters['verify']['bound_hours']`
|
347
|
+
- `pipe.parameters['verify']['bound_days']`
|
348
|
+
- `pipe.parameters['verify']['bound_weeks']`
|
349
|
+
- `pipe.parameters['verify']['bound_years']`
|
350
|
+
- `pipe.parameters['verify']['bound_seconds']`
|
351
|
+
|
352
|
+
If multiple keys are present, the first on this priority list will be used.
|
353
|
+
|
354
|
+
Returns
|
355
|
+
-------
|
356
|
+
A `timedelta` or `int` value to be used to determine the bound time.
|
357
|
+
"""
|
358
|
+
verify_params = self.parameters.get('verify', {})
|
359
|
+
prefix = 'bound_'
|
360
|
+
suffixes_to_check = ('minutes', 'hours', 'days', 'weeks', 'years', 'seconds')
|
361
|
+
keys_to_search = {
|
362
|
+
key: val
|
363
|
+
for key, val in verify_params.items()
|
364
|
+
if key.startswith(prefix)
|
365
|
+
}
|
366
|
+
bound_time_key, bound_time_value = None, None
|
367
|
+
for key, value in keys_to_search.items():
|
368
|
+
for suffix in suffixes_to_check:
|
369
|
+
if key == prefix + suffix:
|
370
|
+
bound_time_key = key
|
371
|
+
bound_time_value = value
|
372
|
+
break
|
373
|
+
if bound_time_key is not None:
|
374
|
+
break
|
375
|
+
|
376
|
+
if bound_time_value is None:
|
377
|
+
return bound_time_value
|
378
|
+
|
379
|
+
dt_col = self.columns.get('datetime', None)
|
380
|
+
if not dt_col:
|
381
|
+
return bound_time_value
|
382
|
+
|
383
|
+
dt_typ = self.dtypes.get(dt_col, 'datetime64[ns]')
|
384
|
+
if 'int' in dt_typ.lower():
|
385
|
+
return int(bound_time_value)
|
386
|
+
|
387
|
+
interval_type = bound_time_key.replace(prefix, '')
|
388
|
+
return timedelta(**{interval_type: bound_time_value})
|
389
|
+
|
390
|
+
|
391
|
+
def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]:
|
392
|
+
"""
|
393
|
+
The bound time is the limit at which long-running verification syncs should stop.
|
394
|
+
A value of `None` means verification syncs should be unbounded.
|
395
|
+
|
396
|
+
Like deriving a backtrack time from `pipe.get_sync_time()`,
|
397
|
+
the bound time is the sync time minus a large window (e.g. 366 days).
|
398
|
+
|
399
|
+
Unbound verification syncs (i.e. `bound_time is None`)
|
400
|
+
if the oldest sync time is less than the bound interval.
|
401
|
+
|
402
|
+
Returns
|
403
|
+
-------
|
404
|
+
A `datetime` or `int` corresponding to the
|
405
|
+
`begin` bound for verification and deduplication syncs.
|
406
|
+
"""
|
407
|
+
bound_interval = self.get_bound_interval(debug=debug)
|
408
|
+
if bound_interval is None:
|
409
|
+
return None
|
410
|
+
|
411
|
+
sync_time = self.get_sync_time(debug=debug)
|
412
|
+
if sync_time is None:
|
413
|
+
return None
|
414
|
+
|
415
|
+
bound_time = sync_time - bound_interval
|
416
|
+
oldest_sync_time = self.get_sync_time(newest=False, debug=debug)
|
417
|
+
|
418
|
+
return (
|
419
|
+
bound_time
|
420
|
+
if bound_time > oldest_sync_time
|
421
|
+
else None
|
422
|
+
)
|
meerschaum/plugins/__init__.py
CHANGED
@@ -254,6 +254,9 @@ def sync_plugins_symlinks(debug: bool = False, warn: bool = True) -> None:
|
|
254
254
|
try:
|
255
255
|
if PLUGINS_INTERNAL_LOCK_PATH.exists():
|
256
256
|
PLUGINS_INTERNAL_LOCK_PATH.unlink()
|
257
|
+
### Sometimes competing threads will delete the lock file at the same time.
|
258
|
+
except FileNotFoundError:
|
259
|
+
pass
|
257
260
|
except Exception as e:
|
258
261
|
if warn:
|
259
262
|
_warn(f"Error cleaning up lockfile {PLUGINS_INTERNAL_LOCK_PATH}:\n{e}")
|