meerschaum 2.9.5__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/__init__.py +5 -2
- meerschaum/_internal/__init__.py +1 -0
- meerschaum/_internal/arguments/_parse_arguments.py +4 -4
- meerschaum/_internal/arguments/_parser.py +33 -4
- meerschaum/_internal/cli/__init__.py +6 -0
- meerschaum/_internal/cli/daemons.py +103 -0
- meerschaum/_internal/cli/entry.py +220 -0
- meerschaum/_internal/cli/workers.py +435 -0
- meerschaum/_internal/docs/index.py +48 -2
- meerschaum/_internal/entry.py +50 -14
- meerschaum/_internal/shell/Shell.py +121 -29
- meerschaum/_internal/shell/__init__.py +4 -1
- meerschaum/_internal/static.py +359 -0
- meerschaum/_internal/term/TermPageHandler.py +1 -2
- meerschaum/_internal/term/__init__.py +40 -6
- meerschaum/_internal/term/tools.py +33 -8
- meerschaum/actions/__init__.py +6 -4
- meerschaum/actions/api.py +53 -13
- meerschaum/actions/attach.py +1 -0
- meerschaum/actions/bootstrap.py +8 -8
- meerschaum/actions/delete.py +4 -2
- meerschaum/actions/edit.py +171 -25
- meerschaum/actions/login.py +8 -8
- meerschaum/actions/register.py +143 -6
- meerschaum/actions/reload.py +22 -5
- meerschaum/actions/restart.py +14 -0
- meerschaum/actions/show.py +184 -31
- meerschaum/actions/start.py +166 -17
- meerschaum/actions/stop.py +38 -2
- meerschaum/actions/sync.py +7 -2
- meerschaum/actions/tag.py +9 -8
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +45 -15
- meerschaum/api/_events.py +46 -4
- meerschaum/api/_oauth2.py +162 -9
- meerschaum/api/_tokens.py +102 -0
- meerschaum/api/dash/__init__.py +0 -3
- meerschaum/api/dash/callbacks/__init__.py +1 -0
- meerschaum/api/dash/callbacks/custom.py +4 -3
- meerschaum/api/dash/callbacks/dashboard.py +198 -118
- meerschaum/api/dash/callbacks/jobs.py +14 -7
- meerschaum/api/dash/callbacks/login.py +10 -1
- meerschaum/api/dash/callbacks/pipes.py +194 -14
- meerschaum/api/dash/callbacks/plugins.py +0 -1
- meerschaum/api/dash/callbacks/register.py +10 -3
- meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
- meerschaum/api/dash/callbacks/tokens.py +389 -0
- meerschaum/api/dash/components.py +36 -15
- meerschaum/api/dash/jobs.py +1 -1
- meerschaum/api/dash/keys.py +35 -93
- meerschaum/api/dash/pages/__init__.py +2 -1
- meerschaum/api/dash/pages/dashboard.py +1 -20
- meerschaum/api/dash/pages/{job.py → jobs.py} +10 -7
- meerschaum/api/dash/pages/login.py +2 -2
- meerschaum/api/dash/pages/pipes.py +16 -5
- meerschaum/api/dash/pages/settings/password_reset.py +1 -1
- meerschaum/api/dash/pages/tokens.py +53 -0
- meerschaum/api/dash/pipes.py +382 -95
- meerschaum/api/dash/sessions.py +12 -0
- meerschaum/api/dash/tokens.py +603 -0
- meerschaum/api/dash/websockets.py +1 -1
- meerschaum/api/dash/webterm.py +18 -6
- meerschaum/api/models/__init__.py +23 -3
- meerschaum/api/models/_actions.py +22 -0
- meerschaum/api/models/_pipes.py +91 -7
- meerschaum/api/models/_tokens.py +81 -0
- meerschaum/api/resources/static/js/terminado.js +3 -0
- meerschaum/api/resources/static/js/xterm-addon-unicode11.js +2 -0
- meerschaum/api/resources/templates/termpage.html +13 -0
- meerschaum/api/routes/__init__.py +1 -0
- meerschaum/api/routes/_actions.py +3 -4
- meerschaum/api/routes/_connectors.py +3 -7
- meerschaum/api/routes/_jobs.py +26 -35
- meerschaum/api/routes/_login.py +120 -15
- meerschaum/api/routes/_misc.py +5 -10
- meerschaum/api/routes/_pipes.py +178 -143
- meerschaum/api/routes/_plugins.py +38 -28
- meerschaum/api/routes/_tokens.py +236 -0
- meerschaum/api/routes/_users.py +47 -35
- meerschaum/api/routes/_version.py +3 -3
- meerschaum/api/routes/_webterm.py +3 -3
- meerschaum/config/__init__.py +100 -30
- meerschaum/config/_default.py +132 -64
- meerschaum/config/_edit.py +38 -32
- meerschaum/config/_formatting.py +2 -0
- meerschaum/config/_patch.py +10 -8
- meerschaum/config/_paths.py +133 -13
- meerschaum/config/_read_config.py +87 -36
- meerschaum/config/_sync.py +6 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/config/environment.py +262 -0
- meerschaum/config/stack/__init__.py +37 -15
- meerschaum/config/static.py +18 -0
- meerschaum/connectors/_Connector.py +11 -6
- meerschaum/connectors/__init__.py +41 -22
- meerschaum/connectors/api/_APIConnector.py +34 -6
- meerschaum/connectors/api/_actions.py +2 -2
- meerschaum/connectors/api/_jobs.py +12 -1
- meerschaum/connectors/api/_login.py +33 -7
- meerschaum/connectors/api/_misc.py +2 -2
- meerschaum/connectors/api/_pipes.py +23 -32
- meerschaum/connectors/api/_plugins.py +2 -2
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/api/_tokens.py +146 -0
- meerschaum/connectors/api/_users.py +70 -58
- meerschaum/connectors/instance/_InstanceConnector.py +83 -0
- meerschaum/connectors/instance/__init__.py +10 -0
- meerschaum/connectors/instance/_pipes.py +442 -0
- meerschaum/connectors/instance/_plugins.py +159 -0
- meerschaum/connectors/instance/_tokens.py +317 -0
- meerschaum/connectors/instance/_users.py +188 -0
- meerschaum/connectors/parse.py +5 -2
- meerschaum/connectors/sql/_SQLConnector.py +22 -5
- meerschaum/connectors/sql/_cli.py +12 -11
- meerschaum/connectors/sql/_create_engine.py +12 -168
- meerschaum/connectors/sql/_fetch.py +2 -18
- meerschaum/connectors/sql/_pipes.py +295 -278
- meerschaum/connectors/sql/_plugins.py +29 -0
- meerschaum/connectors/sql/_sql.py +46 -21
- meerschaum/connectors/sql/_users.py +36 -2
- meerschaum/connectors/sql/tables/__init__.py +254 -122
- meerschaum/connectors/valkey/_ValkeyConnector.py +5 -7
- meerschaum/connectors/valkey/_pipes.py +60 -31
- meerschaum/connectors/valkey/_plugins.py +2 -26
- meerschaum/core/Pipe/__init__.py +115 -85
- meerschaum/core/Pipe/_attributes.py +425 -124
- meerschaum/core/Pipe/_bootstrap.py +54 -24
- meerschaum/core/Pipe/_cache.py +555 -0
- meerschaum/core/Pipe/_clear.py +0 -11
- meerschaum/core/Pipe/_data.py +96 -68
- meerschaum/core/Pipe/_deduplicate.py +0 -13
- meerschaum/core/Pipe/_delete.py +12 -21
- meerschaum/core/Pipe/_drop.py +11 -23
- meerschaum/core/Pipe/_dtypes.py +49 -19
- meerschaum/core/Pipe/_edit.py +14 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_index.py +8 -14
- meerschaum/core/Pipe/_show.py +5 -5
- meerschaum/core/Pipe/_sync.py +123 -204
- meerschaum/core/Pipe/_verify.py +4 -4
- meerschaum/{plugins → core/Plugin}/_Plugin.py +16 -12
- meerschaum/core/Plugin/__init__.py +1 -1
- meerschaum/core/Token/_Token.py +220 -0
- meerschaum/core/Token/__init__.py +12 -0
- meerschaum/core/User/_User.py +35 -10
- meerschaum/core/User/__init__.py +9 -1
- meerschaum/core/__init__.py +1 -0
- meerschaum/jobs/_Executor.py +88 -4
- meerschaum/jobs/_Job.py +149 -38
- meerschaum/jobs/__init__.py +3 -2
- meerschaum/jobs/systemd.py +8 -3
- meerschaum/models/__init__.py +35 -0
- meerschaum/models/pipes.py +247 -0
- meerschaum/models/tokens.py +38 -0
- meerschaum/models/users.py +26 -0
- meerschaum/plugins/__init__.py +301 -88
- meerschaum/plugins/bootstrap.py +510 -4
- meerschaum/utils/_get_pipes.py +97 -30
- meerschaum/utils/daemon/Daemon.py +199 -43
- meerschaum/utils/daemon/FileDescriptorInterceptor.py +0 -1
- meerschaum/utils/daemon/RotatingFile.py +63 -36
- meerschaum/utils/daemon/StdinFile.py +53 -13
- meerschaum/utils/daemon/__init__.py +47 -6
- meerschaum/utils/daemon/_names.py +6 -3
- meerschaum/utils/dataframe.py +479 -81
- meerschaum/utils/debug.py +49 -19
- meerschaum/utils/dtypes/__init__.py +476 -34
- meerschaum/utils/dtypes/sql.py +369 -29
- meerschaum/utils/formatting/__init__.py +5 -2
- meerschaum/utils/formatting/_jobs.py +1 -1
- meerschaum/utils/formatting/_pipes.py +52 -50
- meerschaum/utils/formatting/_pprint.py +1 -0
- meerschaum/utils/formatting/_shell.py +44 -18
- meerschaum/utils/misc.py +268 -186
- meerschaum/utils/packages/__init__.py +25 -40
- meerschaum/utils/packages/_packages.py +42 -34
- meerschaum/utils/pipes.py +213 -0
- meerschaum/utils/process.py +2 -2
- meerschaum/utils/prompt.py +175 -144
- meerschaum/utils/schedule.py +2 -1
- meerschaum/utils/sql.py +134 -47
- meerschaum/utils/threading.py +42 -0
- meerschaum/utils/typing.py +1 -4
- meerschaum/utils/venv/_Venv.py +2 -2
- meerschaum/utils/venv/__init__.py +7 -7
- meerschaum/utils/warnings.py +19 -13
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/METADATA +94 -96
- meerschaum-3.0.0.dist-info/RECORD +289 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/WHEEL +1 -1
- meerschaum-3.0.0.dist-info/licenses/NOTICE +2 -0
- meerschaum/api/models/_interfaces.py +0 -15
- meerschaum/api/models/_locations.py +0 -15
- meerschaum/api/models/_metrics.py +0 -15
- meerschaum/config/_environment.py +0 -145
- meerschaum/config/static/__init__.py +0 -186
- meerschaum-2.9.5.dist-info/RECORD +0 -263
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -13,7 +13,7 @@ import time
|
|
13
13
|
import threading
|
14
14
|
import multiprocessing
|
15
15
|
import functools
|
16
|
-
from datetime import datetime, timedelta
|
16
|
+
from datetime import datetime, timedelta, timezone
|
17
17
|
from typing import TYPE_CHECKING
|
18
18
|
|
19
19
|
import meerschaum as mrsm
|
@@ -28,6 +28,7 @@ from meerschaum.utils.typing import (
|
|
28
28
|
List,
|
29
29
|
)
|
30
30
|
from meerschaum.utils.warnings import warn, error
|
31
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
31
32
|
|
32
33
|
if TYPE_CHECKING:
|
33
34
|
pd = mrsm.attempt_import('pandas')
|
@@ -42,6 +43,7 @@ def sync(
|
|
42
43
|
pd.DataFrame,
|
43
44
|
Dict[str, List[Any]],
|
44
45
|
List[Dict[str, Any]],
|
46
|
+
str,
|
45
47
|
InferFetch
|
46
48
|
] = InferFetch,
|
47
49
|
begin: Union[datetime, int, str, None] = '',
|
@@ -71,6 +73,7 @@ def sync(
|
|
71
73
|
----------
|
72
74
|
df: Union[None, pd.DataFrame, Dict[str, List[Any]]], default None
|
73
75
|
An optional DataFrame to sync into the pipe. Defaults to `None`.
|
76
|
+
If `df` is a string, it will be parsed via `meerschaum.utils.dataframe.parse_simple_lines()`.
|
74
77
|
|
75
78
|
begin: Union[datetime, int, str, None], default ''
|
76
79
|
Optionally specify the earliest datetime to search for data.
|
@@ -134,6 +137,7 @@ def sync(
|
|
134
137
|
from meerschaum.utils.misc import df_is_chunk_generator, filter_keywords, filter_arguments
|
135
138
|
from meerschaum.utils.pool import get_pool
|
136
139
|
from meerschaum.config import get_config
|
140
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, get_current_timestamp
|
137
141
|
|
138
142
|
if (callback is not None or error_callback is not None) and blocking:
|
139
143
|
warn("Callback functions are only executed when blocking = False. Ignoring...")
|
@@ -161,8 +165,8 @@ def sync(
|
|
161
165
|
'safe_copy': True,
|
162
166
|
})
|
163
167
|
|
164
|
-
|
165
|
-
self.
|
168
|
+
self._invalidate_cache(debug=debug)
|
169
|
+
self._cache_value('sync_ts', get_current_timestamp('ms'), debug=debug)
|
166
170
|
|
167
171
|
def _sync(
|
168
172
|
p: mrsm.Pipe,
|
@@ -170,11 +174,12 @@ def sync(
|
|
170
174
|
'pd.DataFrame',
|
171
175
|
Dict[str, List[Any]],
|
172
176
|
List[Dict[str, Any]],
|
177
|
+
str,
|
173
178
|
InferFetch
|
174
179
|
] = InferFetch,
|
175
180
|
) -> SuccessTuple:
|
176
181
|
if df is None:
|
177
|
-
p.
|
182
|
+
p._invalidate_cache(debug=debug)
|
178
183
|
return (
|
179
184
|
False,
|
180
185
|
f"You passed `None` instead of data into `sync()` for {p}.\n"
|
@@ -186,9 +191,13 @@ def sync(
|
|
186
191
|
register_success, register_msg = p.register(debug=debug)
|
187
192
|
if not register_success:
|
188
193
|
if 'already' not in register_msg:
|
189
|
-
p.
|
194
|
+
p._invalidate_cache(debug=debug)
|
190
195
|
return register_success, register_msg
|
191
196
|
|
197
|
+
if isinstance(df, str):
|
198
|
+
from meerschaum.utils.dataframe import parse_simple_lines
|
199
|
+
df = parse_simple_lines(df)
|
200
|
+
|
192
201
|
### If connector is a plugin with a `sync()` method, return that instead.
|
193
202
|
### If the plugin does not have a `sync()` method but does have a `fetch()` method,
|
194
203
|
### use that instead.
|
@@ -203,13 +212,13 @@ def sync(
|
|
203
212
|
msg = f"{p} does not have a valid connector."
|
204
213
|
if p.connector_keys.startswith('plugin:'):
|
205
214
|
msg += f"\n Perhaps {p.connector_keys} has a syntax error?"
|
206
|
-
p.
|
215
|
+
p._invalidate_cache(debug=debug)
|
207
216
|
return False, msg
|
208
217
|
except Exception:
|
209
|
-
p.
|
218
|
+
p._invalidate_cache(debug=debug)
|
210
219
|
return False, f"Unable to create the connector for {p}."
|
211
220
|
|
212
|
-
### Sync in place if
|
221
|
+
### Sync in place if possible.
|
213
222
|
if (
|
214
223
|
str(self.connector) == str(self.instance_connector)
|
215
224
|
and
|
@@ -220,7 +229,7 @@ def sync(
|
|
220
229
|
get_config('system', 'experimental', 'inplace_sync')
|
221
230
|
):
|
222
231
|
with Venv(get_connector_plugin(self.instance_connector)):
|
223
|
-
p.
|
232
|
+
p._invalidate_cache(debug=debug)
|
224
233
|
_args, _kwargs = filter_arguments(
|
225
234
|
p.instance_connector.sync_pipe_inplace,
|
226
235
|
p,
|
@@ -243,7 +252,7 @@ def sync(
|
|
243
252
|
**kw
|
244
253
|
)
|
245
254
|
return_tuple = p.connector.sync(*_args, **_kwargs)
|
246
|
-
p.
|
255
|
+
p._invalidate_cache(debug=debug)
|
247
256
|
if not isinstance(return_tuple, tuple):
|
248
257
|
return_tuple = (
|
249
258
|
False,
|
@@ -256,7 +265,7 @@ def sync(
|
|
256
265
|
msg = f"Failed to sync {p} with exception: '" + str(e) + "'"
|
257
266
|
if debug:
|
258
267
|
error(msg, silent=False)
|
259
|
-
p.
|
268
|
+
p._invalidate_cache(debug=debug)
|
260
269
|
return False, msg
|
261
270
|
|
262
271
|
### Fetch the dataframe from the connector's `fetch()` method.
|
@@ -281,7 +290,7 @@ def sync(
|
|
281
290
|
df = None
|
282
291
|
|
283
292
|
if df is None:
|
284
|
-
p.
|
293
|
+
p._invalidate_cache(debug=debug)
|
285
294
|
return False, f"No data were fetched for {p}."
|
286
295
|
|
287
296
|
if isinstance(df, list):
|
@@ -295,7 +304,7 @@ def sync(
|
|
295
304
|
return success, message
|
296
305
|
|
297
306
|
if df is True:
|
298
|
-
p.
|
307
|
+
p._invalidate_cache(debug=debug)
|
299
308
|
return True, f"{p} is being synced in parallel."
|
300
309
|
|
301
310
|
### CHECKPOINT: Retrieved the DataFrame.
|
@@ -339,7 +348,7 @@ def sync(
|
|
339
348
|
+ f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
|
340
349
|
+ f"Sleeping for {_sleep_seconds} second"
|
341
350
|
+ ('s' if _sleep_seconds != 1 else '')
|
342
|
-
+ ":\n{_chunk_msg}"
|
351
|
+
+ f":\n{_chunk_msg}"
|
343
352
|
),
|
344
353
|
stack=False,
|
345
354
|
)
|
@@ -392,19 +401,45 @@ def sync(
|
|
392
401
|
return success, msg
|
393
402
|
|
394
403
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
395
|
-
|
404
|
+
dtypes = p.get_dtypes(debug=debug)
|
405
|
+
df = p.enforce_dtypes(
|
396
406
|
df,
|
397
407
|
chunksize=chunksize,
|
398
408
|
enforce=enforce_dtypes,
|
409
|
+
dtypes=dtypes,
|
399
410
|
debug=debug,
|
400
411
|
)
|
412
|
+
if p.autotime:
|
413
|
+
dt_col = p.columns.get('datetime', None)
|
414
|
+
ts_col = dt_col or mrsm.get_config(
|
415
|
+
'pipes', 'autotime', 'column_name_if_datetime_missing'
|
416
|
+
)
|
417
|
+
ts_typ = dtypes.get(ts_col, 'datetime') if ts_col else 'datetime'
|
418
|
+
if ts_col and hasattr(df, 'columns') and ts_col not in df.columns:
|
419
|
+
precision = p.get_precision(debug=debug)
|
420
|
+
now = get_current_timestamp(
|
421
|
+
precision_unit=precision.get(
|
422
|
+
'unit',
|
423
|
+
STATIC_CONFIG['dtypes']['datetime']['default_precision_unit']
|
424
|
+
),
|
425
|
+
precision_interval=precision.get('interval', 1),
|
426
|
+
round_to=(precision.get('round_to', 'down')),
|
427
|
+
as_int=(are_dtypes_equal(ts_typ, 'int')),
|
428
|
+
)
|
429
|
+
if debug:
|
430
|
+
dprint(f"Adding current timestamp to dataframe synced to {p}: {now}")
|
401
431
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
self.
|
407
|
-
|
432
|
+
df[ts_col] = now
|
433
|
+
kw['check_existing'] = dt_col is not None
|
434
|
+
|
435
|
+
### Capture special columns.
|
436
|
+
capture_success, capture_msg = self._persist_new_special_columns(
|
437
|
+
df,
|
438
|
+
dtypes=dtypes,
|
439
|
+
debug=debug,
|
440
|
+
)
|
441
|
+
if not capture_success:
|
442
|
+
warn(f"Failed to capture new special columns for {self}:\n{capture_msg}")
|
408
443
|
|
409
444
|
if debug:
|
410
445
|
dprint(
|
@@ -442,20 +477,12 @@ def sync(
|
|
442
477
|
("s" if retries != 1 else "") + "!"
|
443
478
|
)
|
444
479
|
|
445
|
-
### CHECKPOINT: Finished syncing.
|
480
|
+
### CHECKPOINT: Finished syncing.
|
446
481
|
_checkpoint(**kw)
|
447
|
-
|
448
|
-
if debug:
|
449
|
-
dprint("Caching retrieved dataframe.", **kw)
|
450
|
-
_sync_cache_tuple = self.cache_pipe.sync(df, debug=debug, **kw)
|
451
|
-
if not _sync_cache_tuple[0]:
|
452
|
-
warn(f"Failed to sync local cache for {self}.")
|
453
|
-
|
454
|
-
self._exists = None
|
482
|
+
p._invalidate_cache(debug=debug)
|
455
483
|
return return_tuple
|
456
484
|
|
457
485
|
if blocking:
|
458
|
-
self._exists = None
|
459
486
|
return _sync(self, df=df)
|
460
487
|
|
461
488
|
from meerschaum.utils.threading import Thread
|
@@ -480,10 +507,10 @@ def sync(
|
|
480
507
|
)
|
481
508
|
thread.start()
|
482
509
|
except Exception as e:
|
483
|
-
self.
|
510
|
+
self._invalidate_cache(debug=debug)
|
484
511
|
return False, str(e)
|
485
512
|
|
486
|
-
self.
|
513
|
+
self._invalidate_cache(debug=debug)
|
487
514
|
return True, f"Spawned asyncronous sync for {self}."
|
488
515
|
|
489
516
|
|
@@ -529,7 +556,8 @@ def get_sync_time(
|
|
529
556
|
"""
|
530
557
|
from meerschaum.utils.venv import Venv
|
531
558
|
from meerschaum.connectors import get_connector_plugin
|
532
|
-
from meerschaum.utils.misc import
|
559
|
+
from meerschaum.utils.misc import filter_keywords
|
560
|
+
from meerschaum.utils.dtypes import round_time
|
533
561
|
from meerschaum.utils.warnings import warn
|
534
562
|
|
535
563
|
if not self.columns.get('datetime', None):
|
@@ -585,20 +613,19 @@ def exists(
|
|
585
613
|
A `bool` corresponding to whether a pipe's underlying table exists.
|
586
614
|
|
587
615
|
"""
|
588
|
-
import time
|
589
616
|
from meerschaum.utils.venv import Venv
|
590
617
|
from meerschaum.connectors import get_connector_plugin
|
591
|
-
from meerschaum.config import STATIC_CONFIG
|
592
618
|
from meerschaum.utils.debug import dprint
|
593
|
-
|
594
|
-
|
619
|
+
from meerschaum.utils.dtypes import get_current_timestamp
|
620
|
+
now = get_current_timestamp('ms', as_int=True) / 1000
|
621
|
+
cache_seconds = mrsm.get_config('pipes', 'sync', 'exists_cache_seconds')
|
595
622
|
|
596
|
-
_exists = self.
|
623
|
+
_exists = self._get_cached_value('_exists', debug=debug)
|
597
624
|
if _exists:
|
598
|
-
exists_timestamp = self.
|
625
|
+
exists_timestamp = self._get_cached_value('_exists_timestamp', debug=debug)
|
599
626
|
if exists_timestamp is not None:
|
600
627
|
delta = now - exists_timestamp
|
601
|
-
if delta <
|
628
|
+
if delta < cache_seconds:
|
602
629
|
if debug:
|
603
630
|
dprint(f"Returning cached `exists` for {self} ({round(delta, 2)} seconds old).")
|
604
631
|
return _exists
|
@@ -610,8 +637,8 @@ def exists(
|
|
610
637
|
else False
|
611
638
|
)
|
612
639
|
|
613
|
-
self.
|
614
|
-
self.
|
640
|
+
self._cache_value('_exists', _exists, debug=debug)
|
641
|
+
self._cache_value('_exists_timestamp', now, debug=debug)
|
615
642
|
return _exists
|
616
643
|
|
617
644
|
|
@@ -663,7 +690,6 @@ def filter_existing(
|
|
663
690
|
from meerschaum.utils.warnings import warn
|
664
691
|
from meerschaum.utils.debug import dprint
|
665
692
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
666
|
-
from meerschaum.utils.misc import round_time
|
667
693
|
from meerschaum.utils.dataframe import (
|
668
694
|
filter_unseen_df,
|
669
695
|
add_missing_cols_to_df,
|
@@ -675,6 +701,7 @@ def filter_existing(
|
|
675
701
|
to_datetime,
|
676
702
|
are_dtypes_equal,
|
677
703
|
value_is_null,
|
704
|
+
round_time,
|
678
705
|
)
|
679
706
|
from meerschaum.config import get_config
|
680
707
|
pd = import_pandas()
|
@@ -690,9 +717,13 @@ def filter_existing(
|
|
690
717
|
merge = pd.merge
|
691
718
|
NA = pd.NA
|
692
719
|
|
693
|
-
|
694
|
-
|
695
|
-
|
720
|
+
parameters = self.parameters
|
721
|
+
pipe_columns = parameters.get('columns', {})
|
722
|
+
primary_key = pipe_columns.get('primary', None)
|
723
|
+
dt_col = pipe_columns.get('datetime', None)
|
724
|
+
dt_type = parameters.get('dtypes', {}).get(dt_col, 'datetime') if dt_col else None
|
725
|
+
autoincrement = parameters.get('autoincrement', False)
|
726
|
+
autotime = parameters.get('autotime', False)
|
696
727
|
|
697
728
|
if primary_key and autoincrement and df is not None and primary_key in df.columns:
|
698
729
|
if safe_copy:
|
@@ -702,10 +733,18 @@ def filter_existing(
|
|
702
733
|
del df[primary_key]
|
703
734
|
_ = self.columns.pop(primary_key, None)
|
704
735
|
|
736
|
+
if dt_col and autotime and df is not None and dt_col in df.columns:
|
737
|
+
if safe_copy:
|
738
|
+
df = df.copy()
|
739
|
+
safe_copy = False
|
740
|
+
if df[dt_col].isnull().all():
|
741
|
+
del df[dt_col]
|
742
|
+
_ = self.columns.pop(dt_col, None)
|
743
|
+
|
705
744
|
def get_empty_df():
|
706
745
|
empty_df = pd.DataFrame([])
|
707
746
|
dtypes = dict(df.dtypes) if df is not None else {}
|
708
|
-
dtypes.update(self.dtypes)
|
747
|
+
dtypes.update(self.dtypes) if self.enforce else {}
|
709
748
|
pd_dtypes = {
|
710
749
|
col: to_pandas_dtype(str(typ))
|
711
750
|
for col, typ in dtypes.items()
|
@@ -721,15 +760,17 @@ def filter_existing(
|
|
721
760
|
|
722
761
|
### begin is the oldest data in the new dataframe
|
723
762
|
begin, end = None, None
|
724
|
-
dt_col = pipe_columns.get('datetime', None)
|
725
|
-
primary_key = pipe_columns.get('primary', None)
|
726
|
-
dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
|
727
763
|
|
728
764
|
if autoincrement and primary_key == dt_col and dt_col not in df.columns:
|
729
765
|
if enforce_dtypes:
|
730
766
|
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
731
767
|
return df, get_empty_df(), df
|
732
768
|
|
769
|
+
if autotime and dt_col and dt_col not in df.columns:
|
770
|
+
if enforce_dtypes:
|
771
|
+
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
772
|
+
return df, get_empty_df(), df
|
773
|
+
|
733
774
|
try:
|
734
775
|
min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
|
735
776
|
if is_dask and min_dt_val is not None:
|
@@ -846,7 +887,8 @@ def filter_existing(
|
|
846
887
|
and col in backtrack_df.columns
|
847
888
|
)
|
848
889
|
] if not primary_key else [primary_key]
|
849
|
-
|
890
|
+
|
891
|
+
self_dtypes = self.get_dtypes(debug=debug) if self.enforce else {}
|
850
892
|
on_cols_dtypes = {
|
851
893
|
col: to_pandas_dtype(typ)
|
852
894
|
for col, typ in self_dtypes.items()
|
@@ -999,160 +1041,37 @@ def get_num_workers(self, workers: Optional[int] = None) -> int:
|
|
999
1041
|
)
|
1000
1042
|
|
1001
1043
|
|
1002
|
-
def
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
|
1009
|
-
new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
|
1010
|
-
if not new_numeric_cols:
|
1011
|
-
return True, "Success"
|
1012
|
-
|
1013
|
-
self._attributes_sync_time = None
|
1014
|
-
dtypes = self.parameters.get('dtypes', {})
|
1015
|
-
dtypes.update({col: 'numeric' for col in new_numeric_cols})
|
1016
|
-
self.parameters['dtypes'] = dtypes
|
1017
|
-
if not self.temporary:
|
1018
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1019
|
-
if not edit_success:
|
1020
|
-
warn(f"Unable to update NUMERIC dtypes for {self}:\n{edit_msg}")
|
1021
|
-
|
1022
|
-
return edit_success, edit_msg
|
1023
|
-
|
1024
|
-
return True, "Success"
|
1025
|
-
|
1026
|
-
|
1027
|
-
def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1028
|
-
"""
|
1029
|
-
Check for new numeric columns and update the parameters.
|
1030
|
-
"""
|
1031
|
-
from meerschaum.utils.dataframe import get_uuid_cols
|
1032
|
-
uuid_cols = get_uuid_cols(df)
|
1033
|
-
existing_uuid_cols = [col for col, typ in self.dtypes.items() if typ == 'uuid']
|
1034
|
-
new_uuid_cols = [col for col in uuid_cols if col not in existing_uuid_cols]
|
1035
|
-
if not new_uuid_cols:
|
1036
|
-
return True, "Success"
|
1037
|
-
|
1038
|
-
self._attributes_sync_time = None
|
1039
|
-
dtypes = self.parameters.get('dtypes', {})
|
1040
|
-
dtypes.update({col: 'uuid' for col in new_uuid_cols})
|
1041
|
-
self.parameters['dtypes'] = dtypes
|
1042
|
-
if not self.temporary:
|
1043
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1044
|
-
if not edit_success:
|
1045
|
-
warn(f"Unable to update UUID dtypes for {self}:\n{edit_msg}")
|
1046
|
-
|
1047
|
-
return edit_success, edit_msg
|
1048
|
-
|
1049
|
-
return True, "Success"
|
1050
|
-
|
1051
|
-
|
1052
|
-
def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1053
|
-
"""
|
1054
|
-
Check for new JSON columns and update the parameters.
|
1055
|
-
"""
|
1056
|
-
from meerschaum.utils.dataframe import get_json_cols
|
1057
|
-
json_cols = get_json_cols(df)
|
1058
|
-
existing_json_cols = [col for col, typ in self.dtypes.items() if typ == 'json']
|
1059
|
-
new_json_cols = [col for col in json_cols if col not in existing_json_cols]
|
1060
|
-
if not new_json_cols:
|
1061
|
-
return True, "Success"
|
1062
|
-
|
1063
|
-
self._attributes_sync_time = None
|
1064
|
-
dtypes = self.parameters.get('dtypes', {})
|
1065
|
-
dtypes.update({col: 'json' for col in new_json_cols})
|
1066
|
-
self.parameters['dtypes'] = dtypes
|
1067
|
-
|
1068
|
-
if not self.temporary:
|
1069
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1070
|
-
if not edit_success:
|
1071
|
-
warn(f"Unable to update JSON dtypes for {self}:\n{edit_msg}")
|
1072
|
-
|
1073
|
-
return edit_success, edit_msg
|
1074
|
-
|
1075
|
-
return True, "Success"
|
1076
|
-
|
1077
|
-
|
1078
|
-
def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1079
|
-
"""
|
1080
|
-
Check for new `bytes` columns and update the parameters.
|
1081
|
-
"""
|
1082
|
-
from meerschaum.utils.dataframe import get_bytes_cols
|
1083
|
-
bytes_cols = get_bytes_cols(df)
|
1084
|
-
existing_bytes_cols = [col for col, typ in self.dtypes.items() if typ == 'bytes']
|
1085
|
-
new_bytes_cols = [col for col in bytes_cols if col not in existing_bytes_cols]
|
1086
|
-
if not new_bytes_cols:
|
1087
|
-
return True, "Success"
|
1088
|
-
|
1089
|
-
self._attributes_sync_time = None
|
1090
|
-
dtypes = self.parameters.get('dtypes', {})
|
1091
|
-
dtypes.update({col: 'bytes' for col in new_bytes_cols})
|
1092
|
-
self.parameters['dtypes'] = dtypes
|
1093
|
-
|
1094
|
-
if not self.temporary:
|
1095
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1096
|
-
if not edit_success:
|
1097
|
-
warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
|
1098
|
-
|
1099
|
-
return edit_success, edit_msg
|
1100
|
-
|
1101
|
-
return True, "Success"
|
1102
|
-
|
1103
|
-
|
1104
|
-
def _persist_new_geometry_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1044
|
+
def _persist_new_special_columns(
|
1045
|
+
self,
|
1046
|
+
df: 'pd.DataFrame',
|
1047
|
+
dtypes: Optional[Dict[str, str]] = None,
|
1048
|
+
debug: bool = False,
|
1049
|
+
) -> mrsm.SuccessTuple:
|
1105
1050
|
"""
|
1106
|
-
Check for new
|
1051
|
+
Check for new special columns and update the parameters accordingly.
|
1107
1052
|
"""
|
1108
|
-
from meerschaum.utils.dataframe import
|
1109
|
-
|
1110
|
-
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
col
|
1117
|
-
|
1118
|
-
|
1119
|
-
|
1120
|
-
|
1053
|
+
from meerschaum.utils.dataframe import get_special_cols
|
1054
|
+
from meerschaum.utils.dtypes import is_dtype_special
|
1055
|
+
from meerschaum.utils.warnings import dprint
|
1056
|
+
|
1057
|
+
special_cols = get_special_cols(df)
|
1058
|
+
dtypes = dtypes or self.get_dtypes(debug=debug)
|
1059
|
+
existing_special_cols = {
|
1060
|
+
col: typ
|
1061
|
+
for col, typ in dtypes.items()
|
1062
|
+
if is_dtype_special(typ)
|
1063
|
+
}
|
1064
|
+
new_special_cols = {
|
1065
|
+
col: typ
|
1066
|
+
for col, typ in special_cols.items()
|
1067
|
+
if col not in existing_special_cols
|
1068
|
+
}
|
1069
|
+
self._cache_value('new_special_cols', new_special_cols, memory_only=True, debug=debug)
|
1070
|
+
if not new_special_cols:
|
1121
1071
|
return True, "Success"
|
1122
1072
|
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
new_cols_types = {}
|
1127
|
-
for col, (geometry_type, srid) in geometry_cols_types_srids.items():
|
1128
|
-
if col not in new_geometry_cols:
|
1129
|
-
continue
|
1130
|
-
|
1131
|
-
new_dtype = "geometry"
|
1132
|
-
modifier = ""
|
1133
|
-
if not srid and geometry_type.lower() == 'geometry':
|
1134
|
-
new_cols_types[col] = new_dtype
|
1135
|
-
continue
|
1136
|
-
|
1137
|
-
modifier = "["
|
1138
|
-
if geometry_type.lower() != 'geometry':
|
1139
|
-
modifier += f"{geometry_type}"
|
1140
|
-
|
1141
|
-
if srid:
|
1142
|
-
if modifier != '[':
|
1143
|
-
modifier += ", "
|
1144
|
-
modifier += f"{srid}"
|
1145
|
-
modifier += "]"
|
1146
|
-
new_cols_types[col] = f"{new_dtype}{modifier}"
|
1147
|
-
|
1148
|
-
dtypes.update(new_cols_types)
|
1149
|
-
self.parameters['dtypes'] = dtypes
|
1150
|
-
|
1151
|
-
if not self.temporary:
|
1152
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1153
|
-
if not edit_success:
|
1154
|
-
warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
|
1155
|
-
|
1156
|
-
return edit_success, edit_msg
|
1073
|
+
if debug:
|
1074
|
+
dprint(f"New special columns:\n{new_special_cols}")
|
1157
1075
|
|
1158
|
-
|
1076
|
+
self._clear_cache_key('_attributes_sync_time', debug=debug)
|
1077
|
+
return self.update_parameters({'dtypes': new_special_cols}, debug=debug)
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -12,7 +12,7 @@ import time
|
|
12
12
|
import meerschaum as mrsm
|
13
13
|
from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
|
14
14
|
from meerschaum.utils.warnings import warn, info
|
15
|
-
from meerschaum.
|
15
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
16
16
|
|
17
17
|
|
18
18
|
def verify(
|
@@ -418,7 +418,7 @@ def verify(
|
|
418
418
|
retry_failed_batch = False
|
419
419
|
|
420
420
|
batch_msg_to_print = (
|
421
|
-
f"{make_header('Completed batch ' + batch_counter_str + ':')}\n{batch_msg}"
|
421
|
+
f"{make_header('Completed batch ' + batch_counter_str + ':', left_pad=0)}\n{batch_msg}"
|
422
422
|
)
|
423
423
|
mrsm.pprint((batch_success, batch_msg_to_print))
|
424
424
|
|
@@ -426,7 +426,7 @@ def verify(
|
|
426
426
|
info(f"Retrying batch {batch_counter_str}...")
|
427
427
|
retry_batch_success, retry_batch_msg = process_batch(batch)
|
428
428
|
retry_batch_msg_to_print = (
|
429
|
-
f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
|
429
|
+
f"Retried {make_header('batch ' + batch_label, left_pad=0)}\n{retry_batch_msg}"
|
430
430
|
)
|
431
431
|
mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))
|
432
432
|
|
@@ -587,7 +587,7 @@ def get_bound_interval(self, debug: bool = False) -> Union[timedelta, int, None]
|
|
587
587
|
if not dt_col:
|
588
588
|
return bound_time_value
|
589
589
|
|
590
|
-
dt_typ = self.dtypes.get(dt_col, '
|
590
|
+
dt_typ = self.dtypes.get(dt_col, 'datetime')
|
591
591
|
if 'int' in dt_typ.lower():
|
592
592
|
return int(bound_time_value)
|
593
593
|
|
@@ -22,19 +22,13 @@ from meerschaum.utils.typing import (
|
|
22
22
|
Union,
|
23
23
|
)
|
24
24
|
from meerschaum.utils.warnings import error, warn
|
25
|
-
from meerschaum.config import get_config
|
26
|
-
from meerschaum.config._paths import (
|
27
|
-
PLUGINS_RESOURCES_PATH,
|
28
|
-
PLUGINS_ARCHIVES_RESOURCES_PATH,
|
29
|
-
PLUGINS_TEMP_RESOURCES_PATH,
|
30
|
-
VIRTENV_RESOURCES_PATH,
|
31
|
-
PLUGINS_DIR_PATHS,
|
32
|
-
)
|
33
25
|
_tmpversion = None
|
34
26
|
_ongoing_installations = set()
|
35
27
|
|
28
|
+
|
36
29
|
class Plugin:
|
37
30
|
"""Handle packaging of Meerschaum plugins."""
|
31
|
+
|
38
32
|
def __init__(
|
39
33
|
self,
|
40
34
|
name: str,
|
@@ -47,7 +41,8 @@ class Plugin:
|
|
47
41
|
repo_connector: Optional['mrsm.connectors.api.APIConnector'] = None,
|
48
42
|
repo: Union['mrsm.connectors.api.APIConnector', str, None] = None,
|
49
43
|
):
|
50
|
-
from meerschaum.
|
44
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
45
|
+
from meerschaum.config.paths import PLUGINS_ARCHIVES_RESOURCES_PATH, VIRTENV_RESOURCES_PATH
|
51
46
|
sep = STATIC_CONFIG['plugins']['repo_separator']
|
52
47
|
_repo = None
|
53
48
|
if sep in name:
|
@@ -117,8 +112,10 @@ class Plugin:
|
|
117
112
|
if '_module' not in self.__dict__ or self.__dict__.get('_module', None) is None:
|
118
113
|
if self.__file__ is None:
|
119
114
|
return None
|
115
|
+
|
120
116
|
from meerschaum.plugins import import_plugins
|
121
117
|
self._module = import_plugins(str(self), warn=False)
|
118
|
+
|
122
119
|
return self._module
|
123
120
|
|
124
121
|
|
@@ -130,6 +127,8 @@ class Plugin:
|
|
130
127
|
if self.__dict__.get('_module', None) is not None:
|
131
128
|
return self.module.__file__
|
132
129
|
|
130
|
+
from meerschaum.config.paths import PLUGINS_RESOURCES_PATH
|
131
|
+
|
133
132
|
potential_dir = PLUGINS_RESOURCES_PATH / self.name
|
134
133
|
if (
|
135
134
|
potential_dir.exists()
|
@@ -295,6 +294,7 @@ class Plugin:
|
|
295
294
|
from meerschaum.utils.packages import attempt_import, determine_version, reload_meerschaum
|
296
295
|
from meerschaum.utils.venv import init_venv
|
297
296
|
from meerschaum.utils.misc import safely_extract_tar
|
297
|
+
from meerschaum.config.paths import PLUGINS_TEMP_RESOURCES_PATH, PLUGINS_DIR_PATHS
|
298
298
|
old_cwd = os.getcwd()
|
299
299
|
old_version = ''
|
300
300
|
new_version = ''
|
@@ -365,6 +365,10 @@ class Plugin:
|
|
365
365
|
### Determine where to permanently store the new plugin.
|
366
366
|
plugin_installation_dir_path = PLUGINS_DIR_PATHS[0]
|
367
367
|
for path in PLUGINS_DIR_PATHS:
|
368
|
+
if not path.exists():
|
369
|
+
warn(f"Plugins path does not exist: {path}", stack=False)
|
370
|
+
continue
|
371
|
+
|
368
372
|
files_in_plugins_dir = os.listdir(path)
|
369
373
|
if (
|
370
374
|
self.name in files_in_plugins_dir
|
@@ -722,7 +726,7 @@ class Plugin:
|
|
722
726
|
"""
|
723
727
|
from meerschaum.utils.warnings import warn
|
724
728
|
from meerschaum.config import get_config
|
725
|
-
from meerschaum.
|
729
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
726
730
|
from meerschaum.connectors.parse import is_valid_connector_keys
|
727
731
|
plugins = []
|
728
732
|
_deps = self.get_dependencies(debug=debug)
|
@@ -731,7 +735,7 @@ class Plugin:
|
|
731
735
|
_d[len('plugin:'):] for _d in _deps
|
732
736
|
if _d.startswith('plugin:') and len(_d) > len('plugin:')
|
733
737
|
]
|
734
|
-
default_repo_keys = get_config('meerschaum', '
|
738
|
+
default_repo_keys = get_config('meerschaum', 'repository')
|
735
739
|
skipped_repo_keys = set()
|
736
740
|
|
737
741
|
for _plugin_name in plugin_names:
|
@@ -953,7 +957,7 @@ class Plugin:
|
|
953
957
|
"""
|
954
958
|
Include the repo keys with the plugin's name.
|
955
959
|
"""
|
956
|
-
from meerschaum.
|
960
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
957
961
|
sep = STATIC_CONFIG['plugins']['repo_separator']
|
958
962
|
return self.name + sep + str(self.repo_connector)
|
959
963
|
|