meerschaum 2.9.5__py3-none-any.whl → 3.0.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/__init__.py +5 -2
- meerschaum/_internal/__init__.py +1 -0
- meerschaum/_internal/arguments/_parse_arguments.py +4 -4
- meerschaum/_internal/arguments/_parser.py +19 -2
- meerschaum/_internal/docs/index.py +49 -2
- meerschaum/_internal/entry.py +6 -6
- meerschaum/_internal/shell/Shell.py +1 -1
- meerschaum/_internal/static.py +356 -0
- meerschaum/actions/api.py +12 -2
- meerschaum/actions/bootstrap.py +7 -7
- meerschaum/actions/edit.py +142 -18
- meerschaum/actions/register.py +137 -6
- meerschaum/actions/show.py +117 -29
- meerschaum/actions/stop.py +4 -1
- meerschaum/actions/sync.py +1 -1
- meerschaum/actions/tag.py +9 -8
- meerschaum/actions/verify.py +5 -8
- meerschaum/api/__init__.py +11 -3
- meerschaum/api/_events.py +39 -2
- meerschaum/api/_oauth2.py +118 -8
- meerschaum/api/_tokens.py +102 -0
- meerschaum/api/dash/__init__.py +0 -3
- meerschaum/api/dash/callbacks/custom.py +2 -2
- meerschaum/api/dash/callbacks/dashboard.py +103 -19
- meerschaum/api/dash/callbacks/plugins.py +0 -1
- meerschaum/api/dash/callbacks/register.py +1 -1
- meerschaum/api/dash/callbacks/settings/__init__.py +1 -0
- meerschaum/api/dash/callbacks/settings/password_reset.py +2 -2
- meerschaum/api/dash/callbacks/settings/tokens.py +388 -0
- meerschaum/api/dash/components.py +30 -8
- meerschaum/api/dash/keys.py +19 -93
- meerschaum/api/dash/pages/dashboard.py +1 -20
- meerschaum/api/dash/pages/settings/__init__.py +1 -0
- meerschaum/api/dash/pages/settings/password_reset.py +1 -1
- meerschaum/api/dash/pages/settings/tokens.py +55 -0
- meerschaum/api/dash/pipes.py +94 -59
- meerschaum/api/dash/sessions.py +12 -0
- meerschaum/api/dash/tokens.py +606 -0
- meerschaum/api/dash/websockets.py +1 -1
- meerschaum/api/dash/webterm.py +4 -0
- meerschaum/api/models/__init__.py +23 -3
- meerschaum/api/models/_actions.py +22 -0
- meerschaum/api/models/_pipes.py +85 -7
- meerschaum/api/models/_tokens.py +81 -0
- meerschaum/api/resources/templates/termpage.html +12 -0
- meerschaum/api/routes/__init__.py +1 -0
- meerschaum/api/routes/_actions.py +3 -4
- meerschaum/api/routes/_connectors.py +3 -7
- meerschaum/api/routes/_jobs.py +14 -35
- meerschaum/api/routes/_login.py +49 -12
- meerschaum/api/routes/_misc.py +5 -10
- meerschaum/api/routes/_pipes.py +173 -140
- meerschaum/api/routes/_plugins.py +38 -28
- meerschaum/api/routes/_tokens.py +236 -0
- meerschaum/api/routes/_users.py +47 -35
- meerschaum/api/routes/_version.py +3 -3
- meerschaum/config/__init__.py +43 -20
- meerschaum/config/_default.py +43 -6
- meerschaum/config/_edit.py +28 -24
- meerschaum/config/_environment.py +1 -1
- meerschaum/config/_patch.py +6 -6
- meerschaum/config/_paths.py +5 -1
- meerschaum/config/_read_config.py +65 -34
- meerschaum/config/_sync.py +6 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +31 -11
- meerschaum/config/static.py +18 -0
- meerschaum/connectors/_Connector.py +10 -4
- meerschaum/connectors/__init__.py +4 -20
- meerschaum/connectors/api/_APIConnector.py +34 -6
- meerschaum/connectors/api/_actions.py +2 -2
- meerschaum/connectors/api/_jobs.py +1 -1
- meerschaum/connectors/api/_login.py +33 -7
- meerschaum/connectors/api/_misc.py +2 -2
- meerschaum/connectors/api/_pipes.py +16 -31
- meerschaum/connectors/api/_plugins.py +2 -2
- meerschaum/connectors/api/_request.py +1 -1
- meerschaum/connectors/api/_tokens.py +146 -0
- meerschaum/connectors/api/_users.py +70 -58
- meerschaum/connectors/instance/_InstanceConnector.py +83 -0
- meerschaum/connectors/instance/__init__.py +10 -0
- meerschaum/connectors/instance/_pipes.py +442 -0
- meerschaum/connectors/instance/_plugins.py +151 -0
- meerschaum/connectors/instance/_tokens.py +296 -0
- meerschaum/connectors/instance/_users.py +181 -0
- meerschaum/connectors/parse.py +4 -1
- meerschaum/connectors/sql/_SQLConnector.py +8 -5
- meerschaum/connectors/sql/_cli.py +12 -11
- meerschaum/connectors/sql/_create_engine.py +9 -168
- meerschaum/connectors/sql/_fetch.py +2 -18
- meerschaum/connectors/sql/_pipes.py +156 -190
- meerschaum/connectors/sql/_plugins.py +29 -0
- meerschaum/connectors/sql/_sql.py +46 -21
- meerschaum/connectors/sql/_users.py +29 -2
- meerschaum/connectors/sql/tables/__init__.py +1 -1
- meerschaum/connectors/valkey/_ValkeyConnector.py +2 -4
- meerschaum/connectors/valkey/_pipes.py +53 -26
- meerschaum/connectors/valkey/_plugins.py +2 -26
- meerschaum/core/Pipe/__init__.py +59 -19
- meerschaum/core/Pipe/_attributes.py +412 -90
- meerschaum/core/Pipe/_bootstrap.py +54 -24
- meerschaum/core/Pipe/_data.py +96 -18
- meerschaum/core/Pipe/_dtypes.py +48 -18
- meerschaum/core/Pipe/_edit.py +14 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_show.py +5 -5
- meerschaum/core/Pipe/_sync.py +118 -193
- meerschaum/core/Pipe/_verify.py +4 -4
- meerschaum/{plugins → core/Plugin}/_Plugin.py +9 -11
- meerschaum/core/Plugin/__init__.py +1 -1
- meerschaum/core/Token/_Token.py +220 -0
- meerschaum/core/Token/__init__.py +12 -0
- meerschaum/core/User/_User.py +34 -8
- meerschaum/core/User/__init__.py +9 -1
- meerschaum/core/__init__.py +1 -0
- meerschaum/jobs/_Job.py +3 -2
- meerschaum/jobs/__init__.py +3 -2
- meerschaum/jobs/systemd.py +1 -1
- meerschaum/models/__init__.py +35 -0
- meerschaum/models/pipes.py +247 -0
- meerschaum/models/tokens.py +38 -0
- meerschaum/models/users.py +26 -0
- meerschaum/plugins/__init__.py +22 -7
- meerschaum/plugins/bootstrap.py +2 -1
- meerschaum/utils/_get_pipes.py +68 -27
- meerschaum/utils/daemon/Daemon.py +2 -1
- meerschaum/utils/daemon/__init__.py +30 -2
- meerschaum/utils/dataframe.py +473 -81
- meerschaum/utils/debug.py +15 -15
- meerschaum/utils/dtypes/__init__.py +473 -34
- meerschaum/utils/dtypes/sql.py +368 -28
- meerschaum/utils/formatting/__init__.py +1 -1
- meerschaum/utils/formatting/_pipes.py +5 -4
- meerschaum/utils/formatting/_shell.py +11 -9
- meerschaum/utils/misc.py +246 -148
- meerschaum/utils/packages/__init__.py +10 -27
- meerschaum/utils/packages/_packages.py +41 -34
- meerschaum/utils/pipes.py +181 -0
- meerschaum/utils/process.py +1 -1
- meerschaum/utils/prompt.py +3 -1
- meerschaum/utils/schedule.py +2 -1
- meerschaum/utils/sql.py +121 -44
- meerschaum/utils/typing.py +1 -4
- meerschaum/utils/venv/_Venv.py +2 -2
- meerschaum/utils/venv/__init__.py +5 -7
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/METADATA +92 -96
- meerschaum-3.0.0rc2.dist-info/RECORD +283 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/WHEEL +1 -1
- meerschaum-3.0.0rc2.dist-info/licenses/NOTICE +2 -0
- meerschaum/api/models/_interfaces.py +0 -15
- meerschaum/api/models/_locations.py +0 -15
- meerschaum/api/models/_metrics.py +0 -15
- meerschaum/config/static/__init__.py +0 -186
- meerschaum-2.9.5.dist-info/RECORD +0 -263
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/licenses/LICENSE +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.9.5.dist-info → meerschaum-3.0.0rc2.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -13,7 +13,7 @@ import time
|
|
13
13
|
import threading
|
14
14
|
import multiprocessing
|
15
15
|
import functools
|
16
|
-
from datetime import datetime, timedelta
|
16
|
+
from datetime import datetime, timedelta, timezone
|
17
17
|
from typing import TYPE_CHECKING
|
18
18
|
|
19
19
|
import meerschaum as mrsm
|
@@ -28,6 +28,7 @@ from meerschaum.utils.typing import (
|
|
28
28
|
List,
|
29
29
|
)
|
30
30
|
from meerschaum.utils.warnings import warn, error
|
31
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
31
32
|
|
32
33
|
if TYPE_CHECKING:
|
33
34
|
pd = mrsm.attempt_import('pandas')
|
@@ -42,6 +43,7 @@ def sync(
|
|
42
43
|
pd.DataFrame,
|
43
44
|
Dict[str, List[Any]],
|
44
45
|
List[Dict[str, Any]],
|
46
|
+
str,
|
45
47
|
InferFetch
|
46
48
|
] = InferFetch,
|
47
49
|
begin: Union[datetime, int, str, None] = '',
|
@@ -71,6 +73,7 @@ def sync(
|
|
71
73
|
----------
|
72
74
|
df: Union[None, pd.DataFrame, Dict[str, List[Any]]], default None
|
73
75
|
An optional DataFrame to sync into the pipe. Defaults to `None`.
|
76
|
+
If `df` is a string, it will be parsed via `meerschaum.utils.dataframe.parse_simple_lines()`.
|
74
77
|
|
75
78
|
begin: Union[datetime, int, str, None], default ''
|
76
79
|
Optionally specify the earliest datetime to search for data.
|
@@ -134,6 +137,7 @@ def sync(
|
|
134
137
|
from meerschaum.utils.misc import df_is_chunk_generator, filter_keywords, filter_arguments
|
135
138
|
from meerschaum.utils.pool import get_pool
|
136
139
|
from meerschaum.config import get_config
|
140
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, get_current_timestamp
|
137
141
|
|
138
142
|
if (callback is not None or error_callback is not None) and blocking:
|
139
143
|
warn("Callback functions are only executed when blocking = False. Ignoring...")
|
@@ -161,8 +165,8 @@ def sync(
|
|
161
165
|
'safe_copy': True,
|
162
166
|
})
|
163
167
|
|
164
|
-
|
165
|
-
self.
|
168
|
+
self._invalidate_cache(debug=debug)
|
169
|
+
self._sync_ts = get_current_timestamp('ms')
|
166
170
|
|
167
171
|
def _sync(
|
168
172
|
p: mrsm.Pipe,
|
@@ -170,11 +174,12 @@ def sync(
|
|
170
174
|
'pd.DataFrame',
|
171
175
|
Dict[str, List[Any]],
|
172
176
|
List[Dict[str, Any]],
|
177
|
+
str,
|
173
178
|
InferFetch
|
174
179
|
] = InferFetch,
|
175
180
|
) -> SuccessTuple:
|
176
181
|
if df is None:
|
177
|
-
p.
|
182
|
+
p._invalidate_cache(debug=debug)
|
178
183
|
return (
|
179
184
|
False,
|
180
185
|
f"You passed `None` instead of data into `sync()` for {p}.\n"
|
@@ -186,9 +191,13 @@ def sync(
|
|
186
191
|
register_success, register_msg = p.register(debug=debug)
|
187
192
|
if not register_success:
|
188
193
|
if 'already' not in register_msg:
|
189
|
-
p.
|
194
|
+
p._invalidate_cache(debug=debug)
|
190
195
|
return register_success, register_msg
|
191
196
|
|
197
|
+
if isinstance(df, str):
|
198
|
+
from meerschaum.utils.dataframe import parse_simple_lines
|
199
|
+
df = parse_simple_lines(df)
|
200
|
+
|
192
201
|
### If connector is a plugin with a `sync()` method, return that instead.
|
193
202
|
### If the plugin does not have a `sync()` method but does have a `fetch()` method,
|
194
203
|
### use that instead.
|
@@ -203,13 +212,13 @@ def sync(
|
|
203
212
|
msg = f"{p} does not have a valid connector."
|
204
213
|
if p.connector_keys.startswith('plugin:'):
|
205
214
|
msg += f"\n Perhaps {p.connector_keys} has a syntax error?"
|
206
|
-
p.
|
215
|
+
p._invalidate_cache(debug=debug)
|
207
216
|
return False, msg
|
208
217
|
except Exception:
|
209
|
-
p.
|
218
|
+
p._invalidate_cache(debug=debug)
|
210
219
|
return False, f"Unable to create the connector for {p}."
|
211
220
|
|
212
|
-
### Sync in place if
|
221
|
+
### Sync in place if possible.
|
213
222
|
if (
|
214
223
|
str(self.connector) == str(self.instance_connector)
|
215
224
|
and
|
@@ -220,7 +229,7 @@ def sync(
|
|
220
229
|
get_config('system', 'experimental', 'inplace_sync')
|
221
230
|
):
|
222
231
|
with Venv(get_connector_plugin(self.instance_connector)):
|
223
|
-
p.
|
232
|
+
p._invalidate_cache(debug=debug)
|
224
233
|
_args, _kwargs = filter_arguments(
|
225
234
|
p.instance_connector.sync_pipe_inplace,
|
226
235
|
p,
|
@@ -243,7 +252,7 @@ def sync(
|
|
243
252
|
**kw
|
244
253
|
)
|
245
254
|
return_tuple = p.connector.sync(*_args, **_kwargs)
|
246
|
-
p.
|
255
|
+
p._invalidate_cache(debug=debug)
|
247
256
|
if not isinstance(return_tuple, tuple):
|
248
257
|
return_tuple = (
|
249
258
|
False,
|
@@ -256,7 +265,7 @@ def sync(
|
|
256
265
|
msg = f"Failed to sync {p} with exception: '" + str(e) + "'"
|
257
266
|
if debug:
|
258
267
|
error(msg, silent=False)
|
259
|
-
p.
|
268
|
+
p._invalidate_cache(debug=debug)
|
260
269
|
return False, msg
|
261
270
|
|
262
271
|
### Fetch the dataframe from the connector's `fetch()` method.
|
@@ -281,7 +290,7 @@ def sync(
|
|
281
290
|
df = None
|
282
291
|
|
283
292
|
if df is None:
|
284
|
-
p.
|
293
|
+
p._invalidate_cache(debug=debug)
|
285
294
|
return False, f"No data were fetched for {p}."
|
286
295
|
|
287
296
|
if isinstance(df, list):
|
@@ -295,7 +304,7 @@ def sync(
|
|
295
304
|
return success, message
|
296
305
|
|
297
306
|
if df is True:
|
298
|
-
p.
|
307
|
+
p._invalidate_cache(debug=debug)
|
299
308
|
return True, f"{p} is being synced in parallel."
|
300
309
|
|
301
310
|
### CHECKPOINT: Retrieved the DataFrame.
|
@@ -339,7 +348,7 @@ def sync(
|
|
339
348
|
+ f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
|
340
349
|
+ f"Sleeping for {_sleep_seconds} second"
|
341
350
|
+ ('s' if _sleep_seconds != 1 else '')
|
342
|
-
+ ":\n{_chunk_msg}"
|
351
|
+
+ f":\n{_chunk_msg}"
|
343
352
|
),
|
344
353
|
stack=False,
|
345
354
|
)
|
@@ -392,19 +401,45 @@ def sync(
|
|
392
401
|
return success, msg
|
393
402
|
|
394
403
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
395
|
-
|
404
|
+
dtypes = p.get_dtypes(debug=debug)
|
405
|
+
df = p.enforce_dtypes(
|
396
406
|
df,
|
397
407
|
chunksize=chunksize,
|
398
408
|
enforce=enforce_dtypes,
|
409
|
+
dtypes=dtypes,
|
399
410
|
debug=debug,
|
400
411
|
)
|
412
|
+
if p.autotime:
|
413
|
+
dt_col = p.columns.get('datetime', None)
|
414
|
+
ts_col = dt_col or mrsm.get_config(
|
415
|
+
'pipes', 'autotime', 'column_name_if_datetime_missing'
|
416
|
+
)
|
417
|
+
ts_typ = dtypes.get(ts_col, 'datetime') if ts_col else 'datetime'
|
418
|
+
if ts_col and hasattr(df, 'columns') and ts_col not in df.columns:
|
419
|
+
precision = p.get_precision(debug=debug)
|
420
|
+
now = get_current_timestamp(
|
421
|
+
precision_unit=precision.get(
|
422
|
+
'unit',
|
423
|
+
STATIC_CONFIG['dtypes']['datetime']['default_precision_unit']
|
424
|
+
),
|
425
|
+
precision_interval=precision.get('interval', 1),
|
426
|
+
round_to=(precision.get('round_to', 'down')),
|
427
|
+
as_int=(are_dtypes_equal(ts_typ, 'int')),
|
428
|
+
)
|
429
|
+
if debug:
|
430
|
+
dprint(f"Adding current timestamp to dataframe synced to {p}: {now}")
|
401
431
|
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
self.
|
407
|
-
|
432
|
+
df[ts_col] = now
|
433
|
+
kw['check_existing'] = dt_col is not None
|
434
|
+
|
435
|
+
### Capture special columns.
|
436
|
+
capture_success, capture_msg = self._persist_new_special_columns(
|
437
|
+
df,
|
438
|
+
dtypes=dtypes,
|
439
|
+
debug=debug,
|
440
|
+
)
|
441
|
+
if not capture_success:
|
442
|
+
warn(f"Failed to capture new special columns for {self}:\n{capture_msg}")
|
408
443
|
|
409
444
|
if debug:
|
410
445
|
dprint(
|
@@ -444,18 +479,17 @@ def sync(
|
|
444
479
|
|
445
480
|
### CHECKPOINT: Finished syncing. Handle caching.
|
446
481
|
_checkpoint(**kw)
|
447
|
-
if
|
482
|
+
if p.cache_pipe is not None:
|
448
483
|
if debug:
|
449
484
|
dprint("Caching retrieved dataframe.", **kw)
|
450
|
-
|
451
|
-
|
452
|
-
|
485
|
+
_sync_cache_tuple = p.cache_pipe.sync(df, debug=debug, **kw)
|
486
|
+
if not _sync_cache_tuple[0]:
|
487
|
+
warn(f"Failed to sync local cache for {self}.")
|
453
488
|
|
454
|
-
|
489
|
+
p._invalidate_cache(debug=debug)
|
455
490
|
return return_tuple
|
456
491
|
|
457
492
|
if blocking:
|
458
|
-
self._exists = None
|
459
493
|
return _sync(self, df=df)
|
460
494
|
|
461
495
|
from meerschaum.utils.threading import Thread
|
@@ -480,10 +514,10 @@ def sync(
|
|
480
514
|
)
|
481
515
|
thread.start()
|
482
516
|
except Exception as e:
|
483
|
-
self.
|
517
|
+
self._invalidate_cache(debug=debug)
|
484
518
|
return False, str(e)
|
485
519
|
|
486
|
-
self.
|
520
|
+
self._invalidate_cache(debug=debug)
|
487
521
|
return True, f"Spawned asyncronous sync for {self}."
|
488
522
|
|
489
523
|
|
@@ -529,7 +563,8 @@ def get_sync_time(
|
|
529
563
|
"""
|
530
564
|
from meerschaum.utils.venv import Venv
|
531
565
|
from meerschaum.connectors import get_connector_plugin
|
532
|
-
from meerschaum.utils.misc import
|
566
|
+
from meerschaum.utils.misc import filter_keywords
|
567
|
+
from meerschaum.utils.dtypes import round_time
|
533
568
|
from meerschaum.utils.warnings import warn
|
534
569
|
|
535
570
|
if not self.columns.get('datetime', None):
|
@@ -588,17 +623,16 @@ def exists(
|
|
588
623
|
import time
|
589
624
|
from meerschaum.utils.venv import Venv
|
590
625
|
from meerschaum.connectors import get_connector_plugin
|
591
|
-
from meerschaum.config import STATIC_CONFIG
|
592
626
|
from meerschaum.utils.debug import dprint
|
593
627
|
now = time.perf_counter()
|
594
|
-
|
628
|
+
cache_seconds = mrsm.get_config('pipes', 'sync', 'exists_cache_seconds')
|
595
629
|
|
596
630
|
_exists = self.__dict__.get('_exists', None)
|
597
631
|
if _exists:
|
598
632
|
exists_timestamp = self.__dict__.get('_exists_timestamp', None)
|
599
633
|
if exists_timestamp is not None:
|
600
634
|
delta = now - exists_timestamp
|
601
|
-
if delta <
|
635
|
+
if delta < cache_seconds:
|
602
636
|
if debug:
|
603
637
|
dprint(f"Returning cached `exists` for {self} ({round(delta, 2)} seconds old).")
|
604
638
|
return _exists
|
@@ -663,7 +697,6 @@ def filter_existing(
|
|
663
697
|
from meerschaum.utils.warnings import warn
|
664
698
|
from meerschaum.utils.debug import dprint
|
665
699
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
666
|
-
from meerschaum.utils.misc import round_time
|
667
700
|
from meerschaum.utils.dataframe import (
|
668
701
|
filter_unseen_df,
|
669
702
|
add_missing_cols_to_df,
|
@@ -675,6 +708,7 @@ def filter_existing(
|
|
675
708
|
to_datetime,
|
676
709
|
are_dtypes_equal,
|
677
710
|
value_is_null,
|
711
|
+
round_time,
|
678
712
|
)
|
679
713
|
from meerschaum.config import get_config
|
680
714
|
pd = import_pandas()
|
@@ -690,9 +724,13 @@ def filter_existing(
|
|
690
724
|
merge = pd.merge
|
691
725
|
NA = pd.NA
|
692
726
|
|
693
|
-
|
694
|
-
|
695
|
-
|
727
|
+
parameters = self.parameters
|
728
|
+
pipe_columns = parameters.get('columns', {})
|
729
|
+
primary_key = pipe_columns.get('primary', None)
|
730
|
+
dt_col = pipe_columns.get('datetime', None)
|
731
|
+
dt_type = parameters.get('dtypes', {}).get(dt_col, 'datetime') if dt_col else None
|
732
|
+
autoincrement = parameters.get('autoincrement', False)
|
733
|
+
autotime = parameters.get('autotime', False)
|
696
734
|
|
697
735
|
if primary_key and autoincrement and df is not None and primary_key in df.columns:
|
698
736
|
if safe_copy:
|
@@ -702,10 +740,18 @@ def filter_existing(
|
|
702
740
|
del df[primary_key]
|
703
741
|
_ = self.columns.pop(primary_key, None)
|
704
742
|
|
743
|
+
if dt_col and autotime and df is not None and dt_col in df.columns:
|
744
|
+
if safe_copy:
|
745
|
+
df = df.copy()
|
746
|
+
safe_copy = False
|
747
|
+
if df[dt_col].isnull().all():
|
748
|
+
del df[dt_col]
|
749
|
+
_ = self.columns.pop(dt_col, None)
|
750
|
+
|
705
751
|
def get_empty_df():
|
706
752
|
empty_df = pd.DataFrame([])
|
707
753
|
dtypes = dict(df.dtypes) if df is not None else {}
|
708
|
-
dtypes.update(self.dtypes)
|
754
|
+
dtypes.update(self.dtypes) if self.enforce else {}
|
709
755
|
pd_dtypes = {
|
710
756
|
col: to_pandas_dtype(str(typ))
|
711
757
|
for col, typ in dtypes.items()
|
@@ -721,15 +767,17 @@ def filter_existing(
|
|
721
767
|
|
722
768
|
### begin is the oldest data in the new dataframe
|
723
769
|
begin, end = None, None
|
724
|
-
dt_col = pipe_columns.get('datetime', None)
|
725
|
-
primary_key = pipe_columns.get('primary', None)
|
726
|
-
dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
|
727
770
|
|
728
771
|
if autoincrement and primary_key == dt_col and dt_col not in df.columns:
|
729
772
|
if enforce_dtypes:
|
730
773
|
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
731
774
|
return df, get_empty_df(), df
|
732
775
|
|
776
|
+
if autotime and dt_col and dt_col not in df.columns:
|
777
|
+
if enforce_dtypes:
|
778
|
+
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
779
|
+
return df, get_empty_df(), df
|
780
|
+
|
733
781
|
try:
|
734
782
|
min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
|
735
783
|
if is_dask and min_dt_val is not None:
|
@@ -846,7 +894,8 @@ def filter_existing(
|
|
846
894
|
and col in backtrack_df.columns
|
847
895
|
)
|
848
896
|
] if not primary_key else [primary_key]
|
849
|
-
|
897
|
+
|
898
|
+
self_dtypes = self.get_dtypes(debug=debug) if self.enforce else {}
|
850
899
|
on_cols_dtypes = {
|
851
900
|
col: to_pandas_dtype(typ)
|
852
901
|
for col, typ in self_dtypes.items()
|
@@ -999,160 +1048,36 @@ def get_num_workers(self, workers: Optional[int] = None) -> int:
|
|
999
1048
|
)
|
1000
1049
|
|
1001
1050
|
|
1002
|
-
def
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
|
1008
|
-
existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
|
1009
|
-
new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
|
1010
|
-
if not new_numeric_cols:
|
1011
|
-
return True, "Success"
|
1012
|
-
|
1013
|
-
self._attributes_sync_time = None
|
1014
|
-
dtypes = self.parameters.get('dtypes', {})
|
1015
|
-
dtypes.update({col: 'numeric' for col in new_numeric_cols})
|
1016
|
-
self.parameters['dtypes'] = dtypes
|
1017
|
-
if not self.temporary:
|
1018
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1019
|
-
if not edit_success:
|
1020
|
-
warn(f"Unable to update NUMERIC dtypes for {self}:\n{edit_msg}")
|
1021
|
-
|
1022
|
-
return edit_success, edit_msg
|
1023
|
-
|
1024
|
-
return True, "Success"
|
1025
|
-
|
1026
|
-
|
1027
|
-
def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1028
|
-
"""
|
1029
|
-
Check for new numeric columns and update the parameters.
|
1030
|
-
"""
|
1031
|
-
from meerschaum.utils.dataframe import get_uuid_cols
|
1032
|
-
uuid_cols = get_uuid_cols(df)
|
1033
|
-
existing_uuid_cols = [col for col, typ in self.dtypes.items() if typ == 'uuid']
|
1034
|
-
new_uuid_cols = [col for col in uuid_cols if col not in existing_uuid_cols]
|
1035
|
-
if not new_uuid_cols:
|
1036
|
-
return True, "Success"
|
1037
|
-
|
1038
|
-
self._attributes_sync_time = None
|
1039
|
-
dtypes = self.parameters.get('dtypes', {})
|
1040
|
-
dtypes.update({col: 'uuid' for col in new_uuid_cols})
|
1041
|
-
self.parameters['dtypes'] = dtypes
|
1042
|
-
if not self.temporary:
|
1043
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1044
|
-
if not edit_success:
|
1045
|
-
warn(f"Unable to update UUID dtypes for {self}:\n{edit_msg}")
|
1046
|
-
|
1047
|
-
return edit_success, edit_msg
|
1048
|
-
|
1049
|
-
return True, "Success"
|
1050
|
-
|
1051
|
-
|
1052
|
-
def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1053
|
-
"""
|
1054
|
-
Check for new JSON columns and update the parameters.
|
1055
|
-
"""
|
1056
|
-
from meerschaum.utils.dataframe import get_json_cols
|
1057
|
-
json_cols = get_json_cols(df)
|
1058
|
-
existing_json_cols = [col for col, typ in self.dtypes.items() if typ == 'json']
|
1059
|
-
new_json_cols = [col for col in json_cols if col not in existing_json_cols]
|
1060
|
-
if not new_json_cols:
|
1061
|
-
return True, "Success"
|
1062
|
-
|
1063
|
-
self._attributes_sync_time = None
|
1064
|
-
dtypes = self.parameters.get('dtypes', {})
|
1065
|
-
dtypes.update({col: 'json' for col in new_json_cols})
|
1066
|
-
self.parameters['dtypes'] = dtypes
|
1067
|
-
|
1068
|
-
if not self.temporary:
|
1069
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1070
|
-
if not edit_success:
|
1071
|
-
warn(f"Unable to update JSON dtypes for {self}:\n{edit_msg}")
|
1072
|
-
|
1073
|
-
return edit_success, edit_msg
|
1074
|
-
|
1075
|
-
return True, "Success"
|
1076
|
-
|
1077
|
-
|
1078
|
-
def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1051
|
+
def _persist_new_special_columns(
|
1052
|
+
self,
|
1053
|
+
df: 'pd.DataFrame',
|
1054
|
+
dtypes: Optional[Dict[str, str]] = None,
|
1055
|
+
debug: bool = False,
|
1056
|
+
) -> mrsm.SuccessTuple:
|
1079
1057
|
"""
|
1080
|
-
Check for new
|
1058
|
+
Check for new special columns and update the parameters accordingly.
|
1081
1059
|
"""
|
1082
|
-
from meerschaum.utils.dataframe import
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1060
|
+
from meerschaum.utils.dataframe import get_special_cols
|
1061
|
+
from meerschaum.utils.dtypes import dtype_is_special
|
1062
|
+
from meerschaum.utils.warnings import dprint
|
1063
|
+
|
1064
|
+
special_cols = get_special_cols(df)
|
1065
|
+
dtypes = dtypes or self.get_dtypes(debug=debug)
|
1066
|
+
existing_special_cols = {
|
1067
|
+
col: typ
|
1068
|
+
for col, typ in dtypes.items()
|
1069
|
+
if dtype_is_special(typ)
|
1070
|
+
}
|
1071
|
+
new_special_cols = {
|
1072
|
+
col: typ
|
1073
|
+
for col, typ in special_cols.items()
|
1074
|
+
if col not in existing_special_cols
|
1075
|
+
}
|
1076
|
+
if not new_special_cols:
|
1087
1077
|
return True, "Success"
|
1088
1078
|
|
1089
|
-
|
1090
|
-
|
1091
|
-
dtypes.update({col: 'bytes' for col in new_bytes_cols})
|
1092
|
-
self.parameters['dtypes'] = dtypes
|
1093
|
-
|
1094
|
-
if not self.temporary:
|
1095
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1096
|
-
if not edit_success:
|
1097
|
-
warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
|
1098
|
-
|
1099
|
-
return edit_success, edit_msg
|
1100
|
-
|
1101
|
-
return True, "Success"
|
1102
|
-
|
1103
|
-
|
1104
|
-
def _persist_new_geometry_columns(self, df, debug: bool = False) -> SuccessTuple:
|
1105
|
-
"""
|
1106
|
-
Check for new `geometry` columns and update the parameters.
|
1107
|
-
"""
|
1108
|
-
from meerschaum.utils.dataframe import get_geometry_cols
|
1109
|
-
geometry_cols_types_srids = get_geometry_cols(df, with_types_srids=True)
|
1110
|
-
existing_geometry_cols = [
|
1111
|
-
col
|
1112
|
-
for col, typ in self.dtypes.items()
|
1113
|
-
if typ.startswith('geometry') or typ.startswith('geography')
|
1114
|
-
]
|
1115
|
-
new_geometry_cols = [
|
1116
|
-
col
|
1117
|
-
for col in geometry_cols_types_srids
|
1118
|
-
if col not in existing_geometry_cols
|
1119
|
-
]
|
1120
|
-
if not new_geometry_cols:
|
1121
|
-
return True, "Success"
|
1079
|
+
if debug:
|
1080
|
+
dprint(f"New special columns:\n{new_special_cols}")
|
1122
1081
|
|
1123
1082
|
self._attributes_sync_time = None
|
1124
|
-
|
1125
|
-
|
1126
|
-
new_cols_types = {}
|
1127
|
-
for col, (geometry_type, srid) in geometry_cols_types_srids.items():
|
1128
|
-
if col not in new_geometry_cols:
|
1129
|
-
continue
|
1130
|
-
|
1131
|
-
new_dtype = "geometry"
|
1132
|
-
modifier = ""
|
1133
|
-
if not srid and geometry_type.lower() == 'geometry':
|
1134
|
-
new_cols_types[col] = new_dtype
|
1135
|
-
continue
|
1136
|
-
|
1137
|
-
modifier = "["
|
1138
|
-
if geometry_type.lower() != 'geometry':
|
1139
|
-
modifier += f"{geometry_type}"
|
1140
|
-
|
1141
|
-
if srid:
|
1142
|
-
if modifier != '[':
|
1143
|
-
modifier += ", "
|
1144
|
-
modifier += f"{srid}"
|
1145
|
-
modifier += "]"
|
1146
|
-
new_cols_types[col] = f"{new_dtype}{modifier}"
|
1147
|
-
|
1148
|
-
dtypes.update(new_cols_types)
|
1149
|
-
self.parameters['dtypes'] = dtypes
|
1150
|
-
|
1151
|
-
if not self.temporary:
|
1152
|
-
edit_success, edit_msg = self.edit(interactive=False, debug=debug)
|
1153
|
-
if not edit_success:
|
1154
|
-
warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
|
1155
|
-
|
1156
|
-
return edit_success, edit_msg
|
1157
|
-
|
1158
|
-
return True, "Success"
|
1083
|
+
return self.update_parameters({'dtypes': new_special_cols}, debug=debug)
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -12,7 +12,7 @@ import time
|
|
12
12
|
import meerschaum as mrsm
|
13
13
|
from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
|
14
14
|
from meerschaum.utils.warnings import warn, info
|
15
|
-
from meerschaum.
|
15
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
16
16
|
|
17
17
|
|
18
18
|
def verify(
|
@@ -418,7 +418,7 @@ def verify(
|
|
418
418
|
retry_failed_batch = False
|
419
419
|
|
420
420
|
batch_msg_to_print = (
|
421
|
-
f"{make_header('Completed batch ' + batch_counter_str + ':')}\n{batch_msg}"
|
421
|
+
f"{make_header('Completed batch ' + batch_counter_str + ':', left_pad=0)}\n{batch_msg}"
|
422
422
|
)
|
423
423
|
mrsm.pprint((batch_success, batch_msg_to_print))
|
424
424
|
|
@@ -426,7 +426,7 @@ def verify(
|
|
426
426
|
info(f"Retrying batch {batch_counter_str}...")
|
427
427
|
retry_batch_success, retry_batch_msg = process_batch(batch)
|
428
428
|
retry_batch_msg_to_print = (
|
429
|
-
f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
|
429
|
+
f"Retried {make_header('batch ' + batch_label, left_pad=0)}\n{retry_batch_msg}"
|
430
430
|
)
|
431
431
|
mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))
|
432
432
|
|
@@ -587,7 +587,7 @@ def get_bound_interval(self, debug: bool = False) -> Union[timedelta, int, None]
|
|
587
587
|
if not dt_col:
|
588
588
|
return bound_time_value
|
589
589
|
|
590
|
-
dt_typ = self.dtypes.get(dt_col, '
|
590
|
+
dt_typ = self.dtypes.get(dt_col, 'datetime')
|
591
591
|
if 'int' in dt_typ.lower():
|
592
592
|
return int(bound_time_value)
|
593
593
|
|
@@ -22,19 +22,13 @@ from meerschaum.utils.typing import (
|
|
22
22
|
Union,
|
23
23
|
)
|
24
24
|
from meerschaum.utils.warnings import error, warn
|
25
|
-
from meerschaum.config import get_config
|
26
|
-
from meerschaum.config._paths import (
|
27
|
-
PLUGINS_RESOURCES_PATH,
|
28
|
-
PLUGINS_ARCHIVES_RESOURCES_PATH,
|
29
|
-
PLUGINS_TEMP_RESOURCES_PATH,
|
30
|
-
VIRTENV_RESOURCES_PATH,
|
31
|
-
PLUGINS_DIR_PATHS,
|
32
|
-
)
|
33
25
|
_tmpversion = None
|
34
26
|
_ongoing_installations = set()
|
35
27
|
|
28
|
+
|
36
29
|
class Plugin:
|
37
30
|
"""Handle packaging of Meerschaum plugins."""
|
31
|
+
|
38
32
|
def __init__(
|
39
33
|
self,
|
40
34
|
name: str,
|
@@ -47,7 +41,8 @@ class Plugin:
|
|
47
41
|
repo_connector: Optional['mrsm.connectors.api.APIConnector'] = None,
|
48
42
|
repo: Union['mrsm.connectors.api.APIConnector', str, None] = None,
|
49
43
|
):
|
50
|
-
from meerschaum.
|
44
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
45
|
+
from meerschaum.config.paths import PLUGINS_ARCHIVES_RESOURCES_PATH, VIRTENV_RESOURCES_PATH
|
51
46
|
sep = STATIC_CONFIG['plugins']['repo_separator']
|
52
47
|
_repo = None
|
53
48
|
if sep in name:
|
@@ -130,6 +125,8 @@ class Plugin:
|
|
130
125
|
if self.__dict__.get('_module', None) is not None:
|
131
126
|
return self.module.__file__
|
132
127
|
|
128
|
+
from meerschaum.config.paths import PLUGINS_RESOURCES_PATH
|
129
|
+
|
133
130
|
potential_dir = PLUGINS_RESOURCES_PATH / self.name
|
134
131
|
if (
|
135
132
|
potential_dir.exists()
|
@@ -295,6 +292,7 @@ class Plugin:
|
|
295
292
|
from meerschaum.utils.packages import attempt_import, determine_version, reload_meerschaum
|
296
293
|
from meerschaum.utils.venv import init_venv
|
297
294
|
from meerschaum.utils.misc import safely_extract_tar
|
295
|
+
from meerschaum.config.paths import PLUGINS_TEMP_RESOURCES_PATH, PLUGINS_DIR_PATHS
|
298
296
|
old_cwd = os.getcwd()
|
299
297
|
old_version = ''
|
300
298
|
new_version = ''
|
@@ -722,7 +720,7 @@ class Plugin:
|
|
722
720
|
"""
|
723
721
|
from meerschaum.utils.warnings import warn
|
724
722
|
from meerschaum.config import get_config
|
725
|
-
from meerschaum.
|
723
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
726
724
|
from meerschaum.connectors.parse import is_valid_connector_keys
|
727
725
|
plugins = []
|
728
726
|
_deps = self.get_dependencies(debug=debug)
|
@@ -953,7 +951,7 @@ class Plugin:
|
|
953
951
|
"""
|
954
952
|
Include the repo keys with the plugin's name.
|
955
953
|
"""
|
956
|
-
from meerschaum.
|
954
|
+
from meerschaum._internal.static import STATIC_CONFIG
|
957
955
|
sep = STATIC_CONFIG['plugins']['repo_separator']
|
958
956
|
return self.name + sep + str(self.repo_connector)
|
959
957
|
|