meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +6 -1
- meerschaum/_internal/entry.py +16 -5
- meerschaum/actions/edit.py +6 -6
- meerschaum/actions/sql.py +12 -11
- meerschaum/api/dash/pages/login.py +17 -17
- meerschaum/api/dash/pipes.py +104 -13
- meerschaum/api/routes/_pipes.py +58 -40
- meerschaum/api/routes/_webterm.py +1 -0
- meerschaum/config/_edit.py +46 -19
- meerschaum/config/_read_config.py +20 -9
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +1 -1
- meerschaum/config/static/__init__.py +1 -0
- meerschaum/connectors/api/_APIConnector.py +1 -0
- meerschaum/connectors/api/_pipes.py +39 -8
- meerschaum/connectors/sql/_SQLConnector.py +4 -3
- meerschaum/connectors/sql/_pipes.py +511 -118
- meerschaum/connectors/sql/_sql.py +55 -15
- meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
- meerschaum/connectors/valkey/_pipes.py +11 -5
- meerschaum/core/Pipe/__init__.py +27 -9
- meerschaum/core/Pipe/_attributes.py +181 -18
- meerschaum/core/Pipe/_clear.py +10 -8
- meerschaum/core/Pipe/_copy.py +2 -0
- meerschaum/core/Pipe/_data.py +65 -17
- meerschaum/core/Pipe/_deduplicate.py +30 -28
- meerschaum/core/Pipe/_dtypes.py +4 -4
- meerschaum/core/Pipe/_fetch.py +12 -10
- meerschaum/core/Pipe/_sync.py +28 -11
- meerschaum/core/Pipe/_verify.py +52 -49
- meerschaum/utils/dataframe.py +64 -34
- meerschaum/utils/dtypes/__init__.py +25 -6
- meerschaum/utils/dtypes/sql.py +76 -33
- meerschaum/utils/misc.py +57 -24
- meerschaum/utils/packages/_packages.py +2 -1
- meerschaum/utils/schedule.py +7 -5
- meerschaum/utils/sql.py +697 -44
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
@@ -18,7 +18,7 @@ from meerschaum.utils.warnings import warn
|
|
18
18
|
_bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
|
19
19
|
### flavors that do not support chunks
|
20
20
|
_disallow_chunks_flavors = ['duckdb']
|
21
|
-
_max_chunks_flavors = {'sqlite': 1000
|
21
|
+
_max_chunks_flavors = {'sqlite': 1000}
|
22
22
|
SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
|
23
23
|
|
24
24
|
|
@@ -123,7 +123,8 @@ def read(
|
|
123
123
|
if chunks is not None and chunks <= 0:
|
124
124
|
return []
|
125
125
|
from meerschaum.utils.sql import sql_item_name, truncate_item_name
|
126
|
-
from meerschaum.utils.dtypes
|
126
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
|
127
|
+
from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
|
127
128
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
128
129
|
from meerschaum.utils.pool import get_pool
|
129
130
|
from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
|
@@ -133,12 +134,22 @@ def read(
|
|
133
134
|
pd = import_pandas()
|
134
135
|
dd = None
|
135
136
|
is_dask = 'dask' in pd.__name__
|
136
|
-
|
137
|
+
pandas = attempt_import('pandas')
|
137
138
|
is_dask = dd is not None
|
138
139
|
npartitions = chunksize_to_npartitions(chunksize)
|
139
140
|
if is_dask:
|
140
141
|
chunksize = None
|
141
142
|
schema = schema or self.schema
|
143
|
+
utc_dt_cols = [
|
144
|
+
col
|
145
|
+
for col, typ in dtype.items()
|
146
|
+
if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
|
147
|
+
] if dtype else []
|
148
|
+
|
149
|
+
if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
|
150
|
+
dtype = dtype.copy()
|
151
|
+
for col in utc_dt_cols:
|
152
|
+
dtype[col] = 'datetime64[ns]'
|
142
153
|
|
143
154
|
pool = get_pool(workers=workers)
|
144
155
|
sqlalchemy = attempt_import("sqlalchemy")
|
@@ -162,7 +173,6 @@ def read(
|
|
162
173
|
)
|
163
174
|
chunksize = _max_chunks_flavors[self.flavor]
|
164
175
|
|
165
|
-
### NOTE: A bug in duckdb_engine does not allow for chunks.
|
166
176
|
if chunksize is not None and self.flavor in _disallow_chunks_flavors:
|
167
177
|
chunksize = None
|
168
178
|
|
@@ -206,6 +216,9 @@ def read(
|
|
206
216
|
chunk_list = []
|
207
217
|
chunk_hook_results = []
|
208
218
|
def _process_chunk(_chunk, _retry_on_failure: bool = True):
|
219
|
+
if self.flavor in TIMEZONE_NAIVE_FLAVORS:
|
220
|
+
for col in utc_dt_cols:
|
221
|
+
_chunk[col] = coerce_timezone(_chunk[col], strip_timezone=False)
|
209
222
|
if not as_hook_results:
|
210
223
|
chunk_list.append(_chunk)
|
211
224
|
if chunk_hook is None:
|
@@ -485,6 +498,8 @@ def exec(
|
|
485
498
|
commit: Optional[bool] = None,
|
486
499
|
close: Optional[bool] = None,
|
487
500
|
with_connection: bool = False,
|
501
|
+
_connection=None,
|
502
|
+
_transaction=None,
|
488
503
|
**kw: Any
|
489
504
|
) -> Union[
|
490
505
|
sqlalchemy.engine.result.resultProxy,
|
@@ -495,7 +510,7 @@ def exec(
|
|
495
510
|
]:
|
496
511
|
"""
|
497
512
|
Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
|
498
|
-
|
513
|
+
|
499
514
|
If inserting data, please use bind variables to avoid SQL injection!
|
500
515
|
|
501
516
|
Parameters
|
@@ -552,15 +567,24 @@ def exec(
|
|
552
567
|
if not hasattr(query, 'compile'):
|
553
568
|
query = sqlalchemy.text(query)
|
554
569
|
|
555
|
-
connection = self.get_connection()
|
570
|
+
connection = _connection if _connection is not None else self.get_connection()
|
556
571
|
|
557
572
|
try:
|
558
|
-
transaction =
|
559
|
-
|
573
|
+
transaction = (
|
574
|
+
_transaction
|
575
|
+
if _transaction is not None else (
|
576
|
+
connection.begin()
|
577
|
+
if _commit
|
578
|
+
else None
|
579
|
+
)
|
580
|
+
)
|
581
|
+
except sqlalchemy.exc.InvalidRequestError as e:
|
582
|
+
if _connection is not None or _transaction is not None:
|
583
|
+
raise e
|
560
584
|
connection = self.get_connection(rebuild=True)
|
561
585
|
transaction = connection.begin()
|
562
586
|
|
563
|
-
if transaction is not None and not transaction.is_active:
|
587
|
+
if transaction is not None and not transaction.is_active and _transaction is not None:
|
564
588
|
connection = self.get_connection(rebuild=True)
|
565
589
|
transaction = connection.begin() if _commit else None
|
566
590
|
|
@@ -695,6 +719,8 @@ def to_sql(
|
|
695
719
|
debug: bool = False,
|
696
720
|
as_tuple: bool = False,
|
697
721
|
as_dict: bool = False,
|
722
|
+
_connection=None,
|
723
|
+
_transaction=None,
|
698
724
|
**kw
|
699
725
|
) -> Union[bool, SuccessTuple]:
|
700
726
|
"""
|
@@ -765,10 +791,11 @@ def to_sql(
|
|
765
791
|
DROP_IF_EXISTS_FLAVORS,
|
766
792
|
)
|
767
793
|
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
768
|
-
from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
|
794
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
|
769
795
|
from meerschaum.utils.dtypes.sql import (
|
770
796
|
NUMERIC_PRECISION_FLAVORS,
|
771
797
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
|
798
|
+
get_db_type_from_pd_type,
|
772
799
|
)
|
773
800
|
from meerschaum.connectors.sql._create_engine import flavor_configs
|
774
801
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
@@ -836,6 +863,8 @@ def to_sql(
|
|
836
863
|
to_sql_kw.update({
|
837
864
|
'parallel': True,
|
838
865
|
})
|
866
|
+
elif _connection is not None:
|
867
|
+
to_sql_kw['con'] = _connection
|
839
868
|
|
840
869
|
if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
|
841
870
|
if self.flavor == 'oracle':
|
@@ -848,7 +877,6 @@ def to_sql(
|
|
848
877
|
if not success:
|
849
878
|
warn(f"Unable to drop {name}")
|
850
879
|
|
851
|
-
|
852
880
|
### Enforce NVARCHAR(2000) as text instead of CLOB.
|
853
881
|
dtype = to_sql_kw.get('dtype', {})
|
854
882
|
for col, typ in df.dtypes.items():
|
@@ -857,11 +885,23 @@ def to_sql(
|
|
857
885
|
elif are_dtypes_equal(str(typ), 'int'):
|
858
886
|
dtype[col] = sqlalchemy.types.INTEGER
|
859
887
|
to_sql_kw['dtype'] = dtype
|
888
|
+
elif self.flavor == 'duckdb':
|
889
|
+
dtype = to_sql_kw.get('dtype', {})
|
890
|
+
dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
|
891
|
+
for col in dt_cols:
|
892
|
+
df[col] = coerce_timezone(df[col], strip_utc=False)
|
860
893
|
elif self.flavor == 'mssql':
|
861
894
|
dtype = to_sql_kw.get('dtype', {})
|
862
|
-
for col, typ in df.dtypes.items()
|
863
|
-
|
864
|
-
|
895
|
+
dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
|
896
|
+
new_dtype = {}
|
897
|
+
for col in dt_cols:
|
898
|
+
if col in dtype:
|
899
|
+
continue
|
900
|
+
dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True)
|
901
|
+
if col not in dtype:
|
902
|
+
new_dtype[col] = dt_typ
|
903
|
+
|
904
|
+
dtype.update(new_dtype)
|
865
905
|
to_sql_kw['dtype'] = dtype
|
866
906
|
|
867
907
|
### Check for JSON columns.
|
@@ -897,7 +937,7 @@ def to_sql(
|
|
897
937
|
|
898
938
|
try:
|
899
939
|
with warnings.catch_warnings():
|
900
|
-
warnings.filterwarnings('ignore'
|
940
|
+
warnings.filterwarnings('ignore')
|
901
941
|
df.to_sql(**to_sql_kw)
|
902
942
|
success = True
|
903
943
|
except Exception as e:
|
@@ -408,6 +408,7 @@ class ValkeyConnector(Connector):
|
|
408
408
|
-------
|
409
409
|
A list of dictionaries, where all keys and values are strings.
|
410
410
|
"""
|
411
|
+
from meerschaum.utils.dtypes import coerce_timezone
|
411
412
|
table_name = self.quote_table(table)
|
412
413
|
datetime_column_key = self.get_datetime_column_key(table)
|
413
414
|
datetime_column = self.get(datetime_column_key)
|
@@ -424,10 +425,10 @@ class ValkeyConnector(Connector):
|
|
424
425
|
dateutil_parser = mrsm.attempt_import('dateutil.parser')
|
425
426
|
|
426
427
|
if isinstance(begin, str):
|
427
|
-
begin = dateutil_parser.parse(begin)
|
428
|
+
begin = coerce_timezone(dateutil_parser.parse(begin))
|
428
429
|
|
429
430
|
if isinstance(end, str):
|
430
|
-
end = dateutil_parser.parse(end)
|
431
|
+
end = coerce_timezone(dateutil_parser.parse(end))
|
431
432
|
|
432
433
|
begin_ts = (
|
433
434
|
(
|
@@ -501,6 +501,7 @@ def sync_pipe(
|
|
501
501
|
-------
|
502
502
|
A `SuccessTuple` indicating success.
|
503
503
|
"""
|
504
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
504
505
|
dt_col = pipe.columns.get('datetime', None)
|
505
506
|
indices = [col for col in pipe.columns.values() if col]
|
506
507
|
table_name = self.quote_table(pipe.target)
|
@@ -508,6 +509,7 @@ def sync_pipe(
|
|
508
509
|
if is_dask:
|
509
510
|
df = df.compute()
|
510
511
|
upsert = pipe.parameters.get('upsert', False)
|
512
|
+
static = pipe.parameters.get('static', False)
|
511
513
|
|
512
514
|
def _serialize_indices_docs(_docs):
|
513
515
|
return [
|
@@ -526,7 +528,11 @@ def sync_pipe(
|
|
526
528
|
|
527
529
|
valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
|
528
530
|
new_dtypes = {
|
529
|
-
str(key):
|
531
|
+
str(key): (
|
532
|
+
str(val)
|
533
|
+
if not are_dtypes_equal(str(val), 'datetime')
|
534
|
+
else 'datetime64[ns, UTC]'
|
535
|
+
)
|
530
536
|
for key, val in df.dtypes.items()
|
531
537
|
if str(key) not in valkey_dtypes
|
532
538
|
}
|
@@ -539,7 +545,7 @@ def sync_pipe(
|
|
539
545
|
new_dtypes[col] = 'string'
|
540
546
|
df[col] = df[col].astype('string')
|
541
547
|
|
542
|
-
if new_dtypes:
|
548
|
+
if new_dtypes and not static:
|
543
549
|
valkey_dtypes.update(new_dtypes)
|
544
550
|
if 'valkey' not in pipe.parameters:
|
545
551
|
pipe.parameters['valkey'] = {}
|
@@ -625,7 +631,7 @@ def get_pipe_columns_types(
|
|
625
631
|
|
626
632
|
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
627
633
|
return {
|
628
|
-
col: get_db_type_from_pd_type(typ)
|
634
|
+
col: get_db_type_from_pd_type(typ, flavor='postgresql')
|
629
635
|
for col, typ in pipe.parameters.get('valkey', {}).get('dtypes', {}).items()
|
630
636
|
}
|
631
637
|
|
@@ -706,7 +712,7 @@ def get_sync_time(
|
|
706
712
|
"""
|
707
713
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
708
714
|
dt_col = pipe.columns.get('datetime', None)
|
709
|
-
dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
|
715
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
|
710
716
|
if not dt_col:
|
711
717
|
return None
|
712
718
|
|
@@ -733,7 +739,7 @@ def get_sync_time(
|
|
733
739
|
return (
|
734
740
|
int(dt_val)
|
735
741
|
if are_dtypes_equal(dt_typ, 'int')
|
736
|
-
else dateutil_parser.parse(str(dt_val))
|
742
|
+
else dateutil_parser.parse(str(dt_val))
|
737
743
|
)
|
738
744
|
except Exception as e:
|
739
745
|
warn(f"Failed to parse sync time for {pipe}:\n{e}")
|
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -92,6 +92,7 @@ class Pipe:
|
|
92
92
|
_get_data_as_iterator,
|
93
93
|
get_chunk_interval,
|
94
94
|
get_chunk_bounds,
|
95
|
+
parse_date_bounds,
|
95
96
|
)
|
96
97
|
from ._register import register
|
97
98
|
from ._attributes import (
|
@@ -101,8 +102,12 @@ class Pipe:
|
|
101
102
|
indices,
|
102
103
|
indexes,
|
103
104
|
dtypes,
|
105
|
+
autoincrement,
|
106
|
+
upsert,
|
107
|
+
static,
|
104
108
|
get_columns,
|
105
109
|
get_columns_types,
|
110
|
+
get_columns_indices,
|
106
111
|
get_indices,
|
107
112
|
tags,
|
108
113
|
get_id,
|
@@ -153,6 +158,9 @@ class Pipe:
|
|
153
158
|
dtypes: Optional[Dict[str, str]] = None,
|
154
159
|
instance: Optional[Union[str, InstanceConnector]] = None,
|
155
160
|
temporary: bool = False,
|
161
|
+
upsert: Optional[bool] = None,
|
162
|
+
autoincrement: Optional[bool] = None,
|
163
|
+
static: Optional[bool] = None,
|
156
164
|
mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
|
157
165
|
cache: bool = False,
|
158
166
|
debug: bool = False,
|
@@ -201,6 +209,15 @@ class Pipe:
|
|
201
209
|
instance: Optional[Union[str, InstanceConnector]], default None
|
202
210
|
Alias for `mrsm_instance`. If `mrsm_instance` is supplied, this value is ignored.
|
203
211
|
|
212
|
+
upsert: Optional[bool], default None
|
213
|
+
If `True`, set `upsert` to `True` in the parameters.
|
214
|
+
|
215
|
+
autoincrement: Optional[bool], default None
|
216
|
+
If `True`, set `autoincrement` in the parameters.
|
217
|
+
|
218
|
+
static: Optional[bool], default None
|
219
|
+
If `True`, set `static` in the parameters.
|
220
|
+
|
204
221
|
temporary: bool, default False
|
205
222
|
If `True`, prevent instance tables (pipes, users, plugins) from being created.
|
206
223
|
|
@@ -268,7 +285,7 @@ class Pipe:
|
|
268
285
|
or indexes
|
269
286
|
or self._attributes.get('parameters', {}).get('indices', None)
|
270
287
|
or self._attributes.get('parameters', {}).get('indexes', None)
|
271
|
-
)
|
288
|
+
)
|
272
289
|
if isinstance(indices, dict):
|
273
290
|
indices_key = (
|
274
291
|
'indexes'
|
@@ -292,6 +309,15 @@ class Pipe:
|
|
292
309
|
elif dtypes is not None:
|
293
310
|
warn(f"The provided dtypes are of invalid type '{type(dtypes)}'.")
|
294
311
|
|
312
|
+
if isinstance(upsert, bool):
|
313
|
+
self._attributes['parameters']['upsert'] = upsert
|
314
|
+
|
315
|
+
if isinstance(autoincrement, bool):
|
316
|
+
self._attributes['parameters']['autoincrement'] = autoincrement
|
317
|
+
|
318
|
+
if isinstance(static, bool):
|
319
|
+
self._attributes['parameters']['static'] = static
|
320
|
+
|
295
321
|
### NOTE: The parameters dictionary is {} by default.
|
296
322
|
### A Pipe may be registered without parameters, then edited,
|
297
323
|
### or a Pipe may be registered with parameters set in-memory first.
|
@@ -308,7 +334,6 @@ class Pipe:
|
|
308
334
|
|
309
335
|
self._cache = cache and get_config('system', 'experimental', 'cache')
|
310
336
|
|
311
|
-
|
312
337
|
@property
|
313
338
|
def meta(self):
|
314
339
|
"""
|
@@ -321,7 +346,6 @@ class Pipe:
|
|
321
346
|
'instance': self.instance_keys,
|
322
347
|
}
|
323
348
|
|
324
|
-
|
325
349
|
def keys(self) -> List[str]:
|
326
350
|
"""
|
327
351
|
Return the ordered keys for this pipe.
|
@@ -332,7 +356,6 @@ class Pipe:
|
|
332
356
|
if key != 'instance'
|
333
357
|
}
|
334
358
|
|
335
|
-
|
336
359
|
@property
|
337
360
|
def instance_connector(self) -> Union[InstanceConnector, None]:
|
338
361
|
"""
|
@@ -369,7 +392,6 @@ class Pipe:
|
|
369
392
|
return None
|
370
393
|
return self._connector
|
371
394
|
|
372
|
-
|
373
395
|
@property
|
374
396
|
def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
|
375
397
|
"""
|
@@ -391,7 +413,6 @@ class Pipe:
|
|
391
413
|
|
392
414
|
return self._cache_connector
|
393
415
|
|
394
|
-
|
395
416
|
@property
|
396
417
|
def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
|
397
418
|
"""
|
@@ -433,11 +454,9 @@ class Pipe:
|
|
433
454
|
|
434
455
|
return self._cache_pipe
|
435
456
|
|
436
|
-
|
437
457
|
def __str__(self, ansi: bool=False):
|
438
458
|
return pipe_repr(self, ansi=ansi)
|
439
459
|
|
440
|
-
|
441
460
|
def __eq__(self, other):
|
442
461
|
try:
|
443
462
|
return (
|
@@ -489,7 +508,6 @@ class Pipe:
|
|
489
508
|
"""
|
490
509
|
self.__init__(**_state)
|
491
510
|
|
492
|
-
|
493
511
|
def __getitem__(self, key: str) -> Any:
|
494
512
|
"""
|
495
513
|
Index the pipe's attributes.
|
@@ -7,6 +7,8 @@ Fetch and manipulate Pipes' attributes
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
+
|
11
|
+
import meerschaum as mrsm
|
10
12
|
from meerschaum.utils.typing import Tuple, Dict, SuccessTuple, Any, Union, Optional, List
|
11
13
|
from meerschaum.utils.warnings import warn
|
12
14
|
|
@@ -84,7 +86,7 @@ def columns(self, _columns: Union[Dict[str, str], List[str]]) -> None:
|
|
84
86
|
"""
|
85
87
|
if isinstance(_columns, (list, tuple)):
|
86
88
|
_columns = {col: col for col in _columns}
|
87
|
-
if not isinstance(
|
89
|
+
if not isinstance(_columns, dict):
|
88
90
|
warn(f"{self}.columns must be a dictionary, received {type(_columns)}.")
|
89
91
|
return
|
90
92
|
self.parameters['columns'] = _columns
|
@@ -103,10 +105,25 @@ def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
|
|
103
105
|
if indices_key not in self.parameters:
|
104
106
|
self.parameters[indices_key] = {}
|
105
107
|
_indices = self.parameters[indices_key]
|
108
|
+
_columns = self.columns
|
109
|
+
dt_col = _columns.get('datetime', None)
|
106
110
|
if not isinstance(_indices, dict):
|
107
111
|
_indices = {}
|
108
112
|
self.parameters[indices_key] = _indices
|
109
|
-
|
113
|
+
unique_cols = list(set((
|
114
|
+
[dt_col]
|
115
|
+
if dt_col
|
116
|
+
else []
|
117
|
+
) + [
|
118
|
+
col
|
119
|
+
for col_ix, col in _columns.items()
|
120
|
+
if col_ix != 'datetime'
|
121
|
+
]))
|
122
|
+
return {
|
123
|
+
**({'unique': unique_cols} if len(unique_cols) > 1 else {}),
|
124
|
+
**_columns,
|
125
|
+
**_indices
|
126
|
+
}
|
110
127
|
|
111
128
|
|
112
129
|
@property
|
@@ -188,6 +205,61 @@ def dtypes(self, _dtypes: Dict[str, Any]) -> None:
|
|
188
205
|
self.parameters['dtypes'] = _dtypes
|
189
206
|
|
190
207
|
|
208
|
+
@property
|
209
|
+
def upsert(self) -> bool:
|
210
|
+
"""
|
211
|
+
Return whether `upsert` is set for the pipe.
|
212
|
+
"""
|
213
|
+
if 'upsert' not in self.parameters:
|
214
|
+
self.parameters['upsert'] = False
|
215
|
+
return self.parameters['upsert']
|
216
|
+
|
217
|
+
|
218
|
+
@upsert.setter
|
219
|
+
def upsert(self, _upsert: bool) -> None:
|
220
|
+
"""
|
221
|
+
Set the `upsert` parameter for the pipe.
|
222
|
+
"""
|
223
|
+
self.parameters['upsert'] = _upsert
|
224
|
+
|
225
|
+
|
226
|
+
@property
|
227
|
+
def static(self) -> bool:
|
228
|
+
"""
|
229
|
+
Return whether `static` is set for the pipe.
|
230
|
+
"""
|
231
|
+
if 'static' not in self.parameters:
|
232
|
+
self.parameters['static'] = False
|
233
|
+
return self.parameters['static']
|
234
|
+
|
235
|
+
|
236
|
+
@static.setter
|
237
|
+
def static(self, _static: bool) -> None:
|
238
|
+
"""
|
239
|
+
Set the `static` parameter for the pipe.
|
240
|
+
"""
|
241
|
+
self.parameters['static'] = _static
|
242
|
+
|
243
|
+
|
244
|
+
@property
|
245
|
+
def autoincrement(self) -> bool:
|
246
|
+
"""
|
247
|
+
Return the `autoincrement` parameter for the pipe.
|
248
|
+
"""
|
249
|
+
if 'autoincrement' not in self.parameters:
|
250
|
+
self.parameters['autoincrement'] = False
|
251
|
+
|
252
|
+
return self.parameters['autoincrement']
|
253
|
+
|
254
|
+
|
255
|
+
@autoincrement.setter
|
256
|
+
def autoincrement(self, _autoincrement: bool) -> None:
|
257
|
+
"""
|
258
|
+
Set the `autoincrement` parameter for the pipe.
|
259
|
+
"""
|
260
|
+
self.parameters['autoincrement'] = _autoincrement
|
261
|
+
|
262
|
+
|
191
263
|
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
|
192
264
|
"""
|
193
265
|
Check if the requested columns are defined.
|
@@ -196,7 +268,7 @@ def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]
|
|
196
268
|
----------
|
197
269
|
*args: str
|
198
270
|
The column names to be retrieved.
|
199
|
-
|
271
|
+
|
200
272
|
error: bool, default False
|
201
273
|
If `True`, raise an `Exception` if the specified column is not defined.
|
202
274
|
|
@@ -233,12 +305,19 @@ def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]
|
|
233
305
|
return tuple(col_names)
|
234
306
|
|
235
307
|
|
236
|
-
def get_columns_types(
|
308
|
+
def get_columns_types(
|
309
|
+
self,
|
310
|
+
refresh: bool = False,
|
311
|
+
debug: bool = False,
|
312
|
+
) -> Union[Dict[str, str], None]:
|
237
313
|
"""
|
238
314
|
Get a dictionary of a pipe's column names and their types.
|
239
315
|
|
240
316
|
Parameters
|
241
317
|
----------
|
318
|
+
refresh: bool, default False
|
319
|
+
If `True`, invalidate the cache and fetch directly from the instance connector.
|
320
|
+
|
242
321
|
debug: bool, default False:
|
243
322
|
Verbosity toggle.
|
244
323
|
|
@@ -250,17 +329,91 @@ def get_columns_types(self, debug: bool = False) -> Union[Dict[str, str], None]:
|
|
250
329
|
--------
|
251
330
|
>>> pipe.get_columns_types()
|
252
331
|
{
|
253
|
-
'dt': 'TIMESTAMP
|
332
|
+
'dt': 'TIMESTAMP WITH TIMEZONE',
|
254
333
|
'id': 'BIGINT',
|
255
334
|
'val': 'DOUBLE PRECISION',
|
256
335
|
}
|
257
336
|
>>>
|
258
337
|
"""
|
259
|
-
|
338
|
+
import time
|
260
339
|
from meerschaum.connectors import get_connector_plugin
|
340
|
+
from meerschaum.config.static import STATIC_CONFIG
|
341
|
+
from meerschaum.utils.warnings import dprint
|
261
342
|
|
262
|
-
|
263
|
-
|
343
|
+
now = time.perf_counter()
|
344
|
+
cache_seconds = STATIC_CONFIG['pipes']['static_schema_cache_seconds']
|
345
|
+
static = self.parameters.get('static', False)
|
346
|
+
if not static:
|
347
|
+
refresh = True
|
348
|
+
if refresh:
|
349
|
+
_ = self.__dict__.pop('_columns_types_timestamp', None)
|
350
|
+
_ = self.__dict__.pop('_columns_types', None)
|
351
|
+
_columns_types = self.__dict__.get('_columns_types', None)
|
352
|
+
if _columns_types:
|
353
|
+
columns_types_timestamp = self.__dict__.get('_columns_types_timestamp', None)
|
354
|
+
if columns_types_timestamp is not None:
|
355
|
+
delta = now - columns_types_timestamp
|
356
|
+
if delta < cache_seconds:
|
357
|
+
if debug:
|
358
|
+
dprint(
|
359
|
+
f"Returning cached `columns_types` for {self} "
|
360
|
+
f"({round(delta, 2)} seconds old)."
|
361
|
+
)
|
362
|
+
return _columns_types
|
363
|
+
|
364
|
+
with mrsm.Venv(get_connector_plugin(self.instance_connector)):
|
365
|
+
_columns_types = (
|
366
|
+
self.instance_connector.get_pipe_columns_types(self, debug=debug)
|
367
|
+
if hasattr(self.instance_connector, 'get_pipe_columns_types')
|
368
|
+
else None
|
369
|
+
)
|
370
|
+
|
371
|
+
self.__dict__['_columns_types'] = _columns_types
|
372
|
+
self.__dict__['_columns_types_timestamp'] = now
|
373
|
+
return _columns_types or {}
|
374
|
+
|
375
|
+
|
376
|
+
def get_columns_indices(
|
377
|
+
self,
|
378
|
+
debug: bool = False,
|
379
|
+
refresh: bool = False,
|
380
|
+
) -> Dict[str, List[Dict[str, str]]]:
|
381
|
+
"""
|
382
|
+
Return a dictionary mapping columns to index information.
|
383
|
+
"""
|
384
|
+
import time
|
385
|
+
from meerschaum.connectors import get_connector_plugin
|
386
|
+
from meerschaum.config.static import STATIC_CONFIG
|
387
|
+
from meerschaum.utils.warnings import dprint
|
388
|
+
|
389
|
+
now = time.perf_counter()
|
390
|
+
exists_timeout_seconds = STATIC_CONFIG['pipes']['exists_timeout_seconds']
|
391
|
+
if refresh:
|
392
|
+
_ = self.__dict__.pop('_columns_indices_timestamp', None)
|
393
|
+
_ = self.__dict__.pop('_columns_indices', None)
|
394
|
+
_columns_indices = self.__dict__.get('_columns_indices', None)
|
395
|
+
if _columns_indices:
|
396
|
+
columns_indices_timestamp = self.__dict__.get('_columns_indices_timestamp', None)
|
397
|
+
if columns_indices_timestamp is not None:
|
398
|
+
delta = now - columns_indices_timestamp
|
399
|
+
if delta < exists_timeout_seconds:
|
400
|
+
if debug:
|
401
|
+
dprint(
|
402
|
+
f"Returning cached `columns_indices` for {self} "
|
403
|
+
f"({round(delta, 2)} seconds old)."
|
404
|
+
)
|
405
|
+
return _columns_indices
|
406
|
+
|
407
|
+
with mrsm.Venv(get_connector_plugin(self.instance_connector)):
|
408
|
+
_columns_indices = (
|
409
|
+
self.instance_connector.get_pipe_columns_indices(self, debug=debug)
|
410
|
+
if hasattr(self.instance_connector, 'get_pipe_columns_indices')
|
411
|
+
else None
|
412
|
+
)
|
413
|
+
|
414
|
+
self.__dict__['_columns_indices'] = _columns_indices
|
415
|
+
self.__dict__['_columns_indices_timestamp'] = now
|
416
|
+
return _columns_indices or {}
|
264
417
|
|
265
418
|
|
266
419
|
def get_id(self, **kw: Any) -> Union[int, None]:
|
@@ -274,7 +427,10 @@ def get_id(self, **kw: Any) -> Union[int, None]:
|
|
274
427
|
from meerschaum.connectors import get_connector_plugin
|
275
428
|
|
276
429
|
with Venv(get_connector_plugin(self.instance_connector)):
|
277
|
-
|
430
|
+
if hasattr(self.instance_connector, 'get_pipe_id'):
|
431
|
+
return self.instance_connector.get_pipe_id(self, **kw)
|
432
|
+
|
433
|
+
return None
|
278
434
|
|
279
435
|
|
280
436
|
@property
|
@@ -509,15 +665,22 @@ def get_indices(self) -> Dict[str, str]:
|
|
509
665
|
if cols
|
510
666
|
}
|
511
667
|
_index_names = {
|
512
|
-
ix: (
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
518
|
-
location_key=self.location_key,
|
519
|
-
)
|
668
|
+
ix: _index_template.format(
|
669
|
+
target=_target,
|
670
|
+
column_names=column_names,
|
671
|
+
connector_keys=self.connector_keys,
|
672
|
+
metric_key=self.connector_key,
|
673
|
+
location_key=self.location_key,
|
520
674
|
)
|
521
675
|
for ix, column_names in _column_names.items()
|
522
676
|
}
|
523
|
-
|
677
|
+
### NOTE: Skip any duplicate indices.
|
678
|
+
seen_index_names = {}
|
679
|
+
for ix, index_name in _index_names.items():
|
680
|
+
if index_name in seen_index_names:
|
681
|
+
continue
|
682
|
+
seen_index_names[index_name] = ix
|
683
|
+
return {
|
684
|
+
ix: index_name
|
685
|
+
for index_name, ix in seen_index_names.items()
|
686
|
+
}
|
meerschaum/core/Pipe/_clear.py
CHANGED
@@ -58,12 +58,14 @@ def clear(
|
|
58
58
|
from meerschaum.utils.venv import Venv
|
59
59
|
from meerschaum.connectors import get_connector_plugin
|
60
60
|
|
61
|
+
begin, end = self.parse_date_bounds(begin, end)
|
62
|
+
|
61
63
|
if self.cache_pipe is not None:
|
62
64
|
success, msg = self.cache_pipe.clear(
|
63
|
-
begin
|
64
|
-
end
|
65
|
-
params
|
66
|
-
debug
|
65
|
+
begin=begin,
|
66
|
+
end=end,
|
67
|
+
params=params,
|
68
|
+
debug=debug,
|
67
69
|
**kwargs
|
68
70
|
)
|
69
71
|
if not success:
|
@@ -72,9 +74,9 @@ def clear(
|
|
72
74
|
with Venv(get_connector_plugin(self.instance_connector)):
|
73
75
|
return self.instance_connector.clear_pipe(
|
74
76
|
self,
|
75
|
-
begin
|
76
|
-
end
|
77
|
-
params
|
78
|
-
debug
|
77
|
+
begin=begin,
|
78
|
+
end=end,
|
79
|
+
params=params,
|
80
|
+
debug=debug,
|
79
81
|
**kwargs
|
80
82
|
)
|