meerschaum 2.5.1__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. meerschaum/_internal/arguments/_parser.py +6 -1
  2. meerschaum/actions/edit.py +6 -6
  3. meerschaum/actions/sql.py +12 -11
  4. meerschaum/api/dash/pages/login.py +17 -17
  5. meerschaum/api/dash/pipes.py +13 -4
  6. meerschaum/api/routes/_pipes.py +58 -40
  7. meerschaum/config/_edit.py +46 -19
  8. meerschaum/config/_read_config.py +20 -9
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/config/stack/__init__.py +1 -1
  11. meerschaum/config/static/__init__.py +1 -0
  12. meerschaum/connectors/api/_APIConnector.py +1 -0
  13. meerschaum/connectors/api/_pipes.py +39 -8
  14. meerschaum/connectors/sql/_SQLConnector.py +4 -3
  15. meerschaum/connectors/sql/_pipes.py +511 -118
  16. meerschaum/connectors/sql/_sql.py +55 -15
  17. meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
  18. meerschaum/connectors/valkey/_pipes.py +11 -5
  19. meerschaum/core/Pipe/__init__.py +27 -9
  20. meerschaum/core/Pipe/_attributes.py +181 -18
  21. meerschaum/core/Pipe/_clear.py +10 -8
  22. meerschaum/core/Pipe/_copy.py +2 -0
  23. meerschaum/core/Pipe/_data.py +65 -17
  24. meerschaum/core/Pipe/_deduplicate.py +30 -28
  25. meerschaum/core/Pipe/_dtypes.py +4 -4
  26. meerschaum/core/Pipe/_fetch.py +12 -10
  27. meerschaum/core/Pipe/_sync.py +28 -11
  28. meerschaum/core/Pipe/_verify.py +52 -49
  29. meerschaum/utils/dataframe.py +62 -32
  30. meerschaum/utils/dtypes/__init__.py +25 -6
  31. meerschaum/utils/dtypes/sql.py +76 -33
  32. meerschaum/utils/misc.py +57 -24
  33. meerschaum/utils/sql.py +697 -44
  34. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/METADATA +1 -1
  35. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/RECORD +41 -41
  36. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
  37. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
  38. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
  39. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
  40. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
  41. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_sql.py

@@ -18,7 +18,7 @@ from meerschaum.utils.warnings import warn
 _bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
 ### flavors that do not support chunks
 _disallow_chunks_flavors = ['duckdb']
-_max_chunks_flavors = {'sqlite': 1000,}
+_max_chunks_flavors = {'sqlite': 1000}
 SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
 
 
@@ -123,7 +123,8 @@ def read(
     if chunks is not None and chunks <= 0:
         return []
     from meerschaum.utils.sql import sql_item_name, truncate_item_name
-    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS
+    from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
+    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
     from meerschaum.utils.packages import attempt_import, import_pandas
     from meerschaum.utils.pool import get_pool
     from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
@@ -133,12 +134,22 @@ def read(
     pd = import_pandas()
     dd = None
     is_dask = 'dask' in pd.__name__
-    pd = attempt_import('pandas')
+    pandas = attempt_import('pandas')
     is_dask = dd is not None
     npartitions = chunksize_to_npartitions(chunksize)
     if is_dask:
         chunksize = None
     schema = schema or self.schema
+    utc_dt_cols = [
+        col
+        for col, typ in dtype.items()
+        if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
+    ] if dtype else []
+
+    if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
+        dtype = dtype.copy()
+        for col in utc_dt_cols:
+            dtype[col] = 'datetime64[ns]'
 
     pool = get_pool(workers=workers)
     sqlalchemy = attempt_import("sqlalchemy")
@@ -162,7 +173,6 @@ def read(
         )
         chunksize = _max_chunks_flavors[self.flavor]
 
-    ### NOTE: A bug in duckdb_engine does not allow for chunks.
     if chunksize is not None and self.flavor in _disallow_chunks_flavors:
         chunksize = None
 
@@ -206,6 +216,9 @@ def read(
     chunk_list = []
     chunk_hook_results = []
     def _process_chunk(_chunk, _retry_on_failure: bool = True):
+        if self.flavor in TIMEZONE_NAIVE_FLAVORS:
+            for col in utc_dt_cols:
+                _chunk[col] = coerce_timezone(_chunk[col], strip_timezone=False)
         if not as_hook_results:
             chunk_list.append(_chunk)
         if chunk_hook is None:
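
Note: the net effect of the read() changes above is that flavors listed in TIMEZONE_NAIVE_FLAVORS are read with naive datetime dtypes and the UTC columns are re-localized per chunk. A minimal pandas-only sketch of that re-localization (illustrative, not Meerschaum's exact coerce_timezone implementation):

    import pandas as pd

    # A column as a timezone-naive flavor would return it: naive values known to be UTC.
    naive = pd.Series(pd.to_datetime(['2024-01-01 00:00:00', '2024-01-01 01:00:00']))

    # Roughly what coerce_timezone(..., strip_timezone=False) restores: UTC-aware values.
    aware = naive.dt.tz_localize('UTC')
    print(aware.dtype)  # datetime64[ns, UTC]
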
@@ -485,6 +498,8 @@ def exec(
     commit: Optional[bool] = None,
     close: Optional[bool] = None,
     with_connection: bool = False,
+    _connection=None,
+    _transaction=None,
     **kw: Any
 ) -> Union[
     sqlalchemy.engine.result.resultProxy,
@@ -495,7 +510,7 @@ def exec(
 ]:
     """
     Execute SQL code and return the `sqlalchemy` result, e.g. when calling stored procedures.
-
+
     If inserting data, please use bind variables to avoid SQL injection!
 
     Parameters
@@ -552,15 +567,24 @@ def exec(
     if not hasattr(query, 'compile'):
         query = sqlalchemy.text(query)
 
-    connection = self.get_connection()
+    connection = _connection if _connection is not None else self.get_connection()
 
     try:
-        transaction = connection.begin() if _commit else None
-    except sqlalchemy.exc.InvalidRequestError:
+        transaction = (
+            _transaction
+            if _transaction is not None else (
+                connection.begin()
+                if _commit
+                else None
+            )
+        )
+    except sqlalchemy.exc.InvalidRequestError as e:
+        if _connection is not None or _transaction is not None:
+            raise e
         connection = self.get_connection(rebuild=True)
         transaction = connection.begin()
 
-    if transaction is not None and not transaction.is_active:
+    if transaction is not None and not transaction.is_active and _transaction is not None:
         connection = self.get_connection(rebuild=True)
         transaction = connection.begin() if _commit else None
 
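
Note: the private _connection and _transaction parameters let callers run exec() inside a connection and transaction they already hold. A plain-SQLAlchemy sketch of the pattern being enabled (the engine URL and statements here are placeholders):

    import sqlalchemy

    engine = sqlalchemy.create_engine('sqlite://')  # placeholder engine for illustration

    # Several statements committed as one unit on a shared connection/transaction.
    with engine.connect() as connection:
        transaction = connection.begin()
        connection.execute(sqlalchemy.text("CREATE TABLE t (x INTEGER)"))
        connection.execute(sqlalchemy.text("INSERT INTO t (x) VALUES (1)"))
        transaction.commit()
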
@@ -695,6 +719,8 @@ def to_sql(
     debug: bool = False,
     as_tuple: bool = False,
     as_dict: bool = False,
+    _connection=None,
+    _transaction=None,
     **kw
 ) -> Union[bool, SuccessTuple]:
     """
@@ -765,10 +791,11 @@ def to_sql(
         DROP_IF_EXISTS_FLAVORS,
     )
     from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
+    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
+        get_db_type_from_pd_type,
     )
     from meerschaum.connectors.sql._create_engine import flavor_configs
     from meerschaum.utils.packages import attempt_import, import_pandas
@@ -836,6 +863,8 @@ def to_sql(
         to_sql_kw.update({
             'parallel': True,
         })
+    elif _connection is not None:
+        to_sql_kw['con'] = _connection
 
     if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     if self.flavor == 'oracle':
@@ -848,7 +877,6 @@ def to_sql(
         if not success:
             warn(f"Unable to drop {name}")
 
-
         ### Enforce NVARCHAR(2000) as text instead of CLOB.
         dtype = to_sql_kw.get('dtype', {})
         for col, typ in df.dtypes.items():
@@ -857,11 +885,23 @@ def to_sql(
             elif are_dtypes_equal(str(typ), 'int'):
                 dtype[col] = sqlalchemy.types.INTEGER
         to_sql_kw['dtype'] = dtype
+    elif self.flavor == 'duckdb':
+        dtype = to_sql_kw.get('dtype', {})
+        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
+        for col in dt_cols:
+            df[col] = coerce_timezone(df[col], strip_utc=False)
     elif self.flavor == 'mssql':
         dtype = to_sql_kw.get('dtype', {})
-        for col, typ in df.dtypes.items():
-            if are_dtypes_equal(str(typ), 'bool'):
-                dtype[col] = sqlalchemy.types.INTEGER
+        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
+        new_dtype = {}
+        for col in dt_cols:
+            if col in dtype:
+                continue
+            dt_typ = get_db_type_from_pd_type(str(df.dtypes[col]), self.flavor, as_sqlalchemy=True)
+            if col not in dtype:
+                new_dtype[col] = dt_typ
+
+        dtype.update(new_dtype)
         to_sql_kw['dtype'] = dtype
 
     ### Check for JSON columns.
@@ -897,7 +937,7 @@ def to_sql(
 
     try:
         with warnings.catch_warnings():
-            warnings.filterwarnings('ignore', 'case sensitivity issues')
+            warnings.filterwarnings('ignore')
             df.to_sql(**to_sql_kw)
         success = True
     except Exception as e:
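
Note: the mssql branch above now hands pandas' to_sql() explicit SQLAlchemy types for datetime columns instead of only coercing booleans. A hedged sketch of the same shape with a stand-in engine and type (the real code derives the flavor-specific type via get_db_type_from_pd_type(..., as_sqlalchemy=True)):

    import pandas as pd
    import sqlalchemy

    df = pd.DataFrame({'dt': pd.to_datetime(['2024-01-01', '2024-01-02']), 'val': [1.0, 2.0]})

    # Collect datetime columns and give to_sql() an explicit SQLAlchemy type for each.
    dt_cols = [col for col, typ in df.dtypes.items() if pd.api.types.is_datetime64_any_dtype(typ)]
    dtype = {col: sqlalchemy.types.DateTime() for col in dt_cols}

    engine = sqlalchemy.create_engine('sqlite://')  # stand-in engine; the diff targets MSSQL
    df.to_sql('example', engine, index=False, dtype=dtype)
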
meerschaum/connectors/valkey/_ValkeyConnector.py

@@ -408,6 +408,7 @@ class ValkeyConnector(Connector):
         -------
         A list of dictionaries, where all keys and values are strings.
         """
+        from meerschaum.utils.dtypes import coerce_timezone
         table_name = self.quote_table(table)
         datetime_column_key = self.get_datetime_column_key(table)
         datetime_column = self.get(datetime_column_key)
@@ -424,10 +425,10 @@ class ValkeyConnector(Connector):
         dateutil_parser = mrsm.attempt_import('dateutil.parser')
 
         if isinstance(begin, str):
-            begin = dateutil_parser.parse(begin)
+            begin = coerce_timezone(dateutil_parser.parse(begin))
 
         if isinstance(end, str):
-            end = dateutil_parser.parse(end)
+            end = coerce_timezone(dateutil_parser.parse(end))
 
         begin_ts = (
             (
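
Note: begin/end strings parsed with dateutil are now passed through coerce_timezone before being converted to epoch timestamps. A rough standard-library equivalent of that normalization for a naive parse result:

    from datetime import timezone
    from dateutil import parser as dateutil_parser

    begin = dateutil_parser.parse('2024-01-01 00:00:00')

    # Treat a naive bound as UTC so it compares cleanly against stored UTC timestamps.
    if begin.tzinfo is None:
        begin = begin.replace(tzinfo=timezone.utc)
    print(begin.isoformat())  # 2024-01-01T00:00:00+00:00
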
meerschaum/connectors/valkey/_pipes.py

@@ -501,6 +501,7 @@ def sync_pipe(
     -------
     A `SuccessTuple` indicating success.
     """
+    from meerschaum.utils.dtypes import are_dtypes_equal
     dt_col = pipe.columns.get('datetime', None)
     indices = [col for col in pipe.columns.values() if col]
     table_name = self.quote_table(pipe.target)
@@ -508,6 +509,7 @@ def sync_pipe(
     if is_dask:
         df = df.compute()
     upsert = pipe.parameters.get('upsert', False)
+    static = pipe.parameters.get('static', False)
 
     def _serialize_indices_docs(_docs):
         return [
@@ -526,7 +528,11 @@ def sync_pipe(
 
     valkey_dtypes = pipe.parameters.get('valkey', {}).get('dtypes', {})
     new_dtypes = {
-        str(key): str(val)
+        str(key): (
+            str(val)
+            if not are_dtypes_equal(str(val), 'datetime')
+            else 'datetime64[ns, UTC]'
+        )
         for key, val in df.dtypes.items()
         if str(key) not in valkey_dtypes
     }
@@ -539,7 +545,7 @@ def sync_pipe(
             new_dtypes[col] = 'string'
             df[col] = df[col].astype('string')
 
-    if new_dtypes:
+    if new_dtypes and not static:
         valkey_dtypes.update(new_dtypes)
         if 'valkey' not in pipe.parameters:
             pipe.parameters['valkey'] = {}
@@ -625,7 +631,7 @@ def get_pipe_columns_types(
 
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     return {
-        col: get_db_type_from_pd_type(typ)
+        col: get_db_type_from_pd_type(typ, flavor='postgresql')
        for col, typ in pipe.parameters.get('valkey', {}).get('dtypes', {}).items()
     }
 
@@ -706,7 +712,7 @@ def get_sync_time(
     """
     from meerschaum.utils.dtypes import are_dtypes_equal
     dt_col = pipe.columns.get('datetime', None)
-    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
+    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
     if not dt_col:
         return None
 
@@ -733,7 +739,7 @@ def get_sync_time(
         return (
             int(dt_val)
             if are_dtypes_equal(dt_typ, 'int')
-            else dateutil_parser.parse(str(dt_val)).replace(tzinfo=None)
+            else dateutil_parser.parse(str(dt_val))
         )
     except Exception as e:
         warn(f"Failed to parse sync time for {pipe}:\n{e}")
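
Note: get_sync_time() now defaults the datetime dtype to 'datetime64[ns, UTC]' and no longer strips tzinfo from the parsed value, so sync times come back timezone-aware. For example:

    from dateutil import parser as dateutil_parser

    dt_val = '2024-01-01T00:00:00+00:00'

    # Previously the result was .replace(tzinfo=None); now the UTC offset survives.
    sync_time = dateutil_parser.parse(str(dt_val))
    print(sync_time.tzinfo)  # tzutc()
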
meerschaum/core/Pipe/__init__.py

@@ -92,6 +92,7 @@ class Pipe:
         _get_data_as_iterator,
         get_chunk_interval,
         get_chunk_bounds,
+        parse_date_bounds,
     )
     from ._register import register
     from ._attributes import (
@@ -101,8 +102,12 @@ class Pipe:
         indices,
         indexes,
         dtypes,
+        autoincrement,
+        upsert,
+        static,
         get_columns,
         get_columns_types,
+        get_columns_indices,
         get_indices,
         tags,
         get_id,
@@ -153,6 +158,9 @@ class Pipe:
         dtypes: Optional[Dict[str, str]] = None,
         instance: Optional[Union[str, InstanceConnector]] = None,
         temporary: bool = False,
+        upsert: Optional[bool] = None,
+        autoincrement: Optional[bool] = None,
+        static: Optional[bool] = None,
         mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
         cache: bool = False,
         debug: bool = False,
@@ -201,6 +209,15 @@ class Pipe:
         instance: Optional[Union[str, InstanceConnector]], default None
             Alias for `mrsm_instance`. If `mrsm_instance` is supplied, this value is ignored.
 
+        upsert: Optional[bool], default None
+            If `True`, set `upsert` to `True` in the parameters.
+
+        autoincrement: Optional[bool], default None
+            If `True`, set `autoincrement` in the parameters.
+
+        static: Optional[bool], default None
+            If `True`, set `static` in the parameters.
+
         temporary: bool, default False
             If `True`, prevent instance tables (pipes, users, plugins) from being created.
 
@@ -268,7 +285,7 @@ class Pipe:
             or indexes
             or self._attributes.get('parameters', {}).get('indices', None)
             or self._attributes.get('parameters', {}).get('indexes', None)
-        ) or columns
+        )
         if isinstance(indices, dict):
             indices_key = (
                 'indexes'
@@ -292,6 +309,15 @@ class Pipe:
         elif dtypes is not None:
             warn(f"The provided dtypes are of invalid type '{type(dtypes)}'.")
 
+        if isinstance(upsert, bool):
+            self._attributes['parameters']['upsert'] = upsert
+
+        if isinstance(autoincrement, bool):
+            self._attributes['parameters']['autoincrement'] = autoincrement
+
+        if isinstance(static, bool):
+            self._attributes['parameters']['static'] = static
+
         ### NOTE: The parameters dictionary is {} by default.
         ### A Pipe may be registered without parameters, then edited,
         ### or a Pipe may be registered with parameters set in-memory first.
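
Note: the new upsert, autoincrement, and static keyword arguments are written into pipe.parameters only when passed as booleans. A usage sketch (the connector and metric keys here are examples, not part of the diff):

    import meerschaum as mrsm

    pipe = mrsm.Pipe(
        'demo', 'weather',
        columns={'datetime': 'dt', 'id': 'station'},
        upsert=True,
        static=True,
    )

    # For an in-memory (not yet registered) pipe, the flags land in the parameters dict.
    print(pipe.parameters.get('upsert'), pipe.parameters.get('static'))  # True True
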
@@ -308,7 +334,6 @@ class Pipe:
 
         self._cache = cache and get_config('system', 'experimental', 'cache')
 
-
     @property
     def meta(self):
         """
@@ -321,7 +346,6 @@ class Pipe:
             'instance': self.instance_keys,
         }
 
-
     def keys(self) -> List[str]:
         """
         Return the ordered keys for this pipe.
@@ -332,7 +356,6 @@ class Pipe:
             if key != 'instance'
         }
 
-
     @property
     def instance_connector(self) -> Union[InstanceConnector, None]:
         """
@@ -369,7 +392,6 @@ class Pipe:
             return None
         return self._connector
 
-
     @property
     def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
         """
@@ -391,7 +413,6 @@ class Pipe:
 
         return self._cache_connector
 
-
     @property
     def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
         """
@@ -433,11 +454,9 @@ class Pipe:
 
         return self._cache_pipe
 
-
     def __str__(self, ansi: bool=False):
         return pipe_repr(self, ansi=ansi)
 
-
     def __eq__(self, other):
         try:
             return (
@@ -489,7 +508,6 @@ class Pipe:
         """
         self.__init__(**_state)
 
-
     def __getitem__(self, key: str) -> Any:
         """
         Index the pipe's attributes.
meerschaum/core/Pipe/_attributes.py

@@ -7,6 +7,8 @@ Fetch and manipulate Pipes' attributes
 """
 
 from __future__ import annotations
+
+import meerschaum as mrsm
 from meerschaum.utils.typing import Tuple, Dict, SuccessTuple, Any, Union, Optional, List
 from meerschaum.utils.warnings import warn
 
@@ -84,7 +86,7 @@ def columns(self, _columns: Union[Dict[str, str], List[str]]) -> None:
     """
     if isinstance(_columns, (list, tuple)):
         _columns = {col: col for col in _columns}
-    if not isinstance(columns, dict):
+    if not isinstance(_columns, dict):
         warn(f"{self}.columns must be a dictionary, received {type(_columns)}.")
         return
     self.parameters['columns'] = _columns
@@ -103,10 +105,25 @@ def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
     if indices_key not in self.parameters:
         self.parameters[indices_key] = {}
     _indices = self.parameters[indices_key]
+    _columns = self.columns
+    dt_col = _columns.get('datetime', None)
     if not isinstance(_indices, dict):
         _indices = {}
         self.parameters[indices_key] = _indices
-    return {**self.columns, **_indices}
+    unique_cols = list(set((
+        [dt_col]
+        if dt_col
+        else []
+    ) + [
+        col
+        for col_ix, col in _columns.items()
+        if col_ix != 'datetime'
+    ]))
+    return {
+        **({'unique': unique_cols} if len(unique_cols) > 1 else {}),
+        **_columns,
+        **_indices
+    }
 
 
 @property
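
Note: with more than one index column configured, the indices property now also advertises a combined 'unique' entry alongside the per-column entries. A sketch of the expected shape (the list order may vary because it is built from a set):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'temperature', columns={'datetime': 'dt', 'id': 'station'})
    print(pipe.indices)
    # e.g. {'unique': ['dt', 'station'], 'datetime': 'dt', 'id': 'station'}
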
@@ -188,6 +205,61 @@ def dtypes(self, _dtypes: Dict[str, Any]) -> None:
     self.parameters['dtypes'] = _dtypes
 
 
+@property
+def upsert(self) -> bool:
+    """
+    Return whether `upsert` is set for the pipe.
+    """
+    if 'upsert' not in self.parameters:
+        self.parameters['upsert'] = False
+    return self.parameters['upsert']
+
+
+@upsert.setter
+def upsert(self, _upsert: bool) -> None:
+    """
+    Set the `upsert` parameter for the pipe.
+    """
+    self.parameters['upsert'] = _upsert
+
+
+@property
+def static(self) -> bool:
+    """
+    Return whether `static` is set for the pipe.
+    """
+    if 'static' not in self.parameters:
+        self.parameters['static'] = False
+    return self.parameters['static']
+
+
+@static.setter
+def static(self, _static: bool) -> None:
+    """
+    Set the `static` parameter for the pipe.
+    """
+    self.parameters['static'] = _static
+
+
+@property
+def autoincrement(self) -> bool:
+    """
+    Return the `autoincrement` parameter for the pipe.
+    """
+    if 'autoincrement' not in self.parameters:
+        self.parameters['autoincrement'] = False
+
+    return self.parameters['autoincrement']
+
+
+@autoincrement.setter
+def autoincrement(self, _autoincrement: bool) -> None:
+    """
+    Set the `autoincrement` parameter for the pipe.
+    """
+    self.parameters['autoincrement'] = _autoincrement
+
+
 def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
     """
     Check if the requested columns are defined.
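
Note: reading one of the new properties seeds a False default into the parameters dictionary, and assigning to it writes straight through. For example:

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'readings', columns={'datetime': 'dt'})

    print(pipe.upsert)                 # False (default is seeded into pipe.parameters)
    pipe.static = True
    print(pipe.parameters['static'])   # True
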
@@ -196,7 +268,7 @@ def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]
     ----------
     *args: str
         The column names to be retrieved.
-
+
     error: bool, default False
         If `True`, raise an `Exception` if the specified column is not defined.
 
@@ -233,12 +305,19 @@ def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]
     return tuple(col_names)
 
 
-def get_columns_types(self, debug: bool = False) -> Union[Dict[str, str], None]:
+def get_columns_types(
+    self,
+    refresh: bool = False,
+    debug: bool = False,
+) -> Union[Dict[str, str], None]:
     """
     Get a dictionary of a pipe's column names and their types.
 
     Parameters
     ----------
+    refresh: bool, default False
+        If `True`, invalidate the cache and fetch directly from the instance connector.
+
     debug: bool, default False:
         Verbosity toggle.
 
@@ -250,17 +329,91 @@ def get_columns_types(self, debug: bool = False) -> Union[Dict[str, str], None]:
     --------
     >>> pipe.get_columns_types()
     {
-        'dt': 'TIMESTAMP WITHOUT TIMEZONE',
+        'dt': 'TIMESTAMP WITH TIMEZONE',
         'id': 'BIGINT',
         'val': 'DOUBLE PRECISION',
     }
     >>>
     """
-    from meerschaum.utils.venv import Venv
+    import time
     from meerschaum.connectors import get_connector_plugin
+    from meerschaum.config.static import STATIC_CONFIG
+    from meerschaum.utils.warnings import dprint
 
-    with Venv(get_connector_plugin(self.instance_connector)):
-        return self.instance_connector.get_pipe_columns_types(self, debug=debug)
+    now = time.perf_counter()
+    cache_seconds = STATIC_CONFIG['pipes']['static_schema_cache_seconds']
+    static = self.parameters.get('static', False)
+    if not static:
+        refresh = True
+    if refresh:
+        _ = self.__dict__.pop('_columns_types_timestamp', None)
+        _ = self.__dict__.pop('_columns_types', None)
+    _columns_types = self.__dict__.get('_columns_types', None)
+    if _columns_types:
+        columns_types_timestamp = self.__dict__.get('_columns_types_timestamp', None)
+        if columns_types_timestamp is not None:
+            delta = now - columns_types_timestamp
+            if delta < cache_seconds:
+                if debug:
+                    dprint(
+                        f"Returning cached `columns_types` for {self} "
+                        f"({round(delta, 2)} seconds old)."
+                    )
+                return _columns_types
+
+    with mrsm.Venv(get_connector_plugin(self.instance_connector)):
+        _columns_types = (
+            self.instance_connector.get_pipe_columns_types(self, debug=debug)
+            if hasattr(self.instance_connector, 'get_pipe_columns_types')
+            else None
+        )
+
+    self.__dict__['_columns_types'] = _columns_types
+    self.__dict__['_columns_types_timestamp'] = now
+    return _columns_types or {}
+
+
+def get_columns_indices(
+    self,
+    debug: bool = False,
+    refresh: bool = False,
+) -> Dict[str, List[Dict[str, str]]]:
+    """
+    Return a dictionary mapping columns to index information.
+    """
+    import time
+    from meerschaum.connectors import get_connector_plugin
+    from meerschaum.config.static import STATIC_CONFIG
+    from meerschaum.utils.warnings import dprint
+
+    now = time.perf_counter()
+    exists_timeout_seconds = STATIC_CONFIG['pipes']['exists_timeout_seconds']
+    if refresh:
+        _ = self.__dict__.pop('_columns_indices_timestamp', None)
+        _ = self.__dict__.pop('_columns_indices', None)
+    _columns_indices = self.__dict__.get('_columns_indices', None)
+    if _columns_indices:
+        columns_indices_timestamp = self.__dict__.get('_columns_indices_timestamp', None)
+        if columns_indices_timestamp is not None:
+            delta = now - columns_indices_timestamp
+            if delta < exists_timeout_seconds:
+                if debug:
+                    dprint(
+                        f"Returning cached `columns_indices` for {self} "
+                        f"({round(delta, 2)} seconds old)."
+                    )
+                return _columns_indices
+
+    with mrsm.Venv(get_connector_plugin(self.instance_connector)):
+        _columns_indices = (
+            self.instance_connector.get_pipe_columns_indices(self, debug=debug)
+            if hasattr(self.instance_connector, 'get_pipe_columns_indices')
+            else None
+        )
+
+    self.__dict__['_columns_indices'] = _columns_indices
+    self.__dict__['_columns_indices_timestamp'] = now
+    return _columns_indices or {}
 
 
 def get_id(self, **kw: Any) -> Union[int, None]:
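
Note: for pipes marked static, get_columns_types() now reuses a cached schema inside the configured window; non-static pipes and refresh=True always go back to the instance connector. A usage sketch, assuming a static pipe that already exists on its instance:

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'weather', static=True)  # assumes this pipe exists on the instance

    cols_types = pipe.get_columns_types()                    # fetched from the instance connector
    cols_types_cached = pipe.get_columns_types()             # served from the cache (static pipes only)
    cols_types_fresh = pipe.get_columns_types(refresh=True)  # cache invalidated, fetched again
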
@@ -274,7 +427,10 @@ def get_id(self, **kw: Any) -> Union[int, None]:
     from meerschaum.connectors import get_connector_plugin
 
     with Venv(get_connector_plugin(self.instance_connector)):
-        return self.instance_connector.get_pipe_id(self, **kw)
+        if hasattr(self.instance_connector, 'get_pipe_id'):
+            return self.instance_connector.get_pipe_id(self, **kw)
+
+    return None
 
 
 @property
@@ -509,15 +665,22 @@ def get_indices(self) -> Dict[str, str]:
         if cols
     }
     _index_names = {
-        ix: (
-            _index_template.format(
-                target=_target,
-                column_names=column_names,
-                connector_keys=self.connector_keys,
-                metric_key=self.connector_key,
-                location_key=self.location_key,
-            )
+        ix: _index_template.format(
+            target=_target,
+            column_names=column_names,
+            connector_keys=self.connector_keys,
+            metric_key=self.connector_key,
+            location_key=self.location_key,
         )
         for ix, column_names in _column_names.items()
     }
-    return _index_names
+    ### NOTE: Skip any duplicate indices.
+    seen_index_names = {}
+    for ix, index_name in _index_names.items():
+        if index_name in seen_index_names:
+            continue
+        seen_index_names[index_name] = ix
+    return {
+        ix: index_name
+        for index_name, ix in seen_index_names.items()
+    }
meerschaum/core/Pipe/_clear.py

@@ -58,12 +58,14 @@ def clear(
     from meerschaum.utils.venv import Venv
     from meerschaum.connectors import get_connector_plugin
 
+    begin, end = self.parse_date_bounds(begin, end)
+
     if self.cache_pipe is not None:
         success, msg = self.cache_pipe.clear(
-            begin = begin,
-            end = end,
-            params = params,
-            debug = debug,
+            begin=begin,
+            end=end,
+            params=params,
+            debug=debug,
             **kwargs
         )
         if not success:
@@ -72,9 +74,9 @@ def clear(
     with Venv(get_connector_plugin(self.instance_connector)):
         return self.instance_connector.clear_pipe(
             self,
-            begin = begin,
-            end = end,
-            params = params,
-            debug = debug,
+            begin=begin,
+            end=end,
+            params=params,
+            debug=debug,
             **kwargs
         )
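
Note: clear() now normalizes its begin/end bounds once via parse_date_bounds() (imported alongside the other _data helpers in Pipe/__init__.py above) before delegating to the cache pipe and instance connector. A usage sketch, assuming the pipe exists on its instance:

    from datetime import datetime, timezone
    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'weather', columns={'datetime': 'dt'})

    # Bounds are normalized up front by parse_date_bounds() and then passed along
    # unchanged to the cache pipe and the instance connector.
    success, msg = pipe.clear(
        begin=datetime(2024, 1, 1, tzinfo=timezone.utc),
        end=datetime(2024, 2, 1, tzinfo=timezone.utc),
    )
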