meerschaum-2.7.9-py3-none-any.whl → meerschaum-2.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. meerschaum/_internal/arguments/_parser.py +17 -5
  2. meerschaum/_internal/term/TermPageHandler.py +1 -1
  3. meerschaum/_internal/term/__init__.py +1 -1
  4. meerschaum/actions/api.py +36 -10
  5. meerschaum/actions/copy.py +3 -1
  6. meerschaum/actions/index.py +1 -1
  7. meerschaum/actions/show.py +7 -7
  8. meerschaum/actions/sync.py +5 -1
  9. meerschaum/actions/verify.py +14 -1
  10. meerschaum/api/__init__.py +77 -41
  11. meerschaum/api/_exceptions.py +18 -0
  12. meerschaum/api/dash/__init__.py +4 -2
  13. meerschaum/api/dash/callbacks/dashboard.py +30 -1
  14. meerschaum/api/dash/components.py +2 -2
  15. meerschaum/api/dash/webterm.py +23 -4
  16. meerschaum/api/models/_pipes.py +8 -8
  17. meerschaum/api/resources/static/css/dash.css +2 -2
  18. meerschaum/api/resources/templates/termpage.html +5 -1
  19. meerschaum/api/routes/__init__.py +15 -12
  20. meerschaum/api/routes/_connectors.py +30 -28
  21. meerschaum/api/routes/_index.py +16 -7
  22. meerschaum/api/routes/_misc.py +30 -22
  23. meerschaum/api/routes/_pipes.py +244 -148
  24. meerschaum/api/routes/_plugins.py +58 -47
  25. meerschaum/api/routes/_users.py +39 -31
  26. meerschaum/api/routes/_version.py +8 -10
  27. meerschaum/api/routes/_webterm.py +2 -2
  28. meerschaum/config/_default.py +10 -0
  29. meerschaum/config/_version.py +1 -1
  30. meerschaum/config/static/__init__.py +5 -2
  31. meerschaum/connectors/api/_APIConnector.py +4 -3
  32. meerschaum/connectors/api/_login.py +21 -17
  33. meerschaum/connectors/api/_pipes.py +1 -0
  34. meerschaum/connectors/api/_request.py +9 -10
  35. meerschaum/connectors/sql/_cli.py +11 -3
  36. meerschaum/connectors/sql/_instance.py +1 -1
  37. meerschaum/connectors/sql/_pipes.py +77 -57
  38. meerschaum/connectors/sql/_sql.py +26 -9
  39. meerschaum/core/Pipe/__init__.py +2 -0
  40. meerschaum/core/Pipe/_attributes.py +13 -2
  41. meerschaum/core/Pipe/_data.py +85 -0
  42. meerschaum/core/Pipe/_deduplicate.py +6 -8
  43. meerschaum/core/Pipe/_sync.py +63 -30
  44. meerschaum/core/Pipe/_verify.py +242 -77
  45. meerschaum/core/User/__init__.py +2 -6
  46. meerschaum/jobs/_Job.py +1 -1
  47. meerschaum/jobs/__init__.py +15 -0
  48. meerschaum/utils/dataframe.py +2 -0
  49. meerschaum/utils/dtypes/sql.py +26 -0
  50. meerschaum/utils/formatting/_pipes.py +1 -1
  51. meerschaum/utils/misc.py +11 -7
  52. meerschaum/utils/packages/_packages.py +1 -1
  53. meerschaum/utils/sql.py +6 -2
  54. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/METADATA +4 -4
  55. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/RECORD +61 -60
  56. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/LICENSE +0 -0
  57. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/NOTICE +0 -0
  58. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/WHEEL +0 -0
  59. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/entry_points.txt +0 -0
  60. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/top_level.txt +0 -0
  61. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/zip-safe +0 -0
@@ -7,6 +7,7 @@ Interact with Pipes metadata via SQLConnector.
7
7
  """
8
8
  from __future__ import annotations
9
9
  from datetime import datetime, date, timedelta
10
+
10
11
  import meerschaum as mrsm
11
12
  from meerschaum.utils.typing import (
12
13
  Union, Any, SuccessTuple, Tuple, Dict, Optional, List
@@ -1837,7 +1838,7 @@ def sync_pipe(
1837
1838
  and primary_key in unseen_df.columns
1838
1839
  and autoincrement
1839
1840
  )
1840
- stats = {'success': True, 'msg': 'Success'}
1841
+ stats = {'success': True, 'msg': ''}
1841
1842
  if len(unseen_df) > 0:
1842
1843
  with self.engine.connect() as connection:
1843
1844
  with connection.begin():
@@ -1949,6 +1950,7 @@ def sync_pipe(
1949
1950
  datetime_col=(dt_col if dt_col in update_df.columns else None),
1950
1951
  identity_insert=(autoincrement and primary_key in update_df.columns),
1951
1952
  null_indices=pipe.null_indices,
1953
+ cast_columns=pipe.enforce,
1952
1954
  debug=debug,
1953
1955
  )
1954
1956
  update_results = self.exec_queries(
@@ -1967,12 +1969,16 @@ def sync_pipe(
1967
1969
  if not update_success:
1968
1970
  warn(f"Failed to apply update to {pipe}.")
1969
1971
  stats['success'] = stats['success'] and update_success
1970
- stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
1972
+ stats['msg'] = (
1973
+ (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
1974
+ if not update_success
1975
+ else stats.get('msg', '')
1976
+ )
1971
1977
 
1972
1978
  stop = time.perf_counter()
1973
1979
  success = stats['success']
1974
1980
  if not success:
1975
- return success, stats['msg']
1981
+ return success, stats['msg'] or str(stats)
1976
1982
 
1977
1983
  unseen_count = len(unseen_df.index) if unseen_df is not None else 0
1978
1984
  update_count = len(update_df.index) if update_df is not None else 0
@@ -2529,6 +2535,7 @@ def sync_pipe_inplace(
2529
2535
  datetime_col=pipe.columns.get('datetime', None),
2530
2536
  flavor=self.flavor,
2531
2537
  null_indices=pipe.null_indices,
2538
+ cast_columns=pipe.enforce,
2532
2539
  debug=debug,
2533
2540
  )
2534
2541
  if on_cols else []
@@ -2585,6 +2592,7 @@ def get_sync_time(
2585
2592
  pipe: 'mrsm.Pipe',
2586
2593
  params: Optional[Dict[str, Any]] = None,
2587
2594
  newest: bool = True,
2595
+ remote: bool = False,
2588
2596
  debug: bool = False,
2589
2597
  ) -> Union[datetime, int, None]:
2590
2598
  """Get a Pipe's most recent datetime value.
@@ -2602,50 +2610,76 @@ def get_sync_time(
2602
2610
  If `True`, get the most recent datetime (honoring `params`).
2603
2611
  If `False`, get the oldest datetime (ASC instead of DESC).
2604
2612
 
2613
+ remote: bool, default False
2614
+ If `True`, return the sync time for the remote fetch definition.
2615
+
2605
2616
  Returns
2606
2617
  -------
2607
2618
  A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
2608
2619
  """
2609
- from meerschaum.utils.sql import sql_item_name, build_where
2610
- table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2620
+ from meerschaum.utils.sql import sql_item_name, build_where, wrap_query_with_cte
2621
+ src_name = sql_item_name('src', self.flavor)
2622
+ table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2611
2623
 
2612
2624
  dt_col = pipe.columns.get('datetime', None)
2613
2625
  if dt_col is None:
2614
2626
  return None
2615
2627
  dt_col_name = sql_item_name(dt_col, self.flavor, None)
2616
2628
 
2629
+ if remote and pipe.connector.type != 'sql':
2630
+ warn(f"Cannot get the remote sync time for {pipe}.")
2631
+ return None
2632
+
2617
2633
  ASC_or_DESC = "DESC" if newest else "ASC"
2618
2634
  existing_cols = pipe.get_columns_types(debug=debug)
2619
2635
  valid_params = {}
2620
2636
  if params is not None:
2621
2637
  valid_params = {k: v for k, v in params.items() if k in existing_cols}
2638
+ flavor = self.flavor if not remote else pipe.connector.flavor
2622
2639
 
2623
2640
  ### If no bounds are provided for the datetime column,
2624
2641
  ### add IS NOT NULL to the WHERE clause.
2625
2642
  if dt_col not in valid_params:
2626
2643
  valid_params[dt_col] = '_None'
2627
2644
  where = "" if not valid_params else build_where(valid_params, self)
2628
- q = f"SELECT {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}\nLIMIT 1"
2645
+ src_query = (
2646
+ f"SELECT {dt_col_name}\nFROM {table_name}{where}"
2647
+ if not remote
2648
+ else self.get_pipe_metadef(pipe, params=params, begin=None, end=None)
2649
+ )
2650
+
2651
+ base_query = (
2652
+ f"SELECT {dt_col_name}\n"
2653
+ f"FROM {src_name}{where}\n"
2654
+ f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2655
+ f"LIMIT 1"
2656
+ )
2629
2657
  if self.flavor == 'mssql':
2630
- q = f"SELECT TOP 1 {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}"
2658
+ base_query = (
2659
+ f"SELECT TOP 1 {dt_col_name}\n"
2660
+ f"FROM {src_name}{where}\n"
2661
+ f"ORDER BY {dt_col_name} {ASC_or_DESC}"
2662
+ )
2631
2663
  elif self.flavor == 'oracle':
2632
- q = (
2664
+ base_query = (
2633
2665
  "SELECT * FROM (\n"
2634
- + f" SELECT {dt_col_name}\nFROM {table}{where}\n "
2635
- + f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2636
- + ") WHERE ROWNUM = 1"
2666
+ f" SELECT {dt_col_name}\n"
2667
+ f" FROM {src_name}{where}\n"
2668
+ f" ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2669
+ ") WHERE ROWNUM = 1"
2637
2670
  )
2638
2671
 
2672
+ query = wrap_query_with_cte(src_query, base_query, flavor)
2673
+
2639
2674
  try:
2640
- db_time = self.value(q, silent=True, debug=debug)
2675
+ db_time = self.value(query, silent=True, debug=debug)
2641
2676
 
2642
2677
  ### No datetime could be found.
2643
2678
  if db_time is None:
2644
2679
  return None
2645
2680
  ### sqlite returns str.
2646
2681
  if isinstance(db_time, str):
2647
- from meerschaum.utils.packages import attempt_import
2648
- dateutil_parser = attempt_import('dateutil.parser')
2682
+ dateutil_parser = mrsm.attempt_import('dateutil.parser')
2649
2683
  st = dateutil_parser.parse(db_time)
2650
2684
  ### Do nothing if a datetime object is returned.
2651
2685
  elif isinstance(db_time, datetime):
@@ -2743,7 +2777,7 @@ def get_pipe_rowcount(
2743
2777
  An `int` for the number of rows if the `pipe` exists, otherwise `None`.
2744
2778
 
2745
2779
  """
2746
- from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
2780
+ from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte, build_where
2747
2781
  from meerschaum.connectors.sql._fetch import get_pipe_query
2748
2782
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2749
2783
  if remote:
@@ -2755,18 +2789,20 @@ def get_pipe_rowcount(
2755
2789
  error(msg)
2756
2790
  return None
2757
2791
 
2758
- _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2759
2792
 
2793
+ flavor = self.flavor if not remote else pipe.connector.flavor
2794
+ conn = self if not remote else pipe.connector
2795
+ _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
2760
2796
  dt_col = pipe.columns.get('datetime', None)
2761
2797
  dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2762
- dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
2798
+ dt_db_type = get_db_type_from_pd_type(dt_typ, flavor) if dt_typ else None
2763
2799
  if not dt_col:
2764
2800
  dt_col = pipe.guess_datetime()
2765
- dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
2801
+ dt_name = sql_item_name(dt_col, flavor, None) if dt_col else None
2766
2802
  is_guess = True
2767
2803
  else:
2768
2804
  dt_col = pipe.get_columns('datetime')
2769
- dt_name = sql_item_name(dt_col, self.flavor, None)
2805
+ dt_name = sql_item_name(dt_col, flavor, None)
2770
2806
  is_guess = False
2771
2807
 
2772
2808
  if begin is not None or end is not None:
@@ -2786,32 +2822,15 @@ def get_pipe_rowcount(
2786
2822
  )
2787
2823
 
2788
2824
 
2789
- _datetime_name = sql_item_name(
2790
- dt_col,
2791
- (
2792
- pipe.instance_connector.flavor
2793
- if not remote
2794
- else pipe.connector.flavor
2795
- ),
2796
- None,
2797
- )
2825
+ _datetime_name = sql_item_name(dt_col, flavor)
2798
2826
  _cols_names = [
2799
- sql_item_name(
2800
- col,
2801
- (
2802
- pipe.instance_connector.flavor
2803
- if not remote
2804
- else pipe.connector.flavor
2805
- ),
2806
- None,
2807
- )
2827
+ sql_item_name(col, flavor)
2808
2828
  for col in set(
2809
2829
  (
2810
2830
  [dt_col]
2811
2831
  if dt_col
2812
2832
  else []
2813
- )
2814
- + (
2833
+ ) + (
2815
2834
  []
2816
2835
  if params is None
2817
2836
  else list(params.keys())
@@ -2826,34 +2845,33 @@ def get_pipe_rowcount(
2826
2845
  if not remote
2827
2846
  else get_pipe_query(pipe)
2828
2847
  )
2829
- parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', self.flavor)}"
2830
- query = wrap_query_with_cte(src, parent_query, self.flavor)
2848
+ parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', flavor)}"
2849
+ query = wrap_query_with_cte(src, parent_query, flavor)
2831
2850
  if begin is not None or end is not None:
2832
2851
  query += "\nWHERE"
2833
2852
  if begin is not None:
2834
2853
  query += (
2835
2854
  f"\n {dt_name} >= "
2836
- + dateadd_str(self.flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
2855
+ + dateadd_str(flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
2837
2856
  )
2838
2857
  if end is not None and begin is not None:
2839
2858
  query += "\n AND"
2840
2859
  if end is not None:
2841
2860
  query += (
2842
2861
  f"\n {dt_name} < "
2843
- + dateadd_str(self.flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
2862
+ + dateadd_str(flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
2844
2863
  )
2845
2864
  if params is not None:
2846
- from meerschaum.utils.sql import build_where
2847
2865
  existing_cols = pipe.get_columns_types(debug=debug)
2848
2866
  valid_params = {k: v for k, v in params.items() if k in existing_cols}
2849
2867
  if valid_params:
2850
- query += build_where(valid_params, self).replace('WHERE', (
2868
+ query += build_where(valid_params, conn).replace('WHERE', (
2851
2869
  'AND' if (begin is not None or end is not None)
2852
2870
  else 'WHERE'
2853
2871
  )
2854
2872
  )
2855
2873
 
2856
- result = self.value(query, debug=debug, silent=True)
2874
+ result = conn.value(query, debug=debug, silent=True)
2857
2875
  try:
2858
2876
  return int(result)
2859
2877
  except Exception:
@@ -3634,7 +3652,6 @@ def deduplicate_pipe(
3634
3652
  if not pipe.exists(debug=debug):
3635
3653
  return False, f"Table {pipe_table_name} does not exist."
3636
3654
 
3637
- ### TODO: Handle deleting duplicates without a datetime axis.
3638
3655
  dt_col = pipe.columns.get('datetime', None)
3639
3656
  cols_types = pipe.get_columns_types(debug=debug)
3640
3657
  existing_cols = pipe.get_columns_types(debug=debug)
@@ -3738,9 +3755,8 @@ def deduplicate_pipe(
3738
3755
 
3739
3756
  session_id = generate_password(3)
3740
3757
 
3741
- dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
3742
- temp_old_table = '-' + session_id + f"_old_{pipe.target}"
3743
-
3758
+ dedup_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='dedup')
3759
+ temp_old_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='old')
3744
3760
  temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3745
3761
 
3746
3762
  create_temporary_table_query = get_create_table_query(
@@ -3753,16 +3769,21 @@ def deduplicate_pipe(
3753
3769
  if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
3754
3770
  alter_queries = flatten_list([
3755
3771
  get_rename_table_queries(
3756
- pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe)
3772
+ pipe.target,
3773
+ temp_old_table,
3774
+ self.flavor,
3775
+ schema=self.get_pipe_schema(pipe),
3757
3776
  ),
3758
3777
  get_rename_table_queries(
3759
- dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
3778
+ dedup_table,
3779
+ pipe.target,
3780
+ self.flavor,
3781
+ schema=self.get_pipe_schema(pipe),
3760
3782
  ),
3761
- f"""
3762
- DROP TABLE {if_exists_str} {temp_old_table_name}
3763
- """,
3783
+ f"DROP TABLE {if_exists_str} {temp_old_table_name}",
3764
3784
  ])
3765
3785
 
3786
+ self._log_temporary_tables_creation(temp_old_table, create=(not pipe.temporary), debug=debug)
3766
3787
  create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
3767
3788
  if create_temporary_result is None:
3768
3789
  return False, f"Failed to deduplicate table {pipe_table_name}."
@@ -3794,8 +3815,7 @@ def deduplicate_pipe(
3794
3815
  f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
3795
3816
  if old_rowcount != new_rowcount
3796
3817
  else ''
3797
- )
3798
- + '.'
3818
+ ) + '.'
3799
3819
  )
3800
3820
  if success
3801
3821
  else f"Failed to execute query:\n{fail_query}"
@@ -773,7 +773,6 @@ def to_sql(
773
773
  """
774
774
  import time
775
775
  import json
776
- from decimal import Decimal
777
776
  from datetime import timedelta
778
777
  from meerschaum.utils.warnings import error, warn
779
778
  import warnings
@@ -823,6 +822,7 @@ def to_sql(
823
822
 
824
823
  bytes_cols = get_bytes_cols(df)
825
824
  numeric_cols = get_numeric_cols(df)
825
+ ### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite).
826
826
  numeric_cols_dtypes = {
827
827
  col: typ
828
828
  for col, typ in kw.get('dtype', {}).items()
@@ -833,6 +833,27 @@ def to_sql(
833
833
 
834
834
  }
835
835
  numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
836
+ numeric_cols_precisions_scales = {
837
+ col: (
838
+ (typ.precision, typ.scale)
839
+ if hasattr(typ, 'precision')
840
+ else get_numeric_precision_scale(self.flavor)
841
+ )
842
+ for col, typ in numeric_cols_dtypes.items()
843
+ }
844
+ cols_pd_types = {
845
+ col: get_pd_type_from_db_type(str(typ))
846
+ for col, typ in kw.get('dtype', {}).items()
847
+ }
848
+ cols_pd_types.update({
849
+ col: f'numeric[{precision},{scale}]'
850
+ for col, (precision, scale) in numeric_cols_precisions_scales.items()
851
+ if precision and scale
852
+ })
853
+ cols_db_types = {
854
+ col: get_db_type_from_pd_type(typ, flavor=self.flavor)
855
+ for col, typ in cols_pd_types.items()
856
+ }
836
857
 
837
858
  enable_bulk_insert = mrsm.get_config(
838
859
  'system', 'connectors', 'sql', 'bulk_insert'
@@ -844,7 +865,7 @@ def to_sql(
844
865
  if method == "":
845
866
  if enable_bulk_insert:
846
867
  method = (
847
- functools.partial(mssql_insert_json, debug=debug)
868
+ functools.partial(mssql_insert_json, cols_types=cols_db_types, debug=debug)
848
869
  if self.flavor == 'mssql'
849
870
  else functools.partial(psql_insert_copy, debug=debug)
850
871
  )
@@ -867,14 +888,10 @@ def to_sql(
867
888
 
868
889
  ### Check for numeric columns.
869
890
  for col in numeric_cols:
870
- typ = numeric_cols_dtypes.get(col, None)
871
-
872
- precision, scale = (
873
- (typ.precision, typ.scale)
874
- if hasattr(typ, 'precision')
875
- else get_numeric_precision_scale(self.flavor)
891
+ precision, scale = numeric_cols_precisions_scales.get(
892
+ col,
893
+ get_numeric_precision_scale(self.flavor)
876
894
  )
877
-
878
895
  df[col] = df[col].apply(
879
896
  functools.partial(
880
897
  serialize_decimal,
@@ -92,6 +92,7 @@ class Pipe:
92
92
  _get_data_as_iterator,
93
93
  get_chunk_interval,
94
94
  get_chunk_bounds,
95
+ get_chunk_bounds_batches,
95
96
  parse_date_bounds,
96
97
  )
97
98
  from ._register import register
@@ -117,6 +118,7 @@ class Pipe:
117
118
  id,
118
119
  get_val_column,
119
120
  parents,
121
+ parent,
120
122
  children,
121
123
  target,
122
124
  _target_legacy,
@@ -590,7 +590,7 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]:
590
590
 
591
591
 
592
592
  @property
593
- def parents(self) -> List[meerschaum.Pipe]:
593
+ def parents(self) -> List[mrsm.Pipe]:
594
594
  """
595
595
  Return a list of `meerschaum.Pipe` objects to be designated as parents.
596
596
  """
@@ -617,7 +617,18 @@ def parents(self) -> List[meerschaum.Pipe]:
617
617
 
618
618
 
619
619
  @property
620
- def children(self) -> List[meerschaum.Pipe]:
620
+ def parent(self) -> Union[mrsm.Pipe, None]:
621
+ """
622
+ Return the first pipe in `self.parents` or `None`.
623
+ """
624
+ parents = self.parents
625
+ if not parents:
626
+ return None
627
+ return parents[0]
628
+
629
+
630
+ @property
631
+ def children(self) -> List[mrsm.Pipe]:
621
632
  """
622
633
  Return a list of `meerschaum.Pipe` objects to be designated as children.
623
634
  """
@@ -544,11 +544,33 @@ def get_rowcount(
544
544
  from meerschaum.utils.warnings import warn
545
545
  from meerschaum.utils.venv import Venv
546
546
  from meerschaum.connectors import get_connector_plugin
547
+ from meerschaum.utils.misc import filter_keywords
547
548
 
548
549
  begin, end = self.parse_date_bounds(begin, end)
549
550
  connector = self.instance_connector if not remote else self.connector
550
551
  try:
551
552
  with Venv(get_connector_plugin(connector)):
553
+ if not hasattr(connector, 'get_pipe_rowcount'):
554
+ warn(
555
+ f"Connectors of type '{connector.type}' "
556
+ "do not implement `get_pipe_rowcount()`.",
557
+ stack=False,
558
+ )
559
+ return 0
560
+ kwargs = filter_keywords(
561
+ connector.get_pipe_rowcount,
562
+ begin=begin,
563
+ end=end,
564
+ params=params,
565
+ remote=remote,
566
+ debug=debug,
567
+ )
568
+ if remote and 'remote' not in kwargs:
569
+ warn(
570
+ f"Connectors of type '{connector.type}' do not support remote rowcounts.",
571
+ stack=False,
572
+ )
573
+ return 0
552
574
  rowcount = connector.get_pipe_rowcount(
553
575
  self,
554
576
  begin=begin,
@@ -651,12 +673,19 @@ def get_chunk_bounds(
651
673
  A list of chunk bounds (datetimes or integers).
652
674
  If unbounded, the first and last chunks will include `None`.
653
675
  """
676
+ from datetime import timedelta
677
+ from meerschaum.utils.dtypes import are_dtypes_equal
678
+ from meerschaum.utils.misc import interval_str
654
679
  include_less_than_begin = not bounded and begin is None
655
680
  include_greater_than_end = not bounded and end is None
656
681
  if begin is None:
657
682
  begin = self.get_sync_time(newest=False, debug=debug)
658
683
  if end is None:
659
684
  end = self.get_sync_time(newest=True, debug=debug)
685
+ if end is not None and hasattr(end, 'tzinfo'):
686
+ end += timedelta(minutes=1)
687
+ elif are_dtypes_equal(str(type(end)), 'int'):
688
+ end += 1
660
689
  if begin is None and end is None:
661
690
  return [(None, None)]
662
691
 
@@ -670,10 +699,17 @@ def get_chunk_bounds(
670
699
  ### Run `verify pipes --workers 1` to sync chunks in series.
671
700
  chunk_bounds = []
672
701
  begin_cursor = begin
702
+ num_chunks = 0
703
+ max_chunks = 1_000_000
673
704
  while begin_cursor < end:
674
705
  end_cursor = begin_cursor + chunk_interval
675
706
  chunk_bounds.append((begin_cursor, end_cursor))
676
707
  begin_cursor = end_cursor
708
+ num_chunks += 1
709
+ if num_chunks >= max_chunks:
710
+ raise ValueError(
711
+ f"Too many chunks of size '{interval_str(chunk_interval)}' between '{begin}' and '{end}'."
712
+ )
677
713
 
678
714
  ### The chunk interval might be too large.
679
715
  if not chunk_bounds and end >= begin:
@@ -695,6 +731,55 @@ def get_chunk_bounds(
695
731
  return chunk_bounds
696
732
 
697
733
 
734
+ def get_chunk_bounds_batches(
735
+ self,
736
+ chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]],
737
+ batchsize: Optional[int] = None,
738
+ workers: Optional[int] = None,
739
+ debug: bool = False,
740
+ ) -> List[
741
+ Tuple[
742
+ Tuple[
743
+ Union[datetime, int, None],
744
+ Union[datetime, int, None],
745
+ ], ...
746
+ ]
747
+ ]:
748
+ """
749
+ Return a list of tuples of chunk bounds of size `batchsize`.
750
+
751
+ Parameters
752
+ ----------
753
+ chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]]
754
+ A list of chunk_bounds (see `Pipe.get_chunk_bounds()`).
755
+
756
+ batchsize: Optional[int], default None
757
+ How many chunks to include in a batch. Defaults to `Pipe.get_num_workers()`.
758
+
759
+ workers: Optional[int], default None
760
+ If `batchsize` is `None`, use this as the desired number of workers.
761
+ Passed to `Pipe.get_num_workers()`.
762
+
763
+ Returns
764
+ -------
765
+ A list of tuples of chunk bound tuples.
766
+ """
767
+ from meerschaum.utils.misc import iterate_chunks
768
+
769
+ if batchsize is None:
770
+ batchsize = self.get_num_workers(workers=workers)
771
+
772
+ return [
773
+ tuple(
774
+ _batch_chunk_bounds
775
+ for _batch_chunk_bounds in batch
776
+ if _batch_chunk_bounds is not None
777
+ )
778
+ for batch in iterate_chunks(chunk_bounds, batchsize)
779
+ if batch
780
+ ]
781
+
782
+
698
783
  def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
699
784
  datetime,
700
785
  int,
@@ -110,13 +110,12 @@ def deduplicate(
110
110
  )
111
111
  if bounded and end is None:
112
112
  end = self.get_sync_time(newest=True, debug=debug)
113
-
114
- if bounded and end is not None:
115
- end += (
116
- timedelta(minutes=1)
117
- if isinstance(end, datetime)
118
- else 1
119
- )
113
+ if end is not None:
114
+ end += (
115
+ timedelta(minutes=1)
116
+ if hasattr(end, 'tzinfo')
117
+ else 1
118
+ )
120
119
 
121
120
  chunk_bounds = self.get_chunk_bounds(
122
121
  bounded=bounded,
@@ -129,7 +128,6 @@ def deduplicate(
129
128
  indices = [col for col in self.columns.values() if col]
130
129
  if not indices:
131
130
  return False, "Cannot deduplicate without index columns."
132
- dt_col = self.columns.get('datetime', None)
133
131
 
134
132
  def process_chunk_bounds(bounds) -> Tuple[
135
133
  Tuple[