meerschaum 2.7.10__py3-none-any.whl → 2.8.0__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. meerschaum/_internal/arguments/_parser.py +17 -5
  2. meerschaum/actions/copy.py +3 -1
  3. meerschaum/actions/index.py +1 -1
  4. meerschaum/actions/show.py +7 -7
  5. meerschaum/actions/sync.py +5 -1
  6. meerschaum/actions/verify.py +14 -1
  7. meerschaum/api/__init__.py +77 -41
  8. meerschaum/api/_exceptions.py +18 -0
  9. meerschaum/api/dash/__init__.py +4 -2
  10. meerschaum/api/dash/callbacks/dashboard.py +30 -1
  11. meerschaum/api/dash/components.py +2 -2
  12. meerschaum/api/dash/webterm.py +23 -4
  13. meerschaum/api/models/_pipes.py +8 -8
  14. meerschaum/api/resources/static/css/dash.css +2 -2
  15. meerschaum/api/resources/templates/termpage.html +5 -1
  16. meerschaum/api/routes/__init__.py +15 -12
  17. meerschaum/api/routes/_connectors.py +30 -28
  18. meerschaum/api/routes/_index.py +16 -7
  19. meerschaum/api/routes/_misc.py +30 -22
  20. meerschaum/api/routes/_pipes.py +244 -148
  21. meerschaum/api/routes/_plugins.py +58 -47
  22. meerschaum/api/routes/_users.py +39 -31
  23. meerschaum/api/routes/_version.py +8 -10
  24. meerschaum/config/_default.py +10 -0
  25. meerschaum/config/_version.py +1 -1
  26. meerschaum/config/static/__init__.py +4 -1
  27. meerschaum/connectors/api/_APIConnector.py +4 -3
  28. meerschaum/connectors/api/_login.py +21 -17
  29. meerschaum/connectors/api/_pipes.py +1 -0
  30. meerschaum/connectors/api/_request.py +9 -10
  31. meerschaum/connectors/sql/_cli.py +11 -3
  32. meerschaum/connectors/sql/_instance.py +1 -1
  33. meerschaum/connectors/sql/_pipes.py +77 -57
  34. meerschaum/connectors/sql/_sql.py +26 -9
  35. meerschaum/core/Pipe/__init__.py +2 -0
  36. meerschaum/core/Pipe/_attributes.py +13 -2
  37. meerschaum/core/Pipe/_data.py +85 -0
  38. meerschaum/core/Pipe/_deduplicate.py +6 -8
  39. meerschaum/core/Pipe/_sync.py +63 -30
  40. meerschaum/core/Pipe/_verify.py +242 -77
  41. meerschaum/core/User/__init__.py +2 -6
  42. meerschaum/jobs/_Job.py +1 -1
  43. meerschaum/jobs/__init__.py +15 -0
  44. meerschaum/utils/dataframe.py +2 -0
  45. meerschaum/utils/dtypes/sql.py +26 -0
  46. meerschaum/utils/formatting/_pipes.py +1 -1
  47. meerschaum/utils/misc.py +11 -7
  48. meerschaum/utils/packages/_packages.py +1 -1
  49. meerschaum/utils/sql.py +6 -2
  50. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/METADATA +4 -4
  51. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/RECORD +57 -56
  52. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/LICENSE +0 -0
  53. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/NOTICE +0 -0
  54. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/WHEEL +0 -0
  55. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/entry_points.txt +0 -0
  56. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/top_level.txt +0 -0
  57. {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/zip-safe +0 -0
@@ -7,6 +7,7 @@ Interact with Pipes metadata via SQLConnector.
7
7
  """
8
8
  from __future__ import annotations
9
9
  from datetime import datetime, date, timedelta
10
+
10
11
  import meerschaum as mrsm
11
12
  from meerschaum.utils.typing import (
12
13
  Union, Any, SuccessTuple, Tuple, Dict, Optional, List
@@ -1837,7 +1838,7 @@ def sync_pipe(
1837
1838
  and primary_key in unseen_df.columns
1838
1839
  and autoincrement
1839
1840
  )
1840
- stats = {'success': True, 'msg': 'Success'}
1841
+ stats = {'success': True, 'msg': ''}
1841
1842
  if len(unseen_df) > 0:
1842
1843
  with self.engine.connect() as connection:
1843
1844
  with connection.begin():
@@ -1949,6 +1950,7 @@ def sync_pipe(
1949
1950
  datetime_col=(dt_col if dt_col in update_df.columns else None),
1950
1951
  identity_insert=(autoincrement and primary_key in update_df.columns),
1951
1952
  null_indices=pipe.null_indices,
1953
+ cast_columns=pipe.enforce,
1952
1954
  debug=debug,
1953
1955
  )
1954
1956
  update_results = self.exec_queries(
@@ -1967,12 +1969,16 @@ def sync_pipe(
1967
1969
  if not update_success:
1968
1970
  warn(f"Failed to apply update to {pipe}.")
1969
1971
  stats['success'] = stats['success'] and update_success
1970
- stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
1972
+ stats['msg'] = (
1973
+ (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
1974
+ if not update_success
1975
+ else stats.get('msg', '')
1976
+ )
1971
1977
 
1972
1978
  stop = time.perf_counter()
1973
1979
  success = stats['success']
1974
1980
  if not success:
1975
- return success, stats['msg']
1981
+ return success, stats['msg'] or str(stats)
1976
1982
 
1977
1983
  unseen_count = len(unseen_df.index) if unseen_df is not None else 0
1978
1984
  update_count = len(update_df.index) if update_df is not None else 0
@@ -2529,6 +2535,7 @@ def sync_pipe_inplace(
2529
2535
  datetime_col=pipe.columns.get('datetime', None),
2530
2536
  flavor=self.flavor,
2531
2537
  null_indices=pipe.null_indices,
2538
+ cast_columns=pipe.enforce,
2532
2539
  debug=debug,
2533
2540
  )
2534
2541
  if on_cols else []
@@ -2585,6 +2592,7 @@ def get_sync_time(
2585
2592
  pipe: 'mrsm.Pipe',
2586
2593
  params: Optional[Dict[str, Any]] = None,
2587
2594
  newest: bool = True,
2595
+ remote: bool = False,
2588
2596
  debug: bool = False,
2589
2597
  ) -> Union[datetime, int, None]:
2590
2598
  """Get a Pipe's most recent datetime value.
@@ -2602,50 +2610,76 @@ def get_sync_time(
2602
2610
  If `True`, get the most recent datetime (honoring `params`).
2603
2611
  If `False`, get the oldest datetime (ASC instead of DESC).
2604
2612
 
2613
+ remote: bool, default False
2614
+ If `True`, return the sync time for the remote fetch definition.
2615
+
2605
2616
  Returns
2606
2617
  -------
2607
2618
  A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
2608
2619
  """
2609
- from meerschaum.utils.sql import sql_item_name, build_where
2610
- table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2620
+ from meerschaum.utils.sql import sql_item_name, build_where, wrap_query_with_cte
2621
+ src_name = sql_item_name('src', self.flavor)
2622
+ table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2611
2623
 
2612
2624
  dt_col = pipe.columns.get('datetime', None)
2613
2625
  if dt_col is None:
2614
2626
  return None
2615
2627
  dt_col_name = sql_item_name(dt_col, self.flavor, None)
2616
2628
 
2629
+ if remote and pipe.connector.type != 'sql':
2630
+ warn(f"Cannot get the remote sync time for {pipe}.")
2631
+ return None
2632
+
2617
2633
  ASC_or_DESC = "DESC" if newest else "ASC"
2618
2634
  existing_cols = pipe.get_columns_types(debug=debug)
2619
2635
  valid_params = {}
2620
2636
  if params is not None:
2621
2637
  valid_params = {k: v for k, v in params.items() if k in existing_cols}
2638
+ flavor = self.flavor if not remote else pipe.connector.flavor
2622
2639
 
2623
2640
  ### If no bounds are provided for the datetime column,
2624
2641
  ### add IS NOT NULL to the WHERE clause.
2625
2642
  if dt_col not in valid_params:
2626
2643
  valid_params[dt_col] = '_None'
2627
2644
  where = "" if not valid_params else build_where(valid_params, self)
2628
- q = f"SELECT {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}\nLIMIT 1"
2645
+ src_query = (
2646
+ f"SELECT {dt_col_name}\nFROM {table_name}{where}"
2647
+ if not remote
2648
+ else self.get_pipe_metadef(pipe, params=params, begin=None, end=None)
2649
+ )
2650
+
2651
+ base_query = (
2652
+ f"SELECT {dt_col_name}\n"
2653
+ f"FROM {src_name}{where}\n"
2654
+ f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2655
+ f"LIMIT 1"
2656
+ )
2629
2657
  if self.flavor == 'mssql':
2630
- q = f"SELECT TOP 1 {dt_col_name}\nFROM {table}{where}\nORDER BY {dt_col_name} {ASC_or_DESC}"
2658
+ base_query = (
2659
+ f"SELECT TOP 1 {dt_col_name}\n"
2660
+ f"FROM {src_name}{where}\n"
2661
+ f"ORDER BY {dt_col_name} {ASC_or_DESC}"
2662
+ )
2631
2663
  elif self.flavor == 'oracle':
2632
- q = (
2664
+ base_query = (
2633
2665
  "SELECT * FROM (\n"
2634
- + f" SELECT {dt_col_name}\nFROM {table}{where}\n "
2635
- + f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2636
- + ") WHERE ROWNUM = 1"
2666
+ f" SELECT {dt_col_name}\n"
2667
+ f" FROM {src_name}{where}\n"
2668
+ f" ORDER BY {dt_col_name} {ASC_or_DESC}\n"
2669
+ ") WHERE ROWNUM = 1"
2637
2670
  )
2638
2671
 
2672
+ query = wrap_query_with_cte(src_query, base_query, flavor)
2673
+
2639
2674
  try:
2640
- db_time = self.value(q, silent=True, debug=debug)
2675
+ db_time = self.value(query, silent=True, debug=debug)
2641
2676
 
2642
2677
  ### No datetime could be found.
2643
2678
  if db_time is None:
2644
2679
  return None
2645
2680
  ### sqlite returns str.
2646
2681
  if isinstance(db_time, str):
2647
- from meerschaum.utils.packages import attempt_import
2648
- dateutil_parser = attempt_import('dateutil.parser')
2682
+ dateutil_parser = mrsm.attempt_import('dateutil.parser')
2649
2683
  st = dateutil_parser.parse(db_time)
2650
2684
  ### Do nothing if a datetime object is returned.
2651
2685
  elif isinstance(db_time, datetime):
@@ -2743,7 +2777,7 @@ def get_pipe_rowcount(
2743
2777
  An `int` for the number of rows if the `pipe` exists, otherwise `None`.
2744
2778
 
2745
2779
  """
2746
- from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
2780
+ from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte, build_where
2747
2781
  from meerschaum.connectors.sql._fetch import get_pipe_query
2748
2782
  from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
2749
2783
  if remote:
@@ -2755,18 +2789,20 @@ def get_pipe_rowcount(
2755
2789
  error(msg)
2756
2790
  return None
2757
2791
 
2758
- _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
2759
2792
 
2793
+ flavor = self.flavor if not remote else pipe.connector.flavor
2794
+ conn = self if not remote else pipe.connector
2795
+ _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
2760
2796
  dt_col = pipe.columns.get('datetime', None)
2761
2797
  dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
2762
- dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
2798
+ dt_db_type = get_db_type_from_pd_type(dt_typ, flavor) if dt_typ else None
2763
2799
  if not dt_col:
2764
2800
  dt_col = pipe.guess_datetime()
2765
- dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
2801
+ dt_name = sql_item_name(dt_col, flavor, None) if dt_col else None
2766
2802
  is_guess = True
2767
2803
  else:
2768
2804
  dt_col = pipe.get_columns('datetime')
2769
- dt_name = sql_item_name(dt_col, self.flavor, None)
2805
+ dt_name = sql_item_name(dt_col, flavor, None)
2770
2806
  is_guess = False
2771
2807
 
2772
2808
  if begin is not None or end is not None:
@@ -2786,32 +2822,15 @@ def get_pipe_rowcount(
2786
2822
  )
2787
2823
 
2788
2824
 
2789
- _datetime_name = sql_item_name(
2790
- dt_col,
2791
- (
2792
- pipe.instance_connector.flavor
2793
- if not remote
2794
- else pipe.connector.flavor
2795
- ),
2796
- None,
2797
- )
2825
+ _datetime_name = sql_item_name(dt_col, flavor)
2798
2826
  _cols_names = [
2799
- sql_item_name(
2800
- col,
2801
- (
2802
- pipe.instance_connector.flavor
2803
- if not remote
2804
- else pipe.connector.flavor
2805
- ),
2806
- None,
2807
- )
2827
+ sql_item_name(col, flavor)
2808
2828
  for col in set(
2809
2829
  (
2810
2830
  [dt_col]
2811
2831
  if dt_col
2812
2832
  else []
2813
- )
2814
- + (
2833
+ ) + (
2815
2834
  []
2816
2835
  if params is None
2817
2836
  else list(params.keys())
@@ -2826,34 +2845,33 @@ def get_pipe_rowcount(
2826
2845
  if not remote
2827
2846
  else get_pipe_query(pipe)
2828
2847
  )
2829
- parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', self.flavor)}"
2830
- query = wrap_query_with_cte(src, parent_query, self.flavor)
2848
+ parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', flavor)}"
2849
+ query = wrap_query_with_cte(src, parent_query, flavor)
2831
2850
  if begin is not None or end is not None:
2832
2851
  query += "\nWHERE"
2833
2852
  if begin is not None:
2834
2853
  query += (
2835
2854
  f"\n {dt_name} >= "
2836
- + dateadd_str(self.flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
2855
+ + dateadd_str(flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
2837
2856
  )
2838
2857
  if end is not None and begin is not None:
2839
2858
  query += "\n AND"
2840
2859
  if end is not None:
2841
2860
  query += (
2842
2861
  f"\n {dt_name} < "
2843
- + dateadd_str(self.flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
2862
+ + dateadd_str(flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
2844
2863
  )
2845
2864
  if params is not None:
2846
- from meerschaum.utils.sql import build_where
2847
2865
  existing_cols = pipe.get_columns_types(debug=debug)
2848
2866
  valid_params = {k: v for k, v in params.items() if k in existing_cols}
2849
2867
  if valid_params:
2850
- query += build_where(valid_params, self).replace('WHERE', (
2868
+ query += build_where(valid_params, conn).replace('WHERE', (
2851
2869
  'AND' if (begin is not None or end is not None)
2852
2870
  else 'WHERE'
2853
2871
  )
2854
2872
  )
2855
2873
 
2856
- result = self.value(query, debug=debug, silent=True)
2874
+ result = conn.value(query, debug=debug, silent=True)
2857
2875
  try:
2858
2876
  return int(result)
2859
2877
  except Exception:
@@ -3634,7 +3652,6 @@ def deduplicate_pipe(
3634
3652
  if not pipe.exists(debug=debug):
3635
3653
  return False, f"Table {pipe_table_name} does not exist."
3636
3654
 
3637
- ### TODO: Handle deleting duplicates without a datetime axis.
3638
3655
  dt_col = pipe.columns.get('datetime', None)
3639
3656
  cols_types = pipe.get_columns_types(debug=debug)
3640
3657
  existing_cols = pipe.get_columns_types(debug=debug)
@@ -3738,9 +3755,8 @@ def deduplicate_pipe(
3738
3755
 
3739
3756
  session_id = generate_password(3)
3740
3757
 
3741
- dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
3742
- temp_old_table = '-' + session_id + f"_old_{pipe.target}"
3743
-
3758
+ dedup_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='dedup')
3759
+ temp_old_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='old')
3744
3760
  temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))
3745
3761
 
3746
3762
  create_temporary_table_query = get_create_table_query(
@@ -3753,16 +3769,21 @@ def deduplicate_pipe(
3753
3769
  if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
3754
3770
  alter_queries = flatten_list([
3755
3771
  get_rename_table_queries(
3756
- pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe)
3772
+ pipe.target,
3773
+ temp_old_table,
3774
+ self.flavor,
3775
+ schema=self.get_pipe_schema(pipe),
3757
3776
  ),
3758
3777
  get_rename_table_queries(
3759
- dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
3778
+ dedup_table,
3779
+ pipe.target,
3780
+ self.flavor,
3781
+ schema=self.get_pipe_schema(pipe),
3760
3782
  ),
3761
- f"""
3762
- DROP TABLE {if_exists_str} {temp_old_table_name}
3763
- """,
3783
+ f"DROP TABLE {if_exists_str} {temp_old_table_name}",
3764
3784
  ])
3765
3785
 
3786
+ self._log_temporary_tables_creation(temp_old_table, create=(not pipe.temporary), debug=debug)
3766
3787
  create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
3767
3788
  if create_temporary_result is None:
3768
3789
  return False, f"Failed to deduplicate table {pipe_table_name}."
@@ -3794,8 +3815,7 @@ def deduplicate_pipe(
3794
3815
  f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
3795
3816
  if old_rowcount != new_rowcount
3796
3817
  else ''
3797
- )
3798
- + '.'
3818
+ ) + '.'
3799
3819
  )
3800
3820
  if success
3801
3821
  else f"Failed to execute query:\n{fail_query}"
@@ -773,7 +773,6 @@ def to_sql(
773
773
  """
774
774
  import time
775
775
  import json
776
- from decimal import Decimal
777
776
  from datetime import timedelta
778
777
  from meerschaum.utils.warnings import error, warn
779
778
  import warnings
@@ -823,6 +822,7 @@ def to_sql(
823
822
 
824
823
  bytes_cols = get_bytes_cols(df)
825
824
  numeric_cols = get_numeric_cols(df)
825
+ ### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite).
826
826
  numeric_cols_dtypes = {
827
827
  col: typ
828
828
  for col, typ in kw.get('dtype', {}).items()
@@ -833,6 +833,27 @@ def to_sql(
833
833
 
834
834
  }
835
835
  numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
836
+ numeric_cols_precisions_scales = {
837
+ col: (
838
+ (typ.precision, typ.scale)
839
+ if hasattr(typ, 'precision')
840
+ else get_numeric_precision_scale(self.flavor)
841
+ )
842
+ for col, typ in numeric_cols_dtypes.items()
843
+ }
844
+ cols_pd_types = {
845
+ col: get_pd_type_from_db_type(str(typ))
846
+ for col, typ in kw.get('dtype', {}).items()
847
+ }
848
+ cols_pd_types.update({
849
+ col: f'numeric[{precision},{scale}]'
850
+ for col, (precision, scale) in numeric_cols_precisions_scales.items()
851
+ if precision and scale
852
+ })
853
+ cols_db_types = {
854
+ col: get_db_type_from_pd_type(typ, flavor=self.flavor)
855
+ for col, typ in cols_pd_types.items()
856
+ }
836
857
 
837
858
  enable_bulk_insert = mrsm.get_config(
838
859
  'system', 'connectors', 'sql', 'bulk_insert'
@@ -844,7 +865,7 @@ def to_sql(
844
865
  if method == "":
845
866
  if enable_bulk_insert:
846
867
  method = (
847
- functools.partial(mssql_insert_json, debug=debug)
868
+ functools.partial(mssql_insert_json, cols_types=cols_db_types, debug=debug)
848
869
  if self.flavor == 'mssql'
849
870
  else functools.partial(psql_insert_copy, debug=debug)
850
871
  )
@@ -867,14 +888,10 @@ def to_sql(
867
888
 
868
889
  ### Check for numeric columns.
869
890
  for col in numeric_cols:
870
- typ = numeric_cols_dtypes.get(col, None)
871
-
872
- precision, scale = (
873
- (typ.precision, typ.scale)
874
- if hasattr(typ, 'precision')
875
- else get_numeric_precision_scale(self.flavor)
891
+ precision, scale = numeric_cols_precisions_scales.get(
892
+ col,
893
+ get_numeric_precision_scale(self.flavor)
876
894
  )
877
-
878
895
  df[col] = df[col].apply(
879
896
  functools.partial(
880
897
  serialize_decimal,
@@ -92,6 +92,7 @@ class Pipe:
92
92
  _get_data_as_iterator,
93
93
  get_chunk_interval,
94
94
  get_chunk_bounds,
95
+ get_chunk_bounds_batches,
95
96
  parse_date_bounds,
96
97
  )
97
98
  from ._register import register
@@ -117,6 +118,7 @@ class Pipe:
117
118
  id,
118
119
  get_val_column,
119
120
  parents,
121
+ parent,
120
122
  children,
121
123
  target,
122
124
  _target_legacy,
@@ -590,7 +590,7 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]:
590
590
 
591
591
 
592
592
  @property
593
- def parents(self) -> List[meerschaum.Pipe]:
593
+ def parents(self) -> List[mrsm.Pipe]:
594
594
  """
595
595
  Return a list of `meerschaum.Pipe` objects to be designated as parents.
596
596
  """
@@ -617,7 +617,18 @@ def parents(self) -> List[meerschaum.Pipe]:
617
617
 
618
618
 
619
619
  @property
620
- def children(self) -> List[meerschaum.Pipe]:
620
+ def parent(self) -> Union[mrsm.Pipe, None]:
621
+ """
622
+ Return the first pipe in `self.parents` or `None`.
623
+ """
624
+ parents = self.parents
625
+ if not parents:
626
+ return None
627
+ return parents[0]
628
+
629
+
630
+ @property
631
+ def children(self) -> List[mrsm.Pipe]:
621
632
  """
622
633
  Return a list of `meerschaum.Pipe` objects to be designated as children.
623
634
  """
@@ -544,11 +544,33 @@ def get_rowcount(
544
544
  from meerschaum.utils.warnings import warn
545
545
  from meerschaum.utils.venv import Venv
546
546
  from meerschaum.connectors import get_connector_plugin
547
+ from meerschaum.utils.misc import filter_keywords
547
548
 
548
549
  begin, end = self.parse_date_bounds(begin, end)
549
550
  connector = self.instance_connector if not remote else self.connector
550
551
  try:
551
552
  with Venv(get_connector_plugin(connector)):
553
+ if not hasattr(connector, 'get_pipe_rowcount'):
554
+ warn(
555
+ f"Connectors of type '{connector.type}' "
556
+ "do not implement `get_pipe_rowcount()`.",
557
+ stack=False,
558
+ )
559
+ return 0
560
+ kwargs = filter_keywords(
561
+ connector.get_pipe_rowcount,
562
+ begin=begin,
563
+ end=end,
564
+ params=params,
565
+ remote=remote,
566
+ debug=debug,
567
+ )
568
+ if remote and 'remote' not in kwargs:
569
+ warn(
570
+ f"Connectors of type '{connector.type}' do not support remote rowcounts.",
571
+ stack=False,
572
+ )
573
+ return 0
552
574
  rowcount = connector.get_pipe_rowcount(
553
575
  self,
554
576
  begin=begin,
@@ -651,12 +673,19 @@ def get_chunk_bounds(
651
673
  A list of chunk bounds (datetimes or integers).
652
674
  If unbounded, the first and last chunks will include `None`.
653
675
  """
676
+ from datetime import timedelta
677
+ from meerschaum.utils.dtypes import are_dtypes_equal
678
+ from meerschaum.utils.misc import interval_str
654
679
  include_less_than_begin = not bounded and begin is None
655
680
  include_greater_than_end = not bounded and end is None
656
681
  if begin is None:
657
682
  begin = self.get_sync_time(newest=False, debug=debug)
658
683
  if end is None:
659
684
  end = self.get_sync_time(newest=True, debug=debug)
685
+ if end is not None and hasattr(end, 'tzinfo'):
686
+ end += timedelta(minutes=1)
687
+ elif are_dtypes_equal(str(type(end)), 'int'):
688
+ end += 1
660
689
  if begin is None and end is None:
661
690
  return [(None, None)]
662
691
 
@@ -670,10 +699,17 @@ def get_chunk_bounds(
670
699
  ### Run `verify pipes --workers 1` to sync chunks in series.
671
700
  chunk_bounds = []
672
701
  begin_cursor = begin
702
+ num_chunks = 0
703
+ max_chunks = 1_000_000
673
704
  while begin_cursor < end:
674
705
  end_cursor = begin_cursor + chunk_interval
675
706
  chunk_bounds.append((begin_cursor, end_cursor))
676
707
  begin_cursor = end_cursor
708
+ num_chunks += 1
709
+ if num_chunks >= max_chunks:
710
+ raise ValueError(
711
+ f"Too many chunks of size '{interval_str(chunk_interval)}' between '{begin}' and '{end}'."
712
+ )
677
713
 
678
714
  ### The chunk interval might be too large.
679
715
  if not chunk_bounds and end >= begin:
@@ -695,6 +731,55 @@ def get_chunk_bounds(
695
731
  return chunk_bounds
696
732
 
697
733
 
734
+ def get_chunk_bounds_batches(
735
+ self,
736
+ chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]],
737
+ batchsize: Optional[int] = None,
738
+ workers: Optional[int] = None,
739
+ debug: bool = False,
740
+ ) -> List[
741
+ Tuple[
742
+ Tuple[
743
+ Union[datetime, int, None],
744
+ Union[datetime, int, None],
745
+ ], ...
746
+ ]
747
+ ]:
748
+ """
749
+ Return a list of tuples of chunk bounds of size `batchsize`.
750
+
751
+ Parameters
752
+ ----------
753
+ chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]]
754
+ A list of chunk_bounds (see `Pipe.get_chunk_bounds()`).
755
+
756
+ batchsize: Optional[int], default None
757
+ How many chunks to include in a batch. Defaults to `Pipe.get_num_workers()`.
758
+
759
+ workers: Optional[int], default None
760
+ If `batchsize` is `None`, use this as the desired number of workers.
761
+ Passed to `Pipe.get_num_workers()`.
762
+
763
+ Returns
764
+ -------
765
+ A list of tuples of chunk bound tuples.
766
+ """
767
+ from meerschaum.utils.misc import iterate_chunks
768
+
769
+ if batchsize is None:
770
+ batchsize = self.get_num_workers(workers=workers)
771
+
772
+ return [
773
+ tuple(
774
+ _batch_chunk_bounds
775
+ for _batch_chunk_bounds in batch
776
+ if _batch_chunk_bounds is not None
777
+ )
778
+ for batch in iterate_chunks(chunk_bounds, batchsize)
779
+ if batch
780
+ ]
781
+
782
+
698
783
  def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
699
784
  datetime,
700
785
  int,
@@ -110,13 +110,12 @@ def deduplicate(
110
110
  )
111
111
  if bounded and end is None:
112
112
  end = self.get_sync_time(newest=True, debug=debug)
113
-
114
- if bounded and end is not None:
115
- end += (
116
- timedelta(minutes=1)
117
- if isinstance(end, datetime)
118
- else 1
119
- )
113
+ if end is not None:
114
+ end += (
115
+ timedelta(minutes=1)
116
+ if hasattr(end, 'tzinfo')
117
+ else 1
118
+ )
120
119
 
121
120
  chunk_bounds = self.get_chunk_bounds(
122
121
  bounded=bounded,
@@ -129,7 +128,6 @@ def deduplicate(
129
128
  indices = [col for col in self.columns.values() if col]
130
129
  if not indices:
131
130
  return False, "Cannot deduplicate without index columns."
132
- dt_col = self.columns.get('datetime', None)
133
131
 
134
132
  def process_chunk_bounds(bounds) -> Tuple[
135
133
  Tuple[