meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. meerschaum/_internal/arguments/_parser.py +6 -1
  2. meerschaum/_internal/entry.py +16 -5
  3. meerschaum/actions/edit.py +6 -6
  4. meerschaum/actions/sql.py +12 -11
  5. meerschaum/api/dash/pages/login.py +17 -17
  6. meerschaum/api/dash/pipes.py +104 -13
  7. meerschaum/api/routes/_pipes.py +58 -40
  8. meerschaum/api/routes/_webterm.py +1 -0
  9. meerschaum/config/_edit.py +46 -19
  10. meerschaum/config/_read_config.py +20 -9
  11. meerschaum/config/_version.py +1 -1
  12. meerschaum/config/stack/__init__.py +1 -1
  13. meerschaum/config/static/__init__.py +1 -0
  14. meerschaum/connectors/api/_APIConnector.py +1 -0
  15. meerschaum/connectors/api/_pipes.py +39 -8
  16. meerschaum/connectors/sql/_SQLConnector.py +4 -3
  17. meerschaum/connectors/sql/_pipes.py +511 -118
  18. meerschaum/connectors/sql/_sql.py +55 -15
  19. meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
  20. meerschaum/connectors/valkey/_pipes.py +11 -5
  21. meerschaum/core/Pipe/__init__.py +27 -9
  22. meerschaum/core/Pipe/_attributes.py +181 -18
  23. meerschaum/core/Pipe/_clear.py +10 -8
  24. meerschaum/core/Pipe/_copy.py +2 -0
  25. meerschaum/core/Pipe/_data.py +65 -17
  26. meerschaum/core/Pipe/_deduplicate.py +30 -28
  27. meerschaum/core/Pipe/_dtypes.py +4 -4
  28. meerschaum/core/Pipe/_fetch.py +12 -10
  29. meerschaum/core/Pipe/_sync.py +28 -11
  30. meerschaum/core/Pipe/_verify.py +52 -49
  31. meerschaum/utils/dataframe.py +64 -34
  32. meerschaum/utils/dtypes/__init__.py +25 -6
  33. meerschaum/utils/dtypes/sql.py +76 -33
  34. meerschaum/utils/misc.py +57 -24
  35. meerschaum/utils/packages/_packages.py +2 -1
  36. meerschaum/utils/schedule.py +7 -5
  37. meerschaum/utils/sql.py +697 -44
  38. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
  39. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
  40. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
@@ -55,6 +55,8 @@ def copy_to(
55
55
  if str(instance_keys) == self.instance_keys:
56
56
  return False, f"Cannot copy {self} to instance '{instance_keys}'."
57
57
 
58
+ begin, end = self.parse_date_bounds(begin, end)
59
+
58
60
  new_pipe = mrsm.Pipe(
59
61
  self.connector_keys,
60
62
  self.metric_key,
@@ -23,8 +23,8 @@ def get_data(
23
23
  self,
24
24
  select_columns: Optional[List[str]] = None,
25
25
  omit_columns: Optional[List[str]] = None,
26
- begin: Union[datetime, int, None] = None,
27
- end: Union[datetime, int, None] = None,
26
+ begin: Union[datetime, int, str, None] = None,
27
+ end: Union[datetime, int, str, None] = None,
28
28
  params: Optional[Dict[str, Any]] = None,
29
29
  as_iterator: bool = False,
30
30
  as_chunks: bool = False,
@@ -48,12 +48,12 @@ def get_data(
48
48
  omit_columns: Optional[List[str]], default None
49
49
  If provided, remove these columns from the selection.
50
50
 
51
- begin: Union[datetime, int, None], default None
51
+ begin: Union[datetime, int, str, None], default None
52
52
  Lower bound datetime to begin searching for data (inclusive).
53
53
  Translates to a `WHERE` clause like `WHERE datetime >= begin`.
54
54
  Defaults to `None`.
55
55
 
56
- end: Union[datetime, int, None], default None
56
+ end: Union[datetime, int, str, None], default None
57
57
  Upper bound datetime to stop searching for data (exclusive).
58
58
  Translates to a `WHERE` clause like `WHERE datetime < end`.
59
59
  Defaults to `None`.
@@ -105,11 +105,12 @@ def get_data(
105
105
  from meerschaum.utils.venv import Venv
106
106
  from meerschaum.connectors import get_connector_plugin
107
107
  from meerschaum.utils.misc import iterate_chunks, items_str
108
- from meerschaum.utils.dtypes import to_pandas_dtype
108
+ from meerschaum.utils.dtypes import to_pandas_dtype, coerce_timezone
109
109
  from meerschaum.utils.dataframe import add_missing_cols_to_df, df_is_chunk_generator
110
110
  from meerschaum.utils.packages import attempt_import
111
111
  dd = attempt_import('dask.dataframe') if as_dask else None
112
112
  dask = attempt_import('dask') if as_dask else None
113
+ dateutil_parser = attempt_import('dateutil.parser')
113
114
 
114
115
  if select_columns == '*':
115
116
  select_columns = None
@@ -119,12 +120,13 @@ def get_data(
119
120
  if isinstance(omit_columns, str):
120
121
  omit_columns = [omit_columns]
121
122
 
123
+ begin, end = self.parse_date_bounds(begin, end)
122
124
  as_iterator = as_iterator or as_chunks
125
+ dt_col = self.columns.get('datetime', None)
123
126
 
124
127
  def _sort_df(_df):
125
128
  if df_is_chunk_generator(_df):
126
129
  return _df
127
- dt_col = self.columns.get('datetime', None)
128
130
  indices = [] if dt_col not in _df.columns else [dt_col]
129
131
  non_dt_cols = [
130
132
  col
@@ -311,16 +313,8 @@ def _get_data_as_iterator(
311
313
  Return a pipe's data as a generator.
312
314
  """
313
315
  from meerschaum.utils.misc import round_time
314
- parse_begin = isinstance(begin, str)
315
- parse_end = isinstance(end, str)
316
- if parse_begin or parse_end:
317
- from meerschaum.utils.packages import attempt_import
318
- dateutil_parser = attempt_import('dateutil.parser')
319
- if parse_begin:
320
- begin = dateutil_parser.parse(begin)
321
- if parse_end:
322
- end = dateutil_parser.parse(end)
323
-
316
+ from meerschaum.utils.dtypes import coerce_timezone
317
+ begin, end = self.parse_date_bounds(begin, end)
324
318
  if not self.exists(debug=debug):
325
319
  return
326
320
 
@@ -332,11 +326,15 @@ def _get_data_as_iterator(
332
326
  if begin is not None
333
327
  else self.get_sync_time(round_down=False, newest=False, params=params, debug=debug)
334
328
  ) if dt_col else None
329
+ if isinstance(min_dt, datetime):
330
+ min_dt = coerce_timezone(min_dt)
335
331
  max_dt = (
336
332
  end
337
333
  if end is not None
338
334
  else self.get_sync_time(round_down=False, newest=True, params=params, debug=debug)
339
335
  ) if dt_col else None
336
+ if isinstance(max_dt, datetime):
337
+ max_dt = coerce_timezone(max_dt)
340
338
 
341
339
  ### We want to search just past the maximum value.
342
340
  if end is None:
@@ -450,6 +448,8 @@ def get_backtrack_data(
450
448
  if not self.exists(debug=debug):
451
449
  return None
452
450
 
451
+ begin = self.parse_date_bounds(begin)
452
+
453
453
  backtrack_interval = self.get_backtrack_interval(debug=debug)
454
454
  if backtrack_minutes is None:
455
455
  backtrack_minutes = (
@@ -550,6 +550,7 @@ def get_rowcount(
550
550
  from meerschaum.utils.venv import Venv
551
551
  from meerschaum.connectors import get_connector_plugin
552
552
 
553
+ begin, end = self.parse_date_bounds(begin, end)
553
554
  connector = self.instance_connector if not remote else self.connector
554
555
  try:
555
556
  with Venv(get_connector_plugin(connector)):
@@ -607,7 +608,7 @@ def get_chunk_interval(
607
608
  if dt_col is None:
608
609
  return timedelta(minutes=chunk_minutes)
609
610
 
610
- dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns]')
611
+ dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
611
612
  if 'int' in dt_dtype.lower():
612
613
  return chunk_minutes
613
614
  return timedelta(minutes=chunk_minutes)
@@ -664,6 +665,8 @@ def get_chunk_bounds(
664
665
  if begin is None and end is None:
665
666
  return [(None, None)]
666
667
 
668
+ begin, end = self.parse_date_bounds(begin, end)
669
+
667
670
  ### Set the chunk interval under `pipe.parameters['verify']['chunk_minutes']`.
668
671
  chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
669
672
 
@@ -695,3 +698,48 @@ def get_chunk_bounds(
695
698
  chunk_bounds = chunk_bounds + [(end, None)]
696
699
 
697
700
  return chunk_bounds
701
+
702
+
703
+ def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
704
+ datetime,
705
+ int,
706
+ str,
707
+ None,
708
+ Tuple[Union[datetime, int, str, None]]
709
+ ]:
710
+ """
711
+ Given one or more date bounds (e.g. `begin`, `end`), parse date strings and integer-like values, and coerce a timezone if necessary.
712
+ """
713
+ from meerschaum.utils.misc import is_int
714
+ from meerschaum.utils.dtypes import coerce_timezone
715
+ from meerschaum.utils.warnings import warn
716
+ dateutil_parser = mrsm.attempt_import('dateutil.parser')
717
+
718
+ def _parse_date_bound(dt_val):
719
+ if dt_val is None:
720
+ return None
721
+
722
+ if isinstance(dt_val, int):
723
+ return dt_val
724
+
725
+ if dt_val == '':
726
+ return ''
727
+
728
+ if is_int(dt_val):
729
+ return int(dt_val)
730
+
731
+ if isinstance(dt_val, str):
732
+ try:
733
+ dt_val = dateutil_parser.parse(dt_val)
734
+ except Exception as e:
735
+ warn(f"Could not parse '{dt_val}' as datetime:\n{e}")
736
+ return None
737
+
738
+ dt_col = self.columns.get('datetime', None)
739
+ dt_typ = str(self.dtypes.get(dt_col, 'datetime64[ns, UTC]'))
740
+ return coerce_timezone(dt_val, strip_utc=('utc' not in dt_typ.lower()))
741
+
742
+ bounds = tuple(_parse_date_bound(dt_val) for dt_val in dt_vals)
743
+ if len(bounds) == 1:
744
+ return bounds[0]
745
+ return bounds
@@ -65,14 +65,16 @@ def deduplicate(
65
65
  from meerschaum.connectors import get_connector_plugin
66
66
  from meerschaum.utils.pool import get_pool
67
67
 
68
+ begin, end = self.parse_date_bounds(begin, end)
69
+
68
70
  if self.cache_pipe is not None:
69
71
  success, msg = self.cache_pipe.deduplicate(
70
- begin = begin,
71
- end = end,
72
- params = params,
73
- bounded = bounded,
74
- debug = debug,
75
- _use_instance_method = _use_instance_method,
72
+ begin=begin,
73
+ end=end,
74
+ params=params,
75
+ bounded=bounded,
76
+ debug=debug,
77
+ _use_instance_method=_use_instance_method,
76
78
  **kwargs
77
79
  )
78
80
  if not success:
@@ -86,11 +88,11 @@ def deduplicate(
86
88
  if hasattr(self.instance_connector, 'deduplicate_pipe'):
87
89
  return self.instance_connector.deduplicate_pipe(
88
90
  self,
89
- begin = begin,
90
- end = end,
91
- params = params,
92
- bounded = bounded,
93
- debug = debug,
91
+ begin=begin,
92
+ end=end,
93
+ params=params,
94
+ bounded=bounded,
95
+ debug=debug,
94
96
  **kwargs
95
97
  )
96
98
 
@@ -117,33 +119,33 @@ def deduplicate(
117
119
  )
118
120
 
119
121
  chunk_bounds = self.get_chunk_bounds(
120
- bounded = bounded,
121
- begin = begin,
122
- end = end,
123
- chunk_interval = chunk_interval,
124
- debug = debug,
122
+ bounded=bounded,
123
+ begin=begin,
124
+ end=end,
125
+ chunk_interval=chunk_interval,
126
+ debug=debug,
125
127
  )
126
128
 
127
129
  indices = [col for col in self.columns.values() if col]
128
130
  if not indices:
129
- return False, f"Cannot deduplicate without index columns."
131
+ return False, "Cannot deduplicate without index columns."
130
132
  dt_col = self.columns.get('datetime', None)
131
133
 
132
134
  def process_chunk_bounds(bounds) -> Tuple[
133
- Tuple[
134
- Union[datetime, int, None],
135
- Union[datetime, int, None]
136
- ],
137
- SuccessTuple
138
- ]:
135
+ Tuple[
136
+ Union[datetime, int, None],
137
+ Union[datetime, int, None]
138
+ ],
139
+ SuccessTuple
140
+ ]:
139
141
  ### Only selecting the index values here to keep bandwidth down.
140
142
  chunk_begin, chunk_end = bounds
141
143
  chunk_df = self.get_data(
142
- select_columns = indices,
143
- begin = chunk_begin,
144
- end = chunk_end,
145
- params = params,
146
- debug = debug,
144
+ select_columns=indices,
145
+ begin=chunk_begin,
146
+ end=chunk_end,
147
+ params=params,
148
+ debug=debug,
147
149
  )
148
150
  if chunk_df is None:
149
151
  return bounds, (True, "")
@@ -101,18 +101,18 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
101
101
  dt_col = self.columns.get('datetime', None)
102
102
  if dt_col:
103
103
  if not self.parameters.get('dtypes', {}).get(dt_col, None):
104
- dtypes[dt_col] = 'datetime64[ns]'
104
+ dtypes[dt_col] = 'datetime64[ns, UTC]'
105
105
  return dtypes
106
106
 
107
- from meerschaum.utils.sql import get_pd_type
108
- from meerschaum.utils.misc import to_pandas_dtype
107
+ from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
108
+ from meerschaum.utils.dtypes import to_pandas_dtype
109
109
  columns_types = self.get_columns_types(debug=debug)
110
110
 
111
111
  ### NOTE: get_columns_types() may return either the types as
112
112
  ### PostgreSQL- or Pandas-style.
113
113
  dtypes = {
114
114
  c: (
115
- get_pd_type(t, allow_custom_dtypes=True)
115
+ get_pd_type_from_db_type(t, allow_custom_dtypes=True)
116
116
  if str(t).isupper()
117
117
  else to_pandas_dtype(t)
118
118
  )
@@ -18,14 +18,14 @@ if TYPE_CHECKING:
18
18
  pd = mrsm.attempt_import('pandas')
19
19
 
20
20
  def fetch(
21
- self,
22
- begin: Union[datetime, str, None] = '',
23
- end: Optional[datetime] = None,
24
- check_existing: bool = True,
25
- sync_chunks: bool = False,
26
- debug: bool = False,
27
- **kw: Any
28
- ) -> Union['pd.DataFrame', Iterator['pd.DataFrame'], None]:
21
+ self,
22
+ begin: Union[datetime, int, str, None] = '',
23
+ end: Union[datetime, int, None] = None,
24
+ check_existing: bool = True,
25
+ sync_chunks: bool = False,
26
+ debug: bool = False,
27
+ **kw: Any
28
+ ) -> Union['pd.DataFrame', Iterator['pd.DataFrame'], None]:
29
29
  """
30
30
  Fetch a Pipe's latest data from its connector.
31
31
 
@@ -76,6 +76,8 @@ def fetch(
76
76
  chunk_message = '\n' + chunk_label + '\n' + chunk_message
77
77
  return chunk_success, chunk_message
78
78
 
79
+ begin, end = self.parse_date_bounds(begin, end)
80
+
79
81
  with mrsm.Venv(get_connector_plugin(self.connector)):
80
82
  _args, _kwargs = filter_arguments(
81
83
  self.connector.fetch,
@@ -125,7 +127,7 @@ def get_backtrack_interval(
125
127
  if dt_col is None:
126
128
  return backtrack_interval
127
129
 
128
- dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns]')
130
+ dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
129
131
  if 'int' in dt_dtype.lower():
130
132
  return backtrack_minutes
131
133
 
@@ -164,6 +166,6 @@ def _determine_begin(
164
166
  backtrack_interval = timedelta(minutes=backtrack_interval)
165
167
  try:
166
168
  return sync_time - backtrack_interval
167
- except Exception as e:
169
+ except Exception:
168
170
  warn(f"Unable to substract backtrack interval {backtrack_interval} from {sync_time}.")
169
171
  return sync_time
@@ -141,6 +141,7 @@ def sync(
141
141
  chunksize = None
142
142
  sync_chunks = False
143
143
 
144
+ begin, end = self.parse_date_bounds(begin, end)
144
145
  kw.update({
145
146
  'begin': begin,
146
147
  'end': end,
@@ -460,7 +461,7 @@ def get_sync_time(
460
461
  apply_backtrack_interval: bool = False,
461
462
  round_down: bool = False,
462
463
  debug: bool = False
463
- ) -> Union['datetime', None]:
464
+ ) -> Union['datetime', int, None]:
464
465
  """
465
466
  Get the most recent datetime value for a Pipe.
466
467
 
@@ -485,7 +486,7 @@ def get_sync_time(
485
486
 
486
487
  Returns
487
488
  -------
488
- A `datetime` object if the pipe exists, otherwise `None`.
489
+ A `datetime` or `int` if the pipe exists, otherwise `None`.
489
490
 
490
491
  """
491
492
  from meerschaum.utils.venv import Venv
@@ -510,13 +511,13 @@ def get_sync_time(
510
511
  except Exception as e:
511
512
  warn(f"Failed to apply backtrack interval:\n{e}")
512
513
 
513
- return sync_time
514
+ return self.parse_date_bounds(sync_time)
514
515
 
515
516
 
516
517
  def exists(
517
- self,
518
- debug : bool = False
519
- ) -> bool:
518
+ self,
519
+ debug: bool = False
520
+ ) -> bool:
520
521
  """
521
522
  See if a Pipe's table exists.
522
523
 
@@ -549,7 +550,11 @@ def exists(
549
550
  return _exists
550
551
 
551
552
  with Venv(get_connector_plugin(self.instance_connector)):
552
- _exists = self.instance_connector.pipe_exists(pipe=self, debug=debug)
553
+ _exists = (
554
+ self.instance_connector.pipe_exists(pipe=self, debug=debug)
555
+ if hasattr(self.instance_connector, 'pipe_exists')
556
+ else False
557
+ )
553
558
 
554
559
  self.__dict__['_exists'] = _exists
555
560
  self.__dict__['_exists_timestamp'] = now
@@ -624,6 +629,18 @@ def filter_existing(
624
629
  merge = pd.merge
625
630
  NA = pd.NA
626
631
 
632
+ primary_key = self.columns.get('primary', None)
633
+ autoincrement = self.parameters.get('autoincrement', False)
634
+ pipe_columns = self.columns.copy()
635
+
636
+ if primary_key and autoincrement and df is not None and primary_key in df.columns:
637
+ if safe_copy:
638
+ df = df.copy()
639
+ safe_copy = False
640
+ if df[primary_key].isnull().all():
641
+ del df[primary_key]
642
+ _ = self.columns.pop(primary_key, None)
643
+
627
644
  def get_empty_df():
628
645
  empty_df = pd.DataFrame([])
629
646
  dtypes = dict(df.dtypes) if df is not None else {}
@@ -643,8 +660,8 @@ def filter_existing(
643
660
 
644
661
  ### begin is the oldest data in the new dataframe
645
662
  begin, end = None, None
646
- dt_col = self.columns.get('datetime', None)
647
- dt_type = self.dtypes.get(dt_col, 'datetime64[ns]') if dt_col else None
663
+ dt_col = pipe_columns.get('datetime', None)
664
+ dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
648
665
  try:
649
666
  min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
650
667
  if is_dask and min_dt_val is not None:
@@ -713,7 +730,7 @@ def filter_existing(
713
730
 
714
731
  unique_index_vals = {
715
732
  col: df[col].unique()
716
- for col in self.columns
733
+ for col in pipe_columns
717
734
  if col in df.columns and col != dt_col
718
735
  } if not date_bound_only else {}
719
736
  filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
@@ -749,7 +766,7 @@ def filter_existing(
749
766
 
750
767
  ### Separate new rows from changed ones.
751
768
  on_cols = [
752
- col for col_key, col in self.columns.items()
769
+ col for col_key, col in pipe_columns.items()
753
770
  if (
754
771
  col
755
772
  and
@@ -11,6 +11,7 @@ from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, L
11
11
  from meerschaum.utils.warnings import warn, info
12
12
  from meerschaum.utils.debug import dprint
13
13
 
14
+
14
15
  def verify(
15
16
  self,
16
17
  begin: Union[datetime, int, None] = None,
@@ -84,6 +85,8 @@ def verify(
84
85
  if bounded and end is None:
85
86
  end = self.get_sync_time(newest=True, debug=debug)
86
87
 
88
+ begin, end = self.parse_date_bounds(begin, end)
89
+
87
90
  if bounded and end is not None:
88
91
  end += (
89
92
  timedelta(minutes=1)
@@ -98,45 +101,45 @@ def verify(
98
101
 
99
102
  if cannot_determine_bounds:
100
103
  sync_success, sync_msg = self.sync(
101
- begin = begin,
102
- end = end,
103
- params = params,
104
- workers = workers,
105
- debug = debug,
104
+ begin=begin,
105
+ end=end,
106
+ params=params,
107
+ workers=workers,
108
+ debug=debug,
106
109
  **kwargs
107
110
  )
108
111
  if not sync_success:
109
112
  return sync_success, sync_msg
113
+
110
114
  if deduplicate:
111
115
  return self.deduplicate(
112
- begin = begin,
113
- end = end,
114
- params = params,
115
- workers = workers,
116
- debug = debug,
116
+ begin=begin,
117
+ end=end,
118
+ params=params,
119
+ workers=workers,
120
+ debug=debug,
117
121
  **kwargs
118
122
  )
119
123
  return sync_success, sync_msg
120
124
 
121
-
122
125
  chunk_interval = self.get_chunk_interval(chunk_interval, debug=debug)
123
126
  chunk_bounds = self.get_chunk_bounds(
124
- begin = begin,
125
- end = end,
126
- chunk_interval = chunk_interval,
127
- bounded = bounded,
128
- debug = debug,
127
+ begin=begin,
128
+ end=end,
129
+ chunk_interval=chunk_interval,
130
+ bounded=bounded,
131
+ debug=debug,
129
132
  )
130
133
 
131
134
  ### Consider it a success if no chunks need to be verified.
132
135
  if not chunk_bounds:
133
136
  if deduplicate:
134
137
  return self.deduplicate(
135
- begin = begin,
136
- end = end,
137
- params = params,
138
- workers = workers,
139
- debug = debug,
138
+ begin=begin,
139
+ end=end,
140
+ params=params,
141
+ workers=workers,
142
+ debug=debug,
140
143
  **kwargs
141
144
  )
142
145
  return True, f"Could not determine chunks between '{begin}' and '{end}'; nothing to do."
@@ -175,21 +178,21 @@ def verify(
175
178
  ### }
176
179
  bounds_success_tuples = {}
177
180
  def process_chunk_bounds(
178
- chunk_begin_and_end: Tuple[
179
- Union[int, datetime],
180
- Union[int, datetime]
181
- ]
182
- ):
181
+ chunk_begin_and_end: Tuple[
182
+ Union[int, datetime],
183
+ Union[int, datetime]
184
+ ]
185
+ ):
183
186
  if chunk_begin_and_end in bounds_success_tuples:
184
187
  return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
185
188
 
186
189
  chunk_begin, chunk_end = chunk_begin_and_end
187
190
  return chunk_begin_and_end, self.sync(
188
- begin = chunk_begin,
189
- end = chunk_end,
190
- params = params,
191
- workers = workers,
192
- debug = debug,
191
+ begin=chunk_begin,
192
+ end=chunk_end,
193
+ params=params,
194
+ workers=workers,
195
+ debug=debug,
193
196
  **kwargs
194
197
  )
195
198
 
@@ -216,11 +219,11 @@ def verify(
216
219
  msg = get_chunks_success_message(bounds_success_tuples, header=message_header)
217
220
  if deduplicate:
218
221
  deduplicate_success, deduplicate_msg = self.deduplicate(
219
- begin = begin,
220
- end = end,
221
- params = params,
222
- workers = workers,
223
- debug = debug,
222
+ begin=begin,
223
+ end=end,
224
+ params=params,
225
+ workers=workers,
226
+ debug=debug,
224
227
  **kwargs
225
228
  )
226
229
  return deduplicate_success, msg + '\n\n' + deduplicate_msg
@@ -239,7 +242,7 @@ def verify(
239
242
  warn(
240
243
  f"Will resync the following failed chunks:\n "
241
244
  + '\n '.join(bounds_to_print),
242
- stack = False,
245
+ stack=False,
243
246
  )
244
247
 
245
248
  retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
@@ -256,11 +259,11 @@ def verify(
256
259
  )
257
260
  if deduplicate:
258
261
  deduplicate_success, deduplicate_msg = self.deduplicate(
259
- begin = begin,
260
- end = end,
261
- params = params,
262
- workers = workers,
263
- debug = debug,
262
+ begin=begin,
263
+ end=end,
264
+ params=params,
265
+ workers=workers,
266
+ debug=debug,
264
267
  **kwargs
265
268
  )
266
269
  return deduplicate_success, message + '\n\n' + deduplicate_msg
@@ -269,11 +272,11 @@ def verify(
269
272
  message = get_chunks_success_message(bounds_success_tuples, header=message_header)
270
273
  if deduplicate:
271
274
  deduplicate_success, deduplicate_msg = self.deduplicate(
272
- begin = begin,
273
- end = end,
274
- params = params,
275
- workers = workers,
276
- debug = debug,
275
+ begin=begin,
276
+ end=end,
277
+ params=params,
278
+ workers=workers,
279
+ debug=debug,
277
280
  **kwargs
278
281
  )
279
282
  return deduplicate_success, message + '\n\n' + deduplicate_msg
@@ -394,7 +397,7 @@ def get_bound_interval(self, debug: bool = False) -> Union[timedelta, int, None]
394
397
  if not dt_col:
395
398
  return bound_time_value
396
399
 
397
- dt_typ = self.dtypes.get(dt_col, 'datetime64[ns]')
400
+ dt_typ = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
398
401
  if 'int' in dt_typ.lower():
399
402
  return int(bound_time_value)
400
403
 
@@ -417,7 +420,7 @@ def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]:
417
420
  -------
418
421
  A `datetime` or `int` corresponding to the
419
422
  `begin` bound for verification and deduplication syncs.
420
- """
423
+ """
421
424
  bound_interval = self.get_bound_interval(debug=debug)
422
425
  if bound_interval is None:
423
426
  return None