meerschaum 2.6.9__py3-none-any.whl → 2.6.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,4 +2,4 @@
2
2
  Specify the Meerschaum release version.
3
3
  """
4
4
 
5
- __version__ = "2.6.9"
5
+ __version__ = "2.6.10"
@@ -56,7 +56,7 @@ def enforce_dtypes(
56
56
  chunksize=chunksize,
57
57
  debug=debug,
58
58
  )
59
- else:
59
+ elif isinstance(df, (dict, list)):
60
60
  df = parse_df_datetimes(
61
61
  df,
62
62
  ignore_cols=[
@@ -567,6 +567,7 @@ def filter_existing(
567
567
  safe_copy: bool = True,
568
568
  date_bound_only: bool = False,
569
569
  include_unchanged_columns: bool = False,
570
+ enforce_dtypes: bool = False,
570
571
  chunksize: Optional[int] = -1,
571
572
  debug: bool = False,
572
573
  **kw
@@ -591,6 +592,10 @@ def filter_existing(
591
592
  If `True`, include the backtrack columns which haven't changed in the update dataframe.
592
593
  This is useful if you can't update individual keys.
593
594
 
595
+ enforce_dtypes: bool, default False
596
+ If `True`, ensure the given and intermediate dataframes are enforced to the correct dtypes.
597
+ Setting `enforce_dtypes=True` may impact performance.
598
+
594
599
  chunksize: Optional[int], default -1
595
600
  The `chunksize` used when fetching existing data.
596
601
 
@@ -618,8 +623,9 @@ def filter_existing(
618
623
  from meerschaum.config import get_config
619
624
  pd = import_pandas()
620
625
  pandas = attempt_import('pandas')
621
- df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
622
- is_dask = 'dask' in df.__module__
626
+ if enforce_dtypes or 'dataframe' not in str(type(df)).lower():
627
+ df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
628
+ is_dask = hasattr('df', '__module__') and 'dask' in df.__module__
623
629
  if is_dask:
624
630
  dd = attempt_import('dask.dataframe')
625
631
  merge = dd.merge
@@ -759,7 +765,8 @@ def filter_existing(
759
765
  dprint(f"No backtrack data was found for {self}.")
760
766
  return df, get_empty_df(), df
761
767
 
762
- backtrack_df = self.enforce_dtypes(backtrack_df, chunksize=chunksize, debug=debug)
768
+ if enforce_dtypes:
769
+ backtrack_df = self.enforce_dtypes(backtrack_df, chunksize=chunksize, debug=debug)
763
770
 
764
771
  if debug:
765
772
  dprint(f"Existing data for {self}:\n" + str(backtrack_df), **kw)
@@ -796,7 +803,8 @@ def filter_existing(
796
803
  ),
797
804
  on_cols_dtypes,
798
805
  )
799
- delta_df = self.enforce_dtypes(delta_df, chunksize=chunksize, debug=debug)
806
+ if enforce_dtypes:
807
+ delta_df = self.enforce_dtypes(delta_df, chunksize=chunksize, debug=debug)
800
808
 
801
809
  ### Cast dicts or lists to strings so we can merge.
802
810
  serializer = functools.partial(json.dumps, sort_keys=True, separators=(',', ':'), default=str)
@@ -820,7 +828,6 @@ def filter_existing(
820
828
  indicator=True,
821
829
  suffixes=('', '_old'),
822
830
  ) if on_cols else delta_df
823
- joined_df = self.enforce_dtypes(joined_df, chunksize=chunksize, debug=debug)
824
831
  for col in casted_cols:
825
832
  if col in joined_df.columns:
826
833
  joined_df[col] = joined_df[col].apply(deserializer)
@@ -837,7 +844,6 @@ def filter_existing(
837
844
  .dropna(how='all')[cols]
838
845
  .reset_index(drop=True)
839
846
  ) if on_cols else delta_df
840
- unseen_df = self.enforce_dtypes(unseen_df, chunksize=chunksize, debug=debug)
841
847
 
842
848
  ### Rows that have already been inserted but values have changed.
843
849
  update_df = (
@@ -846,7 +852,6 @@ def filter_existing(
846
852
  .dropna(how='all')[cols]
847
853
  .reset_index(drop=True)
848
854
  ) if on_cols else get_empty_df()
849
- update_df = self.enforce_dtypes(update_df, chunksize=chunksize, debug=debug)
850
855
 
851
856
  if include_unchanged_columns and on_cols:
852
857
  unchanged_backtrack_cols = [
@@ -854,6 +859,8 @@ def filter_existing(
854
859
  for col in backtrack_df.columns
855
860
  if col in on_cols or col not in update_df.columns
856
861
  ]
862
+ if enforce_dtypes:
863
+ update_df = self.enforce_dtypes(update_df, chunksize=chunksize, debug=debug)
857
864
  update_df = merge(
858
865
  backtrack_df[unchanged_backtrack_cols],
859
866
  update_df,
@@ -1283,18 +1283,8 @@ def query_df(
1283
1283
  if debug:
1284
1284
  dprint(f"`end` will be cast to '{end}'.")
1285
1285
 
1286
- begin_tz = begin.tzinfo if begin is not None else None
1287
- end_tz = end.tzinfo if end is not None else None
1288
-
1289
- if begin_tz is not None or end_tz is not None or df_tz is not None:
1290
- begin = coerce_timezone(begin, strip_utc=False)
1291
- end = coerce_timezone(end, strip_utc=False)
1292
- if df_tz is not None:
1293
- if debug:
1294
- dprint(f"Casting column '{datetime_column}' to UTC...")
1295
- df[datetime_column] = coerce_timezone(df[datetime_column], strip_utc=False)
1296
- if debug:
1297
- dprint(f"Using datetime bounds:\n{begin=}\n{end=}")
1286
+ begin = coerce_timezone(begin, strip_utc=(df_tz is None)) if begin is not None else None
1287
+ end = coerce_timezone(end, strip_utc=(df_tz is None)) if begin is not None else None
1298
1288
 
1299
1289
  in_ex_params = get_in_ex_params(params)
1300
1290
 
@@ -1340,15 +1330,24 @@ def query_df(
1340
1330
  ]
1341
1331
  for col in bool_cols:
1342
1332
  df[col] = df[col].astype('boolean[pyarrow]')
1343
- df['__mrsm_mask'] = query_mask.astype('boolean[pyarrow]')
1344
1333
 
1345
- if inplace:
1346
- df.where(query_mask, other=NA, inplace=True)
1347
- df.dropna(how='all', inplace=True)
1348
- result_df = df
1334
+ if not isinstance(query_mask, bool):
1335
+ df['__mrsm_mask'] = (
1336
+ query_mask.astype('boolean[pyarrow]')
1337
+ if hasattr(query_mask, 'astype')
1338
+ else query_mask
1339
+ )
1340
+
1341
+ if inplace:
1342
+ df.where(query_mask, other=NA, inplace=True)
1343
+ df.dropna(how='all', inplace=True)
1344
+ result_df = df
1345
+ else:
1346
+ result_df = df.where(query_mask, other=NA)
1347
+ result_df.dropna(how='all', inplace=True)
1348
+
1349
1349
  else:
1350
- result_df = df.where(query_mask, other=NA)
1351
- result_df.dropna(how='all', inplace=True)
1350
+ result_df = df
1352
1351
 
1353
1352
  if '__mrsm_mask' in df.columns:
1354
1353
  del df['__mrsm_mask']
@@ -267,8 +267,18 @@ def coerce_timezone(
267
267
 
268
268
  if dt_is_series:
269
269
  is_dask = 'dask' in dt.__module__
270
- pandas = mrsm.attempt_import('pandas')
270
+ pandas = mrsm.attempt_import('pandas', lazy=False)
271
271
  dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
272
+
273
+ if (
274
+ pandas.api.types.is_datetime64_any_dtype(dt) and (
275
+ (dt.dt.tz is not None and not strip_utc)
276
+ or
277
+ (dt.dt.tz is None and strip_utc)
278
+ )
279
+ ):
280
+ return dt
281
+
272
282
  dt_series = (
273
283
  pandas.to_datetime(dt, utc=True, format='ISO8601')
274
284
  if dd is None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: meerschaum
3
- Version: 2.6.9
3
+ Version: 2.6.10
4
4
  Summary: Sync Time-Series Pipes with Meerschaum
5
5
  Home-page: https://meerschaum.io
6
6
  Author: Bennett Meares
@@ -143,7 +143,7 @@ meerschaum/config/_preprocess.py,sha256=-AEA8m_--KivZwTQ1sWN6LTn5sio_fUr2XZ51BO6
143
143
  meerschaum/config/_read_config.py,sha256=RLC3HHi_1ndj7ITVDKLD9_uULY3caGRwSz3ATYE-ixA,15014
144
144
  meerschaum/config/_shell.py,sha256=46_m49Txc5q1rGfCgO49ca48BODx45DQJi8D0zz1R18,4245
145
145
  meerschaum/config/_sync.py,sha256=jHcWRkxd82_BgX8Xo8agsWvf7BSbv3qHLWmYl6ehp_0,4242
146
- meerschaum/config/_version.py,sha256=TPP8TjaIitWMVdjkOvsGLJlk5ixmTGvF8uEiF1nbPtA,71
146
+ meerschaum/config/_version.py,sha256=QOPk_Pjr2KnGrSNrSaTJLqzoKYSFI0Wk__-Tb-BOgSQ,72
147
147
  meerschaum/config/paths.py,sha256=JjibeGN3YAdSNceRwsd42aNmeUrIgM6ndzC8qZAmNI0,621
148
148
  meerschaum/config/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
149
149
  meerschaum/config/stack/__init__.py,sha256=2UukC0Lmk-aVL1o1qXzumqmuIrw3vu9fD7iCuz4XD4I,10544
@@ -200,12 +200,12 @@ meerschaum/core/Pipe/_data.py,sha256=HOvKlpD9NiBg599mSH1lzUQ_2g8-PsnMbGzMTOJx81E
200
200
  meerschaum/core/Pipe/_deduplicate.py,sha256=xthUdsDxGO2t3m0XGDm9K3F6dpaZoemtjNi8gyKm0e0,10177
201
201
  meerschaum/core/Pipe/_delete.py,sha256=1geNp9BgrocXP1gt76dMbnlJWKYFMuSNqPFA4K4-hXE,2118
202
202
  meerschaum/core/Pipe/_drop.py,sha256=qj5L0obDh2_dKAg7LD3WLI3_L0Q5YECTfTmPc8zZUxI,1135
203
- meerschaum/core/Pipe/_dtypes.py,sha256=VohahiZk44Uw3PNsTLyqH8m9wRdB-mkPgYvgCb5hlA0,4114
203
+ meerschaum/core/Pipe/_dtypes.py,sha256=LNgZ47W5DMtZ2hwrcQamwapjWPWgtVQ42u67g4M0n3k,4143
204
204
  meerschaum/core/Pipe/_edit.py,sha256=HrKWe9vhqKaNOjOcJzW5BNbaUBPIbgNAhJEK8OMsy7c,8416
205
205
  meerschaum/core/Pipe/_fetch.py,sha256=Q_LncNi1nv-YwvRPbh1QK0hf6hflL7Hn9v9lT3oQgF4,5451
206
206
  meerschaum/core/Pipe/_register.py,sha256=Sd5xaAW8H7uLTIoommcKb-6kHPRuHJLWNSbPnt2UbvA,2240
207
207
  meerschaum/core/Pipe/_show.py,sha256=nG50y8eBT9TVuKkRgAKtNDNIxysJvMNxfu__lkL1F9k,1352
208
- meerschaum/core/Pipe/_sync.py,sha256=z24Y8nKiqwSQTZtuOK-UM1Jl-2IS4Uev_Y9_i7vORXs,34486
208
+ meerschaum/core/Pipe/_sync.py,sha256=E2egt63tqhVpB0ZteMO36HTF4EYEMtdevKOtN1aLd9o,34753
209
209
  meerschaum/core/Pipe/_verify.py,sha256=c3HvsZd4QPydqozaV6cDDRtwYiNz4V91b0IcnKvcimA,14158
210
210
  meerschaum/core/Plugin/__init__.py,sha256=UXg64EvJPgI1PCxkY_KM02-ZmBm4FZpLPIQR_uSJJDc,137
211
211
  meerschaum/core/User/_User.py,sha256=JZ9Y1tsjZe-cgD24m9YfZ6ZwSOKn_sHc4rbQ7KblBz8,6592
@@ -219,7 +219,7 @@ meerschaum/plugins/__init__.py,sha256=6krcqaMKyzuVqesXMqEL0XEy2SJQ4xfNt2-oI_fJ6v
219
219
  meerschaum/plugins/bootstrap.py,sha256=VwjpZAuYdqPJW0YoVgAoM_taHkdQHqP902-8T7OWWCI,11339
220
220
  meerschaum/utils/__init__.py,sha256=QrK1K9hIbPCRCM5k2nZGFqGnrqhA0Eh-iSmCU7FG6Cs,612
221
221
  meerschaum/utils/_get_pipes.py,sha256=tu4xKPoDn79Dz2kWM13cXTP4DSCkn-3G9M8KiLftopw,11073
222
- meerschaum/utils/dataframe.py,sha256=RByfxVec8qv8-zSffTiLavGjG4FII0gehPFX0wcLhmM,43672
222
+ meerschaum/utils/dataframe.py,sha256=R-TgFTV6qbJ3xPEl596Dw6m1lF8Lqz35n1PtFjCYmI0,43400
223
223
  meerschaum/utils/debug.py,sha256=GyIzJmunkoPnOcZNYVQdT4Sgd-aOb5MI2VbIgATOjIQ,3695
224
224
  meerschaum/utils/interactive.py,sha256=t-6jWozXSqL7lYGDHuwiOjTgr-UKhdcg61q_eR5mikI,3196
225
225
  meerschaum/utils/misc.py,sha256=soGmUooT216Dl15KbcUTzf8E-aC6uNM6Zvy1PiUT_Y4,47089
@@ -239,7 +239,7 @@ meerschaum/utils/daemon/RotatingFile.py,sha256=ePm_svjwyFDWh6V1k-bp1RHXCSWlyxDtl
239
239
  meerschaum/utils/daemon/StdinFile.py,sha256=J6tyUReM8NEp3bBQAxMfe8mjJG5mWi6CzHN4x86VQBI,3237
240
240
  meerschaum/utils/daemon/__init__.py,sha256=o9jWb4lRTIyny4EPt7fPXFgV_vIf1mUofsTwoE1ZecA,8751
241
241
  meerschaum/utils/daemon/_names.py,sha256=d2ZwTxBoTAqXZkCfZ5LuX2XrkQmLNUq1OTlUqfoH5dA,4515
242
- meerschaum/utils/dtypes/__init__.py,sha256=mswDCXBK8SKeP2wcCjdRPAEmdx4QvSMjBmwTmt3gtSw,8090
242
+ meerschaum/utils/dtypes/__init__.py,sha256=ZG1Ccy8QmTNuR0IP5NSMtgWj7Cseh_VU4wxSQDw2m94,8352
243
243
  meerschaum/utils/dtypes/sql.py,sha256=IQihwQy4OKSbRjvJy6ky6SszFKR7W1iMs-ruZDsf2js,18701
244
244
  meerschaum/utils/formatting/__init__.py,sha256=GpJQWeqkdWw5IuDmW4Rgmapjzv-KkI4jhBZllJi4QIg,15999
245
245
  meerschaum/utils/formatting/_jobs.py,sha256=izsqPJhTtUkXUUtWnbXtReYsUYwulXtci3pBj72Ne64,6637
@@ -251,11 +251,11 @@ meerschaum/utils/packages/_packages.py,sha256=IFcQ4MzmTqjdWkqOsUa25xUNmG246TFqe2
251
251
  meerschaum/utils/packages/lazy_loader.py,sha256=VHnph3VozH29R4JnSSBfwtA5WKZYZQFT_GeQSShCnuc,2540
252
252
  meerschaum/utils/venv/_Venv.py,sha256=sBnlmxHdAh2bx8btfVoD79-H9-cYsv5lP02IIXkyECs,3553
253
253
  meerschaum/utils/venv/__init__.py,sha256=f3oi67lXYPLKJrnRW9lae7M3A8SFiC7DzaMoBdCVUFs,24609
254
- meerschaum-2.6.9.dist-info/LICENSE,sha256=jG2zQEdRNt88EgHUWPpXVWmOrOduUQRx7MnYV9YIPaw,11359
255
- meerschaum-2.6.9.dist-info/METADATA,sha256=MGzHL6_uLOLftJfakBs5_osOwEqgGkCNWTkMVAHWr-A,24757
256
- meerschaum-2.6.9.dist-info/NOTICE,sha256=OTA9Fcthjf5BRvWDDIcBC_xfLpeDV-RPZh3M-HQBRtQ,114
257
- meerschaum-2.6.9.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
258
- meerschaum-2.6.9.dist-info/entry_points.txt,sha256=5YBVzibw-0rNA_1VjB16z5GABsOGf-CDhW4yqH8C7Gc,88
259
- meerschaum-2.6.9.dist-info/top_level.txt,sha256=bNoSiDj0El6buocix-FRoAtJOeq1qOF5rRm2u9i7Q6A,11
260
- meerschaum-2.6.9.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
261
- meerschaum-2.6.9.dist-info/RECORD,,
254
+ meerschaum-2.6.10.dist-info/LICENSE,sha256=jG2zQEdRNt88EgHUWPpXVWmOrOduUQRx7MnYV9YIPaw,11359
255
+ meerschaum-2.6.10.dist-info/METADATA,sha256=pJcfXtifDFIe38Cu39zlZOlgp0wZ1-IDGQPe7HE0cPU,24758
256
+ meerschaum-2.6.10.dist-info/NOTICE,sha256=OTA9Fcthjf5BRvWDDIcBC_xfLpeDV-RPZh3M-HQBRtQ,114
257
+ meerschaum-2.6.10.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
258
+ meerschaum-2.6.10.dist-info/entry_points.txt,sha256=5YBVzibw-0rNA_1VjB16z5GABsOGf-CDhW4yqH8C7Gc,88
259
+ meerschaum-2.6.10.dist-info/top_level.txt,sha256=bNoSiDj0El6buocix-FRoAtJOeq1qOF5rRm2u9i7Q6A,11
260
+ meerschaum-2.6.10.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
261
+ meerschaum-2.6.10.dist-info/RECORD,,