meerschaum 2.6.9__py3-none-any.whl → 2.6.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/config/_version.py +1 -1
- meerschaum/core/Pipe/_dtypes.py +1 -1
- meerschaum/core/Pipe/_sync.py +14 -7
- meerschaum/utils/dataframe.py +18 -19
- meerschaum/utils/dtypes/__init__.py +11 -1
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/METADATA +1 -1
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/RECORD +13 -13
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/zip-safe +0 -0
meerschaum/config/_version.py
CHANGED
meerschaum/core/Pipe/_dtypes.py
CHANGED
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -567,6 +567,7 @@ def filter_existing(
|
|
567
567
|
safe_copy: bool = True,
|
568
568
|
date_bound_only: bool = False,
|
569
569
|
include_unchanged_columns: bool = False,
|
570
|
+
enforce_dtypes: bool = False,
|
570
571
|
chunksize: Optional[int] = -1,
|
571
572
|
debug: bool = False,
|
572
573
|
**kw
|
@@ -591,6 +592,10 @@ def filter_existing(
|
|
591
592
|
If `True`, include the backtrack columns which haven't changed in the update dataframe.
|
592
593
|
This is useful if you can't update individual keys.
|
593
594
|
|
595
|
+
enforce_dtypes: bool, default False
|
596
|
+
If `True`, ensure the given and intermediate dataframes are enforced to the correct dtypes.
|
597
|
+
Setting `enforce_dtypes=True` may impact performance.
|
598
|
+
|
594
599
|
chunksize: Optional[int], default -1
|
595
600
|
The `chunksize` used when fetching existing data.
|
596
601
|
|
@@ -618,8 +623,9 @@ def filter_existing(
|
|
618
623
|
from meerschaum.config import get_config
|
619
624
|
pd = import_pandas()
|
620
625
|
pandas = attempt_import('pandas')
|
621
|
-
|
622
|
-
|
626
|
+
if enforce_dtypes or 'dataframe' not in str(type(df)).lower():
|
627
|
+
df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
|
628
|
+
is_dask = hasattr('df', '__module__') and 'dask' in df.__module__
|
623
629
|
if is_dask:
|
624
630
|
dd = attempt_import('dask.dataframe')
|
625
631
|
merge = dd.merge
|
@@ -759,7 +765,8 @@ def filter_existing(
|
|
759
765
|
dprint(f"No backtrack data was found for {self}.")
|
760
766
|
return df, get_empty_df(), df
|
761
767
|
|
762
|
-
|
768
|
+
if enforce_dtypes:
|
769
|
+
backtrack_df = self.enforce_dtypes(backtrack_df, chunksize=chunksize, debug=debug)
|
763
770
|
|
764
771
|
if debug:
|
765
772
|
dprint(f"Existing data for {self}:\n" + str(backtrack_df), **kw)
|
@@ -796,7 +803,8 @@ def filter_existing(
|
|
796
803
|
),
|
797
804
|
on_cols_dtypes,
|
798
805
|
)
|
799
|
-
|
806
|
+
if enforce_dtypes:
|
807
|
+
delta_df = self.enforce_dtypes(delta_df, chunksize=chunksize, debug=debug)
|
800
808
|
|
801
809
|
### Cast dicts or lists to strings so we can merge.
|
802
810
|
serializer = functools.partial(json.dumps, sort_keys=True, separators=(',', ':'), default=str)
|
@@ -820,7 +828,6 @@ def filter_existing(
|
|
820
828
|
indicator=True,
|
821
829
|
suffixes=('', '_old'),
|
822
830
|
) if on_cols else delta_df
|
823
|
-
joined_df = self.enforce_dtypes(joined_df, chunksize=chunksize, debug=debug)
|
824
831
|
for col in casted_cols:
|
825
832
|
if col in joined_df.columns:
|
826
833
|
joined_df[col] = joined_df[col].apply(deserializer)
|
@@ -837,7 +844,6 @@ def filter_existing(
|
|
837
844
|
.dropna(how='all')[cols]
|
838
845
|
.reset_index(drop=True)
|
839
846
|
) if on_cols else delta_df
|
840
|
-
unseen_df = self.enforce_dtypes(unseen_df, chunksize=chunksize, debug=debug)
|
841
847
|
|
842
848
|
### Rows that have already been inserted but values have changed.
|
843
849
|
update_df = (
|
@@ -846,7 +852,6 @@ def filter_existing(
|
|
846
852
|
.dropna(how='all')[cols]
|
847
853
|
.reset_index(drop=True)
|
848
854
|
) if on_cols else get_empty_df()
|
849
|
-
update_df = self.enforce_dtypes(update_df, chunksize=chunksize, debug=debug)
|
850
855
|
|
851
856
|
if include_unchanged_columns and on_cols:
|
852
857
|
unchanged_backtrack_cols = [
|
@@ -854,6 +859,8 @@ def filter_existing(
|
|
854
859
|
for col in backtrack_df.columns
|
855
860
|
if col in on_cols or col not in update_df.columns
|
856
861
|
]
|
862
|
+
if enforce_dtypes:
|
863
|
+
update_df = self.enforce_dtypes(update_df, chunksize=chunksize, debug=debug)
|
857
864
|
update_df = merge(
|
858
865
|
backtrack_df[unchanged_backtrack_cols],
|
859
866
|
update_df,
|
meerschaum/utils/dataframe.py
CHANGED
@@ -1283,18 +1283,8 @@ def query_df(
|
|
1283
1283
|
if debug:
|
1284
1284
|
dprint(f"`end` will be cast to '{end}'.")
|
1285
1285
|
|
1286
|
-
|
1287
|
-
|
1288
|
-
|
1289
|
-
if begin_tz is not None or end_tz is not None or df_tz is not None:
|
1290
|
-
begin = coerce_timezone(begin, strip_utc=False)
|
1291
|
-
end = coerce_timezone(end, strip_utc=False)
|
1292
|
-
if df_tz is not None:
|
1293
|
-
if debug:
|
1294
|
-
dprint(f"Casting column '{datetime_column}' to UTC...")
|
1295
|
-
df[datetime_column] = coerce_timezone(df[datetime_column], strip_utc=False)
|
1296
|
-
if debug:
|
1297
|
-
dprint(f"Using datetime bounds:\n{begin=}\n{end=}")
|
1286
|
+
begin = coerce_timezone(begin, strip_utc=(df_tz is None)) if begin is not None else None
|
1287
|
+
end = coerce_timezone(end, strip_utc=(df_tz is None)) if begin is not None else None
|
1298
1288
|
|
1299
1289
|
in_ex_params = get_in_ex_params(params)
|
1300
1290
|
|
@@ -1340,15 +1330,24 @@ def query_df(
|
|
1340
1330
|
]
|
1341
1331
|
for col in bool_cols:
|
1342
1332
|
df[col] = df[col].astype('boolean[pyarrow]')
|
1343
|
-
df['__mrsm_mask'] = query_mask.astype('boolean[pyarrow]')
|
1344
1333
|
|
1345
|
-
if
|
1346
|
-
df
|
1347
|
-
|
1348
|
-
|
1334
|
+
if not isinstance(query_mask, bool):
|
1335
|
+
df['__mrsm_mask'] = (
|
1336
|
+
query_mask.astype('boolean[pyarrow]')
|
1337
|
+
if hasattr(query_mask, 'astype')
|
1338
|
+
else query_mask
|
1339
|
+
)
|
1340
|
+
|
1341
|
+
if inplace:
|
1342
|
+
df.where(query_mask, other=NA, inplace=True)
|
1343
|
+
df.dropna(how='all', inplace=True)
|
1344
|
+
result_df = df
|
1345
|
+
else:
|
1346
|
+
result_df = df.where(query_mask, other=NA)
|
1347
|
+
result_df.dropna(how='all', inplace=True)
|
1348
|
+
|
1349
1349
|
else:
|
1350
|
-
result_df = df
|
1351
|
-
result_df.dropna(how='all', inplace=True)
|
1350
|
+
result_df = df
|
1352
1351
|
|
1353
1352
|
if '__mrsm_mask' in df.columns:
|
1354
1353
|
del df['__mrsm_mask']
|
meerschaum/utils/dtypes/__init__.py
CHANGED
@@ -267,8 +267,18 @@ def coerce_timezone(
|
|
267
267
|
|
268
268
|
if dt_is_series:
|
269
269
|
is_dask = 'dask' in dt.__module__
|
270
|
-
pandas = mrsm.attempt_import('pandas')
|
270
|
+
pandas = mrsm.attempt_import('pandas', lazy=False)
|
271
271
|
dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
|
272
|
+
|
273
|
+
if (
|
274
|
+
pandas.api.types.is_datetime64_any_dtype(dt) and (
|
275
|
+
(dt.dt.tz is not None and not strip_utc)
|
276
|
+
or
|
277
|
+
(dt.dt.tz is None and strip_utc)
|
278
|
+
)
|
279
|
+
):
|
280
|
+
return dt
|
281
|
+
|
272
282
|
dt_series = (
|
273
283
|
pandas.to_datetime(dt, utc=True, format='ISO8601')
|
274
284
|
if dd is None
|
{meerschaum-2.6.9.dist-info → meerschaum-2.6.10.dist-info}/RECORD
CHANGED
@@ -143,7 +143,7 @@ meerschaum/config/_preprocess.py,sha256=-AEA8m_--KivZwTQ1sWN6LTn5sio_fUr2XZ51BO6
|
|
143
143
|
meerschaum/config/_read_config.py,sha256=RLC3HHi_1ndj7ITVDKLD9_uULY3caGRwSz3ATYE-ixA,15014
|
144
144
|
meerschaum/config/_shell.py,sha256=46_m49Txc5q1rGfCgO49ca48BODx45DQJi8D0zz1R18,4245
|
145
145
|
meerschaum/config/_sync.py,sha256=jHcWRkxd82_BgX8Xo8agsWvf7BSbv3qHLWmYl6ehp_0,4242
|
146
|
-
meerschaum/config/_version.py,sha256=
|
146
|
+
meerschaum/config/_version.py,sha256=QOPk_Pjr2KnGrSNrSaTJLqzoKYSFI0Wk__-Tb-BOgSQ,72
|
147
147
|
meerschaum/config/paths.py,sha256=JjibeGN3YAdSNceRwsd42aNmeUrIgM6ndzC8qZAmNI0,621
|
148
148
|
meerschaum/config/resources/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
149
149
|
meerschaum/config/stack/__init__.py,sha256=2UukC0Lmk-aVL1o1qXzumqmuIrw3vu9fD7iCuz4XD4I,10544
|
@@ -200,12 +200,12 @@ meerschaum/core/Pipe/_data.py,sha256=HOvKlpD9NiBg599mSH1lzUQ_2g8-PsnMbGzMTOJx81E
|
|
200
200
|
meerschaum/core/Pipe/_deduplicate.py,sha256=xthUdsDxGO2t3m0XGDm9K3F6dpaZoemtjNi8gyKm0e0,10177
|
201
201
|
meerschaum/core/Pipe/_delete.py,sha256=1geNp9BgrocXP1gt76dMbnlJWKYFMuSNqPFA4K4-hXE,2118
|
202
202
|
meerschaum/core/Pipe/_drop.py,sha256=qj5L0obDh2_dKAg7LD3WLI3_L0Q5YECTfTmPc8zZUxI,1135
|
203
|
-
meerschaum/core/Pipe/_dtypes.py,sha256=
|
203
|
+
meerschaum/core/Pipe/_dtypes.py,sha256=LNgZ47W5DMtZ2hwrcQamwapjWPWgtVQ42u67g4M0n3k,4143
|
204
204
|
meerschaum/core/Pipe/_edit.py,sha256=HrKWe9vhqKaNOjOcJzW5BNbaUBPIbgNAhJEK8OMsy7c,8416
|
205
205
|
meerschaum/core/Pipe/_fetch.py,sha256=Q_LncNi1nv-YwvRPbh1QK0hf6hflL7Hn9v9lT3oQgF4,5451
|
206
206
|
meerschaum/core/Pipe/_register.py,sha256=Sd5xaAW8H7uLTIoommcKb-6kHPRuHJLWNSbPnt2UbvA,2240
|
207
207
|
meerschaum/core/Pipe/_show.py,sha256=nG50y8eBT9TVuKkRgAKtNDNIxysJvMNxfu__lkL1F9k,1352
|
208
|
-
meerschaum/core/Pipe/_sync.py,sha256=
|
208
|
+
meerschaum/core/Pipe/_sync.py,sha256=E2egt63tqhVpB0ZteMO36HTF4EYEMtdevKOtN1aLd9o,34753
|
209
209
|
meerschaum/core/Pipe/_verify.py,sha256=c3HvsZd4QPydqozaV6cDDRtwYiNz4V91b0IcnKvcimA,14158
|
210
210
|
meerschaum/core/Plugin/__init__.py,sha256=UXg64EvJPgI1PCxkY_KM02-ZmBm4FZpLPIQR_uSJJDc,137
|
211
211
|
meerschaum/core/User/_User.py,sha256=JZ9Y1tsjZe-cgD24m9YfZ6ZwSOKn_sHc4rbQ7KblBz8,6592
|
@@ -219,7 +219,7 @@ meerschaum/plugins/__init__.py,sha256=6krcqaMKyzuVqesXMqEL0XEy2SJQ4xfNt2-oI_fJ6v
|
|
219
219
|
meerschaum/plugins/bootstrap.py,sha256=VwjpZAuYdqPJW0YoVgAoM_taHkdQHqP902-8T7OWWCI,11339
|
220
220
|
meerschaum/utils/__init__.py,sha256=QrK1K9hIbPCRCM5k2nZGFqGnrqhA0Eh-iSmCU7FG6Cs,612
|
221
221
|
meerschaum/utils/_get_pipes.py,sha256=tu4xKPoDn79Dz2kWM13cXTP4DSCkn-3G9M8KiLftopw,11073
|
222
|
-
meerschaum/utils/dataframe.py,sha256=
|
222
|
+
meerschaum/utils/dataframe.py,sha256=R-TgFTV6qbJ3xPEl596Dw6m1lF8Lqz35n1PtFjCYmI0,43400
|
223
223
|
meerschaum/utils/debug.py,sha256=GyIzJmunkoPnOcZNYVQdT4Sgd-aOb5MI2VbIgATOjIQ,3695
|
224
224
|
meerschaum/utils/interactive.py,sha256=t-6jWozXSqL7lYGDHuwiOjTgr-UKhdcg61q_eR5mikI,3196
|
225
225
|
meerschaum/utils/misc.py,sha256=soGmUooT216Dl15KbcUTzf8E-aC6uNM6Zvy1PiUT_Y4,47089
|
@@ -239,7 +239,7 @@ meerschaum/utils/daemon/RotatingFile.py,sha256=ePm_svjwyFDWh6V1k-bp1RHXCSWlyxDtl
|
|
239
239
|
meerschaum/utils/daemon/StdinFile.py,sha256=J6tyUReM8NEp3bBQAxMfe8mjJG5mWi6CzHN4x86VQBI,3237
|
240
240
|
meerschaum/utils/daemon/__init__.py,sha256=o9jWb4lRTIyny4EPt7fPXFgV_vIf1mUofsTwoE1ZecA,8751
|
241
241
|
meerschaum/utils/daemon/_names.py,sha256=d2ZwTxBoTAqXZkCfZ5LuX2XrkQmLNUq1OTlUqfoH5dA,4515
|
242
|
-
meerschaum/utils/dtypes/__init__.py,sha256=
|
242
|
+
meerschaum/utils/dtypes/__init__.py,sha256=ZG1Ccy8QmTNuR0IP5NSMtgWj7Cseh_VU4wxSQDw2m94,8352
|
243
243
|
meerschaum/utils/dtypes/sql.py,sha256=IQihwQy4OKSbRjvJy6ky6SszFKR7W1iMs-ruZDsf2js,18701
|
244
244
|
meerschaum/utils/formatting/__init__.py,sha256=GpJQWeqkdWw5IuDmW4Rgmapjzv-KkI4jhBZllJi4QIg,15999
|
245
245
|
meerschaum/utils/formatting/_jobs.py,sha256=izsqPJhTtUkXUUtWnbXtReYsUYwulXtci3pBj72Ne64,6637
|
@@ -251,11 +251,11 @@ meerschaum/utils/packages/_packages.py,sha256=IFcQ4MzmTqjdWkqOsUa25xUNmG246TFqe2
|
|
251
251
|
meerschaum/utils/packages/lazy_loader.py,sha256=VHnph3VozH29R4JnSSBfwtA5WKZYZQFT_GeQSShCnuc,2540
|
252
252
|
meerschaum/utils/venv/_Venv.py,sha256=sBnlmxHdAh2bx8btfVoD79-H9-cYsv5lP02IIXkyECs,3553
|
253
253
|
meerschaum/utils/venv/__init__.py,sha256=f3oi67lXYPLKJrnRW9lae7M3A8SFiC7DzaMoBdCVUFs,24609
|
254
|
-
meerschaum-2.6.
|
255
|
-
meerschaum-2.6.
|
256
|
-
meerschaum-2.6.
|
257
|
-
meerschaum-2.6.
|
258
|
-
meerschaum-2.6.
|
259
|
-
meerschaum-2.6.
|
260
|
-
meerschaum-2.6.
|
261
|
-
meerschaum-2.6.
|
254
|
+
meerschaum-2.6.10.dist-info/LICENSE,sha256=jG2zQEdRNt88EgHUWPpXVWmOrOduUQRx7MnYV9YIPaw,11359
|
255
|
+
meerschaum-2.6.10.dist-info/METADATA,sha256=pJcfXtifDFIe38Cu39zlZOlgp0wZ1-IDGQPe7HE0cPU,24758
|
256
|
+
meerschaum-2.6.10.dist-info/NOTICE,sha256=OTA9Fcthjf5BRvWDDIcBC_xfLpeDV-RPZh3M-HQBRtQ,114
|
257
|
+
meerschaum-2.6.10.dist-info/WHEEL,sha256=a7TGlA-5DaHMRrarXjVbQagU3Man_dCnGIWMJr5kRWo,91
|
258
|
+
meerschaum-2.6.10.dist-info/entry_points.txt,sha256=5YBVzibw-0rNA_1VjB16z5GABsOGf-CDhW4yqH8C7Gc,88
|
259
|
+
meerschaum-2.6.10.dist-info/top_level.txt,sha256=bNoSiDj0El6buocix-FRoAtJOeq1qOF5rRm2u9i7Q6A,11
|
260
|
+
meerschaum-2.6.10.dist-info/zip-safe,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
261
|
+
meerschaum-2.6.10.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|