meerschaum 2.6.16__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +1 -1
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/edit.py +22 -2
- meerschaum/actions/install.py +1 -2
- meerschaum/actions/sync.py +2 -3
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +4 -3
- meerschaum/connectors/sql/_create_engine.py +3 -3
- meerschaum/connectors/sql/_pipes.py +84 -38
- meerschaum/connectors/sql/_sql.py +6 -1
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +19 -0
- meerschaum/core/Pipe/_dtypes.py +1 -1
- meerschaum/core/Pipe/_sync.py +61 -21
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +2 -1
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +20 -13
- meerschaum/utils/dataframe.py +175 -13
- meerschaum/utils/dtypes/__init__.py +103 -14
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/schedule.py +8 -3
- meerschaum/utils/sql.py +70 -47
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +33 -13
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +2 -2
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +38 -38
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
```diff
@@ -460,10 +460,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
     )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )
 
     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)
@@ -474,6 +480,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')
 
     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (
@@ -504,19 +511,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif
-            …
-        )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )
 
+        if dt_query:
             index_queries[_datetime] = [dt_query]
 
     primary_queries = []
@@ -623,7 +630,7 @@ def get_create_index_queries(
         ),
         (
             f"ALTER TABLE {_pipe_name}\n"
-            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+            f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
         ),
     ])
     index_queries[primary_key] = primary_queries
```
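The net effect on SQL Server: when a pipe has a datetime axis and no pre-existing primary key, the datetime index now takes the table's single clustered slot and the primary-key constraint is created `NONCLUSTERED`; without a datetime column, the primary key stays `CLUSTERED`. A minimal sketch of the resulting DDL under the new logic (the table and column names here are hypothetical):

```python
# Hypothetical pipe: datetime axis 'ts', primary key 'id', flavor 'mssql'.
datetime_clustered = "CLUSTERED"        # dt axis exists and no PK was created yet
primary_key_clustered = "NONCLUSTERED"  # the dt index already took the clustered slot

queries = [
    f"CREATE {datetime_clustered} INDEX [IX_demo_ts] ON [demo] ([ts])",
    (
        "ALTER TABLE [demo]\n"
        f"ADD CONSTRAINT [PK_demo] PRIMARY KEY {primary_key_clustered} ([id])"
    ),
]
print('\n\n'.join(queries))
```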
```diff
@@ -875,6 +882,7 @@ def get_pipe_data(
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
@@ -891,17 +899,15 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
                 dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
                 dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False
 
             if _dt:
                 dt_type = dtypes.get(_dt, 'object').lower()
@@ -929,7 +935,7 @@ def get_pipe_data(
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,
@@ -959,6 +965,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]
 
     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
 
@@ -978,6 +989,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)
 
+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
```
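`attempt_cast_to_bytes` comes from `meerschaum.utils.dtypes`, but its body is not shown in this diff. A plausible minimal sketch, assuming the serialized form is a base64 string (mirroring the `serialize_bytes` handler in the Valkey hunk further down):

```python
import base64
from typing import Any

def attempt_cast_to_bytes(value: Any) -> Any:
    """
    Hypothetical sketch: best-effort decode of a serialized bytes value,
    returning the input unchanged when it can't be decoded.
    """
    if value is None or isinstance(value, bytes):
        return value
    try:
        return base64.b64decode(value, validate=True)
    except Exception:
        return value

print(attempt_cast_to_bytes('AAE='))     # b'\x00\x01'
print(attempt_cast_to_bytes('not b64'))  # returned as-is
```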
```diff
@@ -1339,7 +1355,13 @@ def create_pipe_table_from_df(
     """
     Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)
@@ -1365,6 +1387,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
```
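Because `**pipe.dtypes` is unpacked last, explicitly configured dtypes still override anything inferred from the first dataframe; the new entries only add `bytes` and timezone-aware/naive datetime columns the pipe hasn't seen. A rough illustration of the merge order, using the helper signatures visible in the hunk (the sample frame is hypothetical):

```python
import pandas as pd
from meerschaum.utils.dataframe import get_bytes_cols, get_datetime_cols

df = pd.DataFrame({
    'ts': pd.to_datetime(['2024-01-01'], utc=True),
    'payload': [b'\x00\x01'],
})

new_dtypes = {
    **{col: 'bytes' for col in get_bytes_cols(df)},
    **{
        col: 'datetime64[ns, UTC]'
        for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
    },
    **{'payload': 'bytes'},  # stand-in for **pipe.dtypes, which wins on conflicts
}
print(new_dtypes)  # {'payload': 'bytes', 'ts': 'datetime64[ns, UTC]'}
```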
```diff
@@ -1455,11 +1489,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe
@@ -1572,6 +1604,7 @@ def sync_pipe(
         'schema': self.get_pipe_schema(pipe),
     })
 
+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)
@@ -1589,17 +1622,23 @@ def sync_pipe(
         if not edit_success:
             return edit_success, edit_msg
 
-
+    def _check_pk(_df_to_clear):
+        if _df_to_clear is None:
+            return
+        if primary_key not in _df_to_clear.columns:
+            return
+        if not _df_to_clear[primary_key].notnull().any():
+            del _df_to_clear[primary_key]
+
+    autoincrement_needs_reset = bool(
+        autoincrement
+        and primary_key
+        and primary_key in unseen_df.columns
+        and unseen_df[primary_key].notnull().any()
+    )
     if autoincrement and primary_key:
-        …
-        del unseen_df[primary_key]
-        if update_df is not None and primary_key in update_df.columns:
-            del update_df[primary_key]
-        if delta_df is not None and primary_key in delta_df.columns:
-            del delta_df[primary_key]
-        elif unseen_df[primary_key].notnull().any():
-            autoincrement_needs_reset = True
+        for _df_to_clear in (unseen_df, update_df, delta_df):
+            _check_pk(_df_to_clear)
 
     if is_new:
         create_success, create_msg = self.create_pipe_table_from_df(
```
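The `_check_pk` helper replaces the old unconditional deletes: a frame's primary-key column is now only dropped when every value in it is null, so rows that arrive with explicit keys keep them (and trigger the autoincrement reset computed just above). The check in isolation:

```python
import pandas as pd

def _check_pk(df, primary_key: str = 'id') -> None:
    """Drop the PK column only if it is entirely null (as in the hunk above)."""
    if df is None or primary_key not in df.columns:
        return
    if not df[primary_key].notnull().any():
        del df[primary_key]

all_null = pd.DataFrame({'id': [None, None], 'val': [1, 2]})
explicit = pd.DataFrame({'id': [10, 11], 'val': [1, 2]})
_check_pk(all_null)
_check_pk(explicit)
print(list(all_null.columns))  # ['val'] -- let the database assign the keys
print(list(explicit.columns))  # ['id', 'val'] -- explicit keys are preserved
```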
```diff
@@ -1612,6 +1651,7 @@ def sync_pipe(
 
     do_identity_insert = bool(
         self.flavor in ('mssql',)
+        and primary_key
         and primary_key in unseen_df.columns
         and autoincrement
     )
@@ -1707,7 +1747,11 @@ def sync_pipe(
         col
         for col_key, col in pipe.columns.items()
         if col and col in existing_cols
-    ]
+    ] if not primary_key else (
+        [dt_col, primary_key]
+        if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+        else [primary_key]
+    )
     update_queries = get_update_queries(
         pipe.target,
         temp_target,
@@ -1716,7 +1760,8 @@ def sync_pipe(
         upsert=upsert,
         schema=self.get_pipe_schema(pipe),
         patch_schema=self.internal_schema,
-        datetime_col=
+        datetime_col=(dt_col if dt_col in update_df.columns else None),
+        identity_insert=(autoincrement and primary_key in update_df.columns),
         debug=debug,
     )
     update_success = all(
```
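The new `identity_insert` flag only matters on SQL Server, where writing explicit values into an `IDENTITY` column requires toggling a per-session option. Presumably `get_update_queries` wraps the patch statements along these lines (standard T-SQL; the table name is hypothetical):

```python
# Standard SQL Server pattern for inserting explicit IDENTITY values.
table = "[demo]"  # hypothetical target
wrapped = (
    f"SET IDENTITY_INSERT {table} ON;\n"
    "-- INSERT / MERGE statements carrying explicit primary-key values --\n"
    f"SET IDENTITY_INSERT {table} OFF;"
)
print(wrapped)
```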
```diff
@@ -1834,7 +1879,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )
@@ -2054,6 +2098,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types
 
     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()
@@ -2064,7 +2109,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key else {primary_key: new_cols.get(primary_key)}
 
     null_replace_new_cols_str = (
         ', '.join([
@@ -2591,7 +2636,7 @@ def get_pipe_rowcount(
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
-    except Exception
+    except Exception:
         return None
 
 
@@ -2616,10 +2661,11 @@ def drop_pipe(
     from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
+    schema = self.get_pipe_schema(pipe)
     target_name = (
-        sql_item_name(target, self.flavor,
+        sql_item_name(target, self.flavor, schema)
     )
-    if table_exists(target, self, debug=debug):
+    if table_exists(target, self, schema=schema, debug=debug):
        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
        success = self.exec(
            f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
```
meerschaum/connectors/sql/_sql.py
CHANGED
```diff
@@ -790,7 +790,12 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
```
meerschaum/connectors/valkey/_pipes.py
CHANGED
```diff
@@ -46,9 +46,20 @@ def serialize_document(doc: Dict[str, Any]) -> str:
     -------
     A serialized string for the document.
     """
+    from meerschaum.utils.dtypes import serialize_bytes
     return json.dumps(
         doc,
-        default=(
+        default=(
+            lambda x: (
+                json_serialize_datetime(x)
+                if hasattr(x, 'tzinfo')
+                else (
+                    serialize_bytes(x)
+                    if isinstance(x, bytes)
+                    else str(x)
+                )
+            )
+        ),
         separators=(',', ':'),
         sort_keys=True,
     )
```
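With the new `default=` handler, Valkey documents serialize datetimes through `json_serialize_datetime` (anything exposing `tzinfo`), bytes through `serialize_bytes`, and everything else through `str`. A self-contained sketch of the same dispatch, with stand-ins for the two helpers (their bodies are not part of this diff; base64 and ISO 8601 are assumptions):

```python
import base64
import json
from datetime import datetime, timezone

def json_serialize_datetime(dt: datetime) -> str:
    return dt.isoformat()  # assumed ISO 8601 output

def serialize_bytes(data: bytes) -> str:
    return base64.b64encode(data).decode('ascii')  # assumed base64 form

doc = {'ts': datetime(2024, 1, 1, tzinfo=timezone.utc), 'blob': b'\x00\x01'}
print(json.dumps(
    doc,
    default=lambda x: (
        json_serialize_datetime(x)
        if hasattr(x, 'tzinfo')
        else (serialize_bytes(x) if isinstance(x, bytes) else str(x))
    ),
    separators=(',', ':'),
    sort_keys=True,
))
# {"blob":"AAE=","ts":"2024-01-01T00:00:00+00:00"}
```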
meerschaum/core/Pipe/__init__.py
CHANGED
```diff
@@ -106,6 +106,7 @@ class Pipe:
         upsert,
         static,
         tzinfo,
+        enforce,
         get_columns,
         get_columns_types,
         get_columns_indices,
@@ -132,6 +133,7 @@ class Pipe:
         _persist_new_json_columns,
         _persist_new_numeric_columns,
         _persist_new_uuid_columns,
+        _persist_new_bytes_columns,
     )
     from ._verify import (
         verify,
```
```diff
@@ -162,12 +164,14 @@ class Pipe:
         upsert: Optional[bool] = None,
         autoincrement: Optional[bool] = None,
         static: Optional[bool] = None,
+        enforce: Optional[bool] = None,
         mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
         cache: bool = False,
         debug: bool = False,
         connector_keys: Optional[str] = None,
         metric_key: Optional[str] = None,
         location_key: Optional[str] = None,
+        instance_keys: Optional[str] = None,
         indexes: Union[Dict[str, str], List[str], None] = None,
     ):
         """
```
```diff
@@ -219,6 +223,10 @@ class Pipe:
         static: Optional[bool], default None
             If `True`, set `static` in the parameters.
 
+        enforce: Optional[bool], default None
+            If `False`, skip data type enforcement.
+            Default behavior is `True`.
+
         temporary: bool, default False
             If `True`, prevent instance tables (pipes, users, plugins) from being created.
 
```
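In practice, the new keyword lets a caller opt a pipe out of dtype coercion entirely, which the `get_pipe_data` hunks above honor by skipping the dtype maps. A hedged usage sketch (the keys and instance here are hypothetical):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'no_enforce',    # hypothetical connector / metric keys
    instance='sql:local',    # hypothetical instance
    enforce=False,
)
# enforce=False lands in the parameters dictionary (see the __init__ hunk below),
# so downstream reads of pipe.enforce return False and skip type coercion.
print(pipe.parameters.get('enforce'))  # False
```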
```diff
@@ -319,11 +327,13 @@ class Pipe:
         if isinstance(static, bool):
             self._attributes['parameters']['static'] = static
 
+        if isinstance(enforce, bool):
+            self._attributes['parameters']['enforce'] = enforce
+
         ### NOTE: The parameters dictionary is {} by default.
         ### A Pipe may be registered without parameters, then edited,
         ### or a Pipe may be registered with parameters set in-memory first.
-
-        _mrsm_instance = mrsm_instance if mrsm_instance is not None else instance
+        _mrsm_instance = mrsm_instance if mrsm_instance is not None else (instance or instance_keys)
         if _mrsm_instance is None:
             _mrsm_instance = get_config('meerschaum', 'instance', patch=True)
 
@@ -341,10 +351,10 @@ class Pipe:
         Return the four keys needed to reconstruct this pipe.
         """
         return {
-            '
-            '
-            '
-            '
+            'connector_keys': self.connector_keys,
+            'metric_key': self.metric_key,
+            'location_key': self.location_key,
+            'instance_keys': self.instance_keys,
         }
 
     def keys(self) -> List[str]:
@@ -385,7 +395,7 @@ class Pipe:
             warnings.simplefilter('ignore')
             try:
                 conn = parse_instance_keys(self.connector_keys)
-            except Exception
+            except Exception:
                 conn = None
             if conn:
                 self._connector = conn
@@ -429,7 +439,7 @@ class Pipe:
         _fetch_patch = {
             'fetch': ({
                 'definition': (
-                    …
+                    "SELECT * FROM "
                     + sql_item_name(
                         str(self.target),
                         self.instance_connector.flavor,
@@ -467,7 +477,7 @@ class Pipe:
             and self.location_key == other.location_key
             and self.instance_keys == other.instance_keys
         )
-        except Exception
+        except Exception:
             return False
 
     def __hash__(self):
@@ -496,11 +506,11 @@ class Pipe:
         Define the state dictionary (pickling).
         """
         return {
-            '
-            '
-            '
+            'connector_keys': self.connector_keys,
+            'metric_key': self.metric_key,
+            'location_key': self.location_key,
             'parameters': self.parameters,
-            '
+            'instance_keys': self.instance_keys,
         }
 
     def __setstate__(self, _state: Dict[str, Any]):
```
meerschaum/core/Pipe/_attributes.py
CHANGED
```diff
@@ -289,6 +289,25 @@ def tzinfo(self) -> Union[None, timezone]:
     return None
 
 
+@property
+def enforce(self) -> bool:
+    """
+    Return the `enforce` parameter for the pipe.
+    """
+    if 'enforce' not in self.parameters:
+        self.parameters['enforce'] = True
+
+    return self.parameters['enforce']
+
+
+@enforce.setter
+def enforce(self, _enforce: bool) -> None:
+    """
+    Set the `enforce` parameter for the pipe.
+    """
+    self.parameters['_enforce'] = _enforce
+
+
 def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
     """
     Check if the requested columns are defined.
```
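Note that reading the property lazily backfills a default of `True` into the parameters dictionary, so pipes created before 2.7.0 keep the old always-enforce behavior without an explicit edit. A quick demonstration (hypothetical keys):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe('demo', 'lazy_default')  # hypothetical keys
print('enforce' in pipe.parameters)       # False -- not set yet
print(pipe.enforce)                       # True  -- first read writes the default
print(pipe.parameters['enforce'])         # True
```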
meerschaum/core/Pipe/_dtypes.py
CHANGED
meerschaum/core/Pipe/_sync.py
CHANGED
```diff
@@ -368,10 +368,11 @@ def sync(
     ### Cast to a dataframe and ensure datatypes are what we expect.
     df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
 
-    ### Capture `numeric`, `uuid`, and `
+    ### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
     self._persist_new_json_columns(df, debug=debug)
     self._persist_new_numeric_columns(df, debug=debug)
     self._persist_new_uuid_columns(df, debug=debug)
+    self._persist_new_bytes_columns(df, debug=debug)
 
     if debug:
         dprint(
@@ -617,11 +618,13 @@ def filter_existing(
         filter_unseen_df,
         add_missing_cols_to_df,
         get_unhashable_cols,
-        get_numeric_cols,
     )
     from meerschaum.utils.dtypes import (
         to_pandas_dtype,
         none_if_null,
+        to_datetime,
+        are_dtypes_equal,
+        value_is_null,
     )
     from meerschaum.config import get_config
     pd = import_pandas()
```
```diff
@@ -669,29 +672,36 @@ def filter_existing(
     ### begin is the oldest data in the new dataframe
     begin, end = None, None
     dt_col = pipe_columns.get('datetime', None)
+    primary_key = pipe_columns.get('primary', None)
     dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
+
+    if autoincrement and primary_key == dt_col and dt_col not in df.columns:
+        if enforce_dtypes:
+            df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
+        return df, get_empty_df(), df
+
     try:
-        min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
+        min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
         if is_dask and min_dt_val is not None:
             min_dt_val = min_dt_val.compute()
         min_dt = (
-            …
-            if min_dt_val is not None and 'datetime'
+            to_datetime(min_dt_val, as_pydatetime=True)
+            if min_dt_val is not None and are_dtypes_equal(dt_type, 'datetime')
             else min_dt_val
         )
     except Exception:
         min_dt = None
-
-
+
+    if not are_dtypes_equal('datetime', str(type(min_dt))) or value_is_null(min_dt):
+        if not are_dtypes_equal('int', str(type(min_dt))):
             min_dt = None
 
     if isinstance(min_dt, datetime):
-        …
-        )
+        rounded_min_dt = round_time(min_dt, to='down')
+        try:
+            begin = rounded_min_dt - timedelta(minutes=1)
+        except OverflowError:
+            begin = rounded_min_dt
     elif dt_type and 'int' in dt_type.lower():
         begin = min_dt
     elif dt_col is None:
```
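The rebuilt `begin` bound rounds the oldest incoming timestamp down to the minute and backs off one more minute, with an `OverflowError` guard for values near `datetime.min`. The same arithmetic in isolation, with a stand-in for meerschaum's `round_time` helper:

```python
from datetime import datetime, timedelta, timezone

def round_time(dt: datetime, to: str = 'down') -> datetime:
    """Stand-in for meerschaum's round_time helper (here: floor to the minute)."""
    return dt.replace(second=0, microsecond=0)

min_dt = datetime(2024, 1, 1, 12, 30, 45, tzinfo=timezone.utc)
rounded_min_dt = round_time(min_dt, to='down')
try:
    begin = rounded_min_dt - timedelta(minutes=1)
except OverflowError:  # e.g. datetime.min can't be pushed any earlier
    begin = rounded_min_dt
print(begin)  # 2024-01-01 12:29:00+00:00
```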
```diff
@@ -699,11 +709,11 @@ def filter_existing(
 
     ### end is the newest data in the new dataframe
     try:
-        max_dt_val = df[dt_col].max(skipna=True) if dt_col else None
+        max_dt_val = df[dt_col].max(skipna=True) if dt_col and dt_col in df.columns else None
         if is_dask and max_dt_val is not None:
             max_dt_val = max_dt_val.compute()
         max_dt = (
-            …
+            to_datetime(max_dt_val, as_pydatetime=True)
             if max_dt_val is not None and 'datetime' in str(dt_type)
             else max_dt_val
         )
@@ -712,8 +722,8 @@ def filter_existing(
         traceback.print_exc()
         max_dt = None
 
-    if ('datetime'
-        if 'int'
+    if not are_dtypes_equal('datetime', str(type(max_dt))) or value_is_null(max_dt):
+        if not are_dtypes_equal('int', str(type(max_dt))):
             max_dt = None
 
     if isinstance(max_dt, datetime):
@@ -723,7 +733,7 @@ def filter_existing(
             to='down'
         ) + timedelta(minutes=1)
     )
-    elif dt_type and 'int' in dt_type.lower():
+    elif dt_type and 'int' in dt_type.lower() and max_dt is not None:
         end = max_dt + 1
 
     if max_dt is not None and min_dt is not None and min_dt > max_dt:
@@ -738,7 +748,7 @@ def filter_existing(
 
     unique_index_vals = {
         col: df[col].unique()
-        for col in pipe_columns
+        for col in (pipe_columns if not primary_key else [primary_key])
         if col in df.columns and col != dt_col
     } if not date_bound_only else {}
     filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
@@ -777,14 +787,15 @@ def filter_existing(
 
     ### Separate new rows from changed ones.
     on_cols = [
-        col
+        col
+        for col_key, col in pipe_columns.items()
         if (
             col
             and
             col_key != 'value'
             and col in backtrack_df.columns
         )
-    ]
+    ] if not primary_key else [primary_key]
     self_dtypes = self.dtypes
     on_cols_dtypes = {
         col: to_pandas_dtype(typ)
```
```diff
@@ -1020,3 +1031,32 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
             return edit_success, edit_msg
 
     return True, "Success"
+
+
+def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
+    """
+    Check for new `bytes` columns and update the parameters.
+    """
+    from meerschaum.utils.dataframe import get_bytes_cols
+    bytes_cols = get_bytes_cols(df)
+    existing_bytes_cols = [col for col, typ in self.dtypes.items() if typ == 'bytes']
+    new_bytes_cols = [col for col in bytes_cols if col not in existing_bytes_cols]
+    if not new_bytes_cols:
+        return True, "Success"
+
+    self._attributes_sync_time = None
+    dt_col = self.columns.get('datetime', None)
+    dtypes = self.parameters.get('dtypes', {})
+    if dt_col not in dtypes:
+        dtypes[dt_col] = 'datetime'
+    dtypes.update({col: 'bytes' for col in bytes_cols})
+    self.parameters['dtypes'] = dtypes
+
+    if not self.temporary:
+        edit_success, edit_msg = self.edit(interactive=False, debug=debug)
+        if not edit_success:
+            warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
+
+        return edit_success, edit_msg
+
+    return True, "Success"
```
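Taken together with the `get_pipe_data` and `to_sql` changes, this new persistence hook closes the loop on the `bytes` dtype: syncing raw bytes records the dtype in the pipe's parameters, and reads cast the stored values back. A hedged end-to-end sketch (keys and instance are hypothetical):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'bytes_roundtrip',   # hypothetical keys
    instance='sql:local',        # hypothetical instance
    columns={'primary': 'id'},
)
pipe.sync([{'id': 1, 'blob': b'\x00\x01\x02'}])
# _persist_new_bytes_columns() records {'blob': 'bytes'} in the parameters,
# and get_pipe_data() applies attempt_cast_to_bytes() on the way out.
df = pipe.get_data()
print(df['blob'][0])  # b'\x00\x01\x02'
```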