meerschaum-2.7.9-py3-none-any.whl → meerschaum-2.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +17 -5
- meerschaum/_internal/term/TermPageHandler.py +1 -1
- meerschaum/_internal/term/__init__.py +1 -1
- meerschaum/actions/api.py +36 -10
- meerschaum/actions/copy.py +3 -1
- meerschaum/actions/index.py +1 -1
- meerschaum/actions/show.py +7 -7
- meerschaum/actions/sync.py +5 -1
- meerschaum/actions/verify.py +14 -1
- meerschaum/api/__init__.py +77 -41
- meerschaum/api/_exceptions.py +18 -0
- meerschaum/api/dash/__init__.py +4 -2
- meerschaum/api/dash/callbacks/dashboard.py +30 -1
- meerschaum/api/dash/components.py +2 -2
- meerschaum/api/dash/webterm.py +23 -4
- meerschaum/api/models/_pipes.py +8 -8
- meerschaum/api/resources/static/css/dash.css +2 -2
- meerschaum/api/resources/templates/termpage.html +5 -1
- meerschaum/api/routes/__init__.py +15 -12
- meerschaum/api/routes/_connectors.py +30 -28
- meerschaum/api/routes/_index.py +16 -7
- meerschaum/api/routes/_misc.py +30 -22
- meerschaum/api/routes/_pipes.py +244 -148
- meerschaum/api/routes/_plugins.py +58 -47
- meerschaum/api/routes/_users.py +39 -31
- meerschaum/api/routes/_version.py +8 -10
- meerschaum/api/routes/_webterm.py +2 -2
- meerschaum/config/_default.py +10 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +5 -2
- meerschaum/connectors/api/_APIConnector.py +4 -3
- meerschaum/connectors/api/_login.py +21 -17
- meerschaum/connectors/api/_pipes.py +1 -0
- meerschaum/connectors/api/_request.py +9 -10
- meerschaum/connectors/sql/_cli.py +11 -3
- meerschaum/connectors/sql/_instance.py +1 -1
- meerschaum/connectors/sql/_pipes.py +77 -57
- meerschaum/connectors/sql/_sql.py +26 -9
- meerschaum/core/Pipe/__init__.py +2 -0
- meerschaum/core/Pipe/_attributes.py +13 -2
- meerschaum/core/Pipe/_data.py +85 -0
- meerschaum/core/Pipe/_deduplicate.py +6 -8
- meerschaum/core/Pipe/_sync.py +63 -30
- meerschaum/core/Pipe/_verify.py +242 -77
- meerschaum/core/User/__init__.py +2 -6
- meerschaum/jobs/_Job.py +1 -1
- meerschaum/jobs/__init__.py +15 -0
- meerschaum/utils/dataframe.py +2 -0
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/formatting/_pipes.py +1 -1
- meerschaum/utils/misc.py +11 -7
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/sql.py +6 -2
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/METADATA +4 -4
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/RECORD +61 -60
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -7,6 +7,7 @@ Interact with Pipes metadata via SQLConnector.
 """
 from __future__ import annotations
 from datetime import datetime, date, timedelta
+
 import meerschaum as mrsm
 from meerschaum.utils.typing import (
     Union, Any, SuccessTuple, Tuple, Dict, Optional, List
@@ -1837,7 +1838,7 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-    stats = {'success': True, 'msg': '
+    stats = {'success': True, 'msg': ''}
     if len(unseen_df) > 0:
         with self.engine.connect() as connection:
             with connection.begin():
@@ -1949,6 +1950,7 @@ def sync_pipe(
         datetime_col=(dt_col if dt_col in update_df.columns else None),
         identity_insert=(autoincrement and primary_key in update_df.columns),
         null_indices=pipe.null_indices,
+        cast_columns=pipe.enforce,
         debug=debug,
     )
     update_results = self.exec_queries(
@@ -1967,12 +1969,16 @@ def sync_pipe(
     if not update_success:
         warn(f"Failed to apply update to {pipe}.")
     stats['success'] = stats['success'] and update_success
-    stats['msg'] = (
+    stats['msg'] = (
+        (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()
+        if not update_success
+        else stats.get('msg', '')
+    )

     stop = time.perf_counter()
     success = stats['success']
     if not success:
-        return success, stats['msg']
+        return success, stats['msg'] or str(stats)

     unseen_count = len(unseen_df.index) if unseen_df is not None else 0
     update_count = len(update_df.index) if update_df is not None else 0
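With the conditional message above, a failed update is appended to `stats['msg']`, and the `or str(stats)` fallback on the failure return guarantees a non-empty error message even when no message was collected. A minimal sketch of the fallback (values are illustrative):

    # Hypothetical illustration of the message fallback used above.
    stats = {'success': False, 'msg': ''}
    msg = stats['msg'] or str(stats)
    print(msg)  # -> "{'success': False, 'msg': ''}" instead of an empty error message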
@@ -2529,6 +2535,7 @@ def sync_pipe_inplace(
         datetime_col=pipe.columns.get('datetime', None),
         flavor=self.flavor,
         null_indices=pipe.null_indices,
+        cast_columns=pipe.enforce,
         debug=debug,
     )
     if on_cols else []
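Both `sync_pipe()` and `sync_pipe_inplace()` now forward the pipe's `enforce` flag as `cast_columns`, so pipes that opt out of dtype enforcement also skip casting in the generated merge queries. A sketch of toggling the flag, assuming `enforce` is stored in the pipe's parameters (the diff only shows that `pipe.enforce` feeds `cast_columns`):

    import meerschaum as mrsm

    # A sketch; assumes `enforce` lives in the pipe's parameters.
    pipe = mrsm.Pipe(
        'sql:source', 'orders',
        instance='sql:main',
        parameters={'enforce': False},  # skip column casting during syncs
    )
    print(pipe.enforce)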
@@ -2585,6 +2592,7 @@ def get_sync_time(
     pipe: 'mrsm.Pipe',
     params: Optional[Dict[str, Any]] = None,
     newest: bool = True,
+    remote: bool = False,
     debug: bool = False,
 ) -> Union[datetime, int, None]:
     """Get a Pipe's most recent datetime value.
@@ -2602,50 +2610,76 @@ def get_sync_time(
         If `True`, get the most recent datetime (honoring `params`).
         If `False`, get the oldest datetime (ASC instead of DESC).

+    remote: bool, default False
+        If `True`, return the sync time for the remote fetch definition.
+
     Returns
     -------
     A `datetime` object (or `int` if using an integer axis) if the pipe exists, otherwise `None`.
     """
-    from meerschaum.utils.sql import sql_item_name, build_where
-
+    from meerschaum.utils.sql import sql_item_name, build_where, wrap_query_with_cte
+    src_name = sql_item_name('src', self.flavor)
+    table_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

     dt_col = pipe.columns.get('datetime', None)
     if dt_col is None:
         return None
     dt_col_name = sql_item_name(dt_col, self.flavor, None)

+    if remote and pipe.connector.type != 'sql':
+        warn(f"Cannot get the remote sync time for {pipe}.")
+        return None
+
     ASC_or_DESC = "DESC" if newest else "ASC"
     existing_cols = pipe.get_columns_types(debug=debug)
     valid_params = {}
     if params is not None:
         valid_params = {k: v for k, v in params.items() if k in existing_cols}
+    flavor = self.flavor if not remote else pipe.connector.flavor

     ### If no bounds are provided for the datetime column,
     ### add IS NOT NULL to the WHERE clause.
     if dt_col not in valid_params:
         valid_params[dt_col] = '_None'
     where = "" if not valid_params else build_where(valid_params, self)
-
+    src_query = (
+        f"SELECT {dt_col_name}\nFROM {table_name}{where}"
+        if not remote
+        else self.get_pipe_metadef(pipe, params=params, begin=None, end=None)
+    )
+
+    base_query = (
+        f"SELECT {dt_col_name}\n"
+        f"FROM {src_name}{where}\n"
+        f"ORDER BY {dt_col_name} {ASC_or_DESC}\n"
+        f"LIMIT 1"
+    )
     if self.flavor == 'mssql':
-
+        base_query = (
+            f"SELECT TOP 1 {dt_col_name}\n"
+            f"FROM {src_name}{where}\n"
+            f"ORDER BY {dt_col_name} {ASC_or_DESC}"
+        )
     elif self.flavor == 'oracle':
-
+        base_query = (
             "SELECT * FROM (\n"
-
-
-
+            f"  SELECT {dt_col_name}\n"
+            f"  FROM {src_name}{where}\n"
+            f"  ORDER BY {dt_col_name} {ASC_or_DESC}\n"
+            ") WHERE ROWNUM = 1"
         )

+    query = wrap_query_with_cte(src_query, base_query, flavor)
+
     try:
-        db_time = self.value(
+        db_time = self.value(query, silent=True, debug=debug)

         ### No datetime could be found.
         if db_time is None:
             return None
         ### sqlite returns str.
         if isinstance(db_time, str):
-
-            dateutil_parser = attempt_import('dateutil.parser')
+            dateutil_parser = mrsm.attempt_import('dateutil.parser')
             st = dateutil_parser.parse(db_time)
         ### Do nothing if a datetime object is returned.
         elif isinstance(db_time, datetime):
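The rewritten `get_sync_time()` builds a `src` subquery (the target table, or the pipe's fetch definition when `remote=True`) and wraps it with `wrap_query_with_cte()` before selecting the newest or oldest datetime. A usage sketch, assuming a SQL-backed pipe and a configured `sql:main` instance:

    import meerschaum as mrsm

    # A sketch of the new `remote` flag; the connector warns and
    # returns None when the pipe's source is not a SQL connector.
    pipe = mrsm.Pipe('sql:source', 'orders', instance='sql:main')
    conn = mrsm.get_connector('sql:main')

    newest_local = conn.get_sync_time(pipe, newest=True)
    newest_remote = conn.get_sync_time(pipe, remote=True)  # sync time of the fetch definition

    # The generated statement is roughly of the form:
    #   WITH "src" AS (SELECT "dt" FROM "target" WHERE "dt" IS NOT NULL)
    #   SELECT "dt" FROM "src" ORDER BY "dt" DESC LIMIT 1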
@@ -2743,7 +2777,7 @@ def get_pipe_rowcount(
     An `int` for the number of rows if the `pipe` exists, otherwise `None`.

     """
-    from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
+    from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte, build_where
     from meerschaum.connectors.sql._fetch import get_pipe_query
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     if remote:
@@ -2755,18 +2789,20 @@ def get_pipe_rowcount(
         error(msg)
         return None

-    _pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))

+    flavor = self.flavor if not remote else pipe.connector.flavor
+    conn = self if not remote else pipe.connector
+    _pipe_name = sql_item_name(pipe.target, flavor, self.get_pipe_schema(pipe))
     dt_col = pipe.columns.get('datetime', None)
     dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
-    dt_db_type = get_db_type_from_pd_type(dt_typ,
+    dt_db_type = get_db_type_from_pd_type(dt_typ, flavor) if dt_typ else None
     if not dt_col:
         dt_col = pipe.guess_datetime()
-        dt_name = sql_item_name(dt_col,
+        dt_name = sql_item_name(dt_col, flavor, None) if dt_col else None
         is_guess = True
     else:
         dt_col = pipe.get_columns('datetime')
-        dt_name = sql_item_name(dt_col,
+        dt_name = sql_item_name(dt_col, flavor, None)
         is_guess = False

     if begin is not None or end is not None:
@@ -2786,32 +2822,15 @@ def get_pipe_rowcount(
     )


-    _datetime_name = sql_item_name(
-        dt_col,
-        (
-            pipe.instance_connector.flavor
-            if not remote
-            else pipe.connector.flavor
-        ),
-        None,
-    )
+    _datetime_name = sql_item_name(dt_col, flavor)
     _cols_names = [
-        sql_item_name(
-            col,
-            (
-                pipe.instance_connector.flavor
-                if not remote
-                else pipe.connector.flavor
-            ),
-            None,
-        )
+        sql_item_name(col, flavor)
         for col in set(
             (
                 [dt_col]
                 if dt_col
                 else []
-            )
-            + (
+            ) + (
                 []
                 if params is None
                 else list(params.keys())
@@ -2826,34 +2845,33 @@ def get_pipe_rowcount(
         if not remote
         else get_pipe_query(pipe)
     )
-    parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src',
-    query = wrap_query_with_cte(src, parent_query,
+    parent_query = f"SELECT COUNT(*)\nFROM {sql_item_name('src', flavor)}"
+    query = wrap_query_with_cte(src, parent_query, flavor)
     if begin is not None or end is not None:
         query += "\nWHERE"
     if begin is not None:
         query += (
             f"\n {dt_name} >= "
-            + dateadd_str(
+            + dateadd_str(flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)
         )
     if end is not None and begin is not None:
         query += "\n AND"
     if end is not None:
         query += (
             f"\n {dt_name} < "
-            + dateadd_str(
+            + dateadd_str(flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)
         )
     if params is not None:
-        from meerschaum.utils.sql import build_where
        existing_cols = pipe.get_columns_types(debug=debug)
        valid_params = {k: v for k, v in params.items() if k in existing_cols}
        if valid_params:
-            query += build_where(valid_params,
+            query += build_where(valid_params, conn).replace('WHERE', (
                'AND' if (begin is not None or end is not None)
                else 'WHERE'
            )
        )

-    result =
+    result = conn.value(query, debug=debug, silent=True)
     try:
         return int(result)
     except Exception:
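With `flavor` and `conn` resolved up front, `get_pipe_rowcount()` runs the same COUNT(*) CTE against either the instance table or the remote fetch definition. A usage sketch, assuming the pipe's source connector is a SQL connector:

    import meerschaum as mrsm

    # A sketch of counting rows against the remote fetch definition.
    pipe = mrsm.Pipe('sql:source', 'orders', instance='sql:main')

    local_count = pipe.get_rowcount()
    remote_count = pipe.get_rowcount(remote=True)  # COUNT(*) over the pipe's fetch query
    print(local_count, remote_count)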
@@ -3634,7 +3652,6 @@ def deduplicate_pipe(
     if not pipe.exists(debug=debug):
         return False, f"Table {pipe_table_name} does not exist."

-    ### TODO: Handle deleting duplicates without a datetime axis.
     dt_col = pipe.columns.get('datetime', None)
     cols_types = pipe.get_columns_types(debug=debug)
     existing_cols = pipe.get_columns_types(debug=debug)
@@ -3738,9 +3755,8 @@ def deduplicate_pipe(

     session_id = generate_password(3)

-    dedup_table =
-    temp_old_table =
-
+    dedup_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='dedup')
+    temp_old_table = self.get_temporary_target(pipe.target, transact_id=session_id, label='old')
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))

     create_temporary_table_query = get_create_table_query(
@@ -3753,16 +3769,21 @@ def deduplicate_pipe(
     if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     alter_queries = flatten_list([
         get_rename_table_queries(
-            pipe.target,
+            pipe.target,
+            temp_old_table,
+            self.flavor,
+            schema=self.get_pipe_schema(pipe),
         ),
         get_rename_table_queries(
-            dedup_table,
+            dedup_table,
+            pipe.target,
+            self.flavor,
+            schema=self.get_pipe_schema(pipe),
         ),
-        f"""
-        DROP TABLE {if_exists_str} {temp_old_table_name}
-        """,
+        f"DROP TABLE {if_exists_str} {temp_old_table_name}",
     ])

+    self._log_temporary_tables_creation(temp_old_table, create=(not pipe.temporary), debug=debug)
     create_temporary_result = self.execute(create_temporary_table_query, debug=debug)
     if create_temporary_result is None:
         return False, f"Failed to deduplicate table {pipe_table_name}."
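The deduplication swap stages the deduplicated rows in a temporary target, renames the original out of the way, renames the deduplicated table into place, and finally drops the old copy. A rough sketch of the sequence produced by `alter_queries` (table names are illustrative; the real statements come from `get_rename_table_queries()`):

    # Illustrative only: the three-step swap performed after deduplication.
    steps = [
        'ALTER TABLE "orders" RENAME TO "orders_old_abc"',    # target -> temporary "old" table
        'ALTER TABLE "orders_dedup_abc" RENAME TO "orders"',  # deduplicated table -> target
        'DROP TABLE IF EXISTS "orders_old_abc"',              # clean up the old copy
    ]
    for query in steps:
        print(query)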
@@ -3794,8 +3815,7 @@ def deduplicate_pipe(
             f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
             if old_rowcount != new_rowcount
             else ''
-        )
-        + '.'
+        ) + '.'
     )
     if success
     else f"Failed to execute query:\n{fail_query}"
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -773,7 +773,6 @@ def to_sql(
     """
     import time
     import json
-    from decimal import Decimal
     from datetime import timedelta
     from meerschaum.utils.warnings import error, warn
     import warnings
@@ -823,6 +822,7 @@ def to_sql(

     bytes_cols = get_bytes_cols(df)
     numeric_cols = get_numeric_cols(df)
+    ### NOTE: This excludes non-numeric serialized Decimals (e.g. SQLite).
     numeric_cols_dtypes = {
         col: typ
         for col, typ in kw.get('dtype', {}).items()
@@ -833,6 +833,27 @@ def to_sql(

     }
     numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
+    numeric_cols_precisions_scales = {
+        col: (
+            (typ.precision, typ.scale)
+            if hasattr(typ, 'precision')
+            else get_numeric_precision_scale(self.flavor)
+        )
+        for col, typ in numeric_cols_dtypes.items()
+    }
+    cols_pd_types = {
+        col: get_pd_type_from_db_type(str(typ))
+        for col, typ in kw.get('dtype', {}).items()
+    }
+    cols_pd_types.update({
+        col: f'numeric[{precision},{scale}]'
+        for col, (precision, scale) in numeric_cols_precisions_scales.items()
+        if precision and scale
+    })
+    cols_db_types = {
+        col: get_db_type_from_pd_type(typ, flavor=self.flavor)
+        for col, typ in cols_pd_types.items()
+    }

     enable_bulk_insert = mrsm.get_config(
         'system', 'connectors', 'sql', 'bulk_insert'
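The new `cols_pd_types` / `cols_db_types` mapping normalizes declared NUMERIC columns to `numeric[precision,scale]` dtypes before translating them back into database types for the bulk-insert path. A sketch of the mapping for a single declared column, assuming a SQLAlchemy `Numeric` type:

    from sqlalchemy import Numeric

    # A sketch of the precision/scale mapping built above for one column.
    dtype_kw = {'price': Numeric(28, 10)}

    numeric_cols_precisions_scales = {
        col: (typ.precision, typ.scale) if hasattr(typ, 'precision') else (None, None)
        for col, typ in dtype_kw.items()
    }
    cols_pd_types = {
        col: f'numeric[{precision},{scale}]'
        for col, (precision, scale) in numeric_cols_precisions_scales.items()
        if precision and scale
    }
    print(cols_pd_types)  # {'price': 'numeric[28,10]'}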
@@ -844,7 +865,7 @@ def to_sql(
     if method == "":
         if enable_bulk_insert:
             method = (
-                functools.partial(mssql_insert_json, debug=debug)
+                functools.partial(mssql_insert_json, cols_types=cols_db_types, debug=debug)
                 if self.flavor == 'mssql'
                 else functools.partial(psql_insert_copy, debug=debug)
             )
@@ -867,14 +888,10 @@ def to_sql(

     ### Check for numeric columns.
     for col in numeric_cols:
-
-
-
-            (typ.precision, typ.scale)
-            if hasattr(typ, 'precision')
-            else get_numeric_precision_scale(self.flavor)
+        precision, scale = numeric_cols_precisions_scales.get(
+            col,
+            get_numeric_precision_scale(self.flavor)
         )
-
         df[col] = df[col].apply(
             functools.partial(
                 serialize_decimal,
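With the precision/scale lookup precomputed above, each numeric column is serialized using the same precision and scale the database column was declared with. An illustrative sketch of quantizing a value to a declared scale (not the actual `serialize_decimal` signature):

    from decimal import Decimal, Context

    # Illustrative only: quantize a value to a declared (precision, scale)
    # the way a NUMERIC(28, 10) column would store it.
    precision, scale = 28, 10
    value = Decimal('19.95000000001')
    quantized = value.quantize(Decimal(1).scaleb(-scale), context=Context(prec=precision))
    print(quantized)  # 19.9500000000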
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -92,6 +92,7 @@ class Pipe:
         _get_data_as_iterator,
         get_chunk_interval,
         get_chunk_bounds,
+        get_chunk_bounds_batches,
         parse_date_bounds,
     )
     from ._register import register
@@ -117,6 +118,7 @@ class Pipe:
         id,
         get_val_column,
         parents,
+        parent,
         children,
         target,
         _target_legacy,
meerschaum/core/Pipe/_attributes.py
CHANGED
@@ -590,7 +590,7 @@ def get_val_column(self, debug: bool = False) -> Union[str, None]:


 @property
-def parents(self) -> List[
+def parents(self) -> List[mrsm.Pipe]:
     """
     Return a list of `meerschaum.Pipe` objects to be designated as parents.
     """
@@ -617,7 +617,18 @@ def parents(self) -> List[meerschaum.Pipe]:


 @property
-def
+def parent(self) -> Union[mrsm.Pipe, None]:
+    """
+    Return the first pipe in `self.parents` or `None`.
+    """
+    parents = self.parents
+    if not parents:
+        return None
+    return parents[0]
+
+
+@property
+def children(self) -> List[mrsm.Pipe]:
     """
     Return a list of `meerschaum.Pipe` objects to be designated as children.
     """
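The new `parent` property is a convenience wrapper over `parents`: it returns the first declared parent pipe or `None`. A usage sketch:

    import meerschaum as mrsm

    # A sketch of the new `parent` convenience property.
    pipe = mrsm.Pipe('sql:source', 'orders', instance='sql:main')
    first_parent = pipe.parent   # first Pipe in `pipe.parents`, or None
    all_parents = pipe.parents   # full list (may be empty)
    if first_parent is None:
        print(f"{pipe} has no declared parents.")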
meerschaum/core/Pipe/_data.py
CHANGED
@@ -544,11 +544,33 @@ def get_rowcount(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.venv import Venv
     from meerschaum.connectors import get_connector_plugin
+    from meerschaum.utils.misc import filter_keywords

     begin, end = self.parse_date_bounds(begin, end)
     connector = self.instance_connector if not remote else self.connector
     try:
         with Venv(get_connector_plugin(connector)):
+            if not hasattr(connector, 'get_pipe_rowcount'):
+                warn(
+                    f"Connectors of type '{connector.type}' "
+                    "do not implement `get_pipe_rowcount()`.",
+                    stack=False,
+                )
+                return 0
+            kwargs = filter_keywords(
+                connector.get_pipe_rowcount,
+                begin=begin,
+                end=end,
+                params=params,
+                remote=remote,
+                debug=debug,
+            )
+            if remote and 'remote' not in kwargs:
+                warn(
+                    f"Connectors of type '{connector.type}' do not support remote rowcounts.",
+                    stack=False,
+                )
+                return 0
             rowcount = connector.get_pipe_rowcount(
                 self,
                 begin=begin,
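`Pipe.get_rowcount()` now degrades gracefully: connectors that do not implement `get_pipe_rowcount()` warn and return 0, and `filter_keywords()` strips keyword arguments the implementation does not accept, so `remote=True` is only forwarded when the connector supports it. A minimal sketch of the keyword-filtering idea, assuming `meerschaum.utils.misc.filter_keywords` behaves roughly like this:

    import inspect

    # A minimal stand-in for the keyword filtering used above.
    def filter_keywords(func, **kw):
        accepted = inspect.signature(func).parameters
        return {k: v for k, v in kw.items() if k in accepted}

    def get_pipe_rowcount(pipe, begin=None, end=None, params=None, debug=False):
        return 0  # stand-in connector method without `remote` support

    kwargs = filter_keywords(get_pipe_rowcount, begin=None, remote=True, debug=False)
    print('remote' in kwargs)  # False -> the caller warns and returns 0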
@@ -651,12 +673,19 @@ def get_chunk_bounds(
     A list of chunk bounds (datetimes or integers).
     If unbounded, the first and last chunks will include `None`.
     """
+    from datetime import timedelta
+    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.misc import interval_str
     include_less_than_begin = not bounded and begin is None
     include_greater_than_end = not bounded and end is None
     if begin is None:
         begin = self.get_sync_time(newest=False, debug=debug)
     if end is None:
         end = self.get_sync_time(newest=True, debug=debug)
+        if end is not None and hasattr(end, 'tzinfo'):
+            end += timedelta(minutes=1)
+        elif are_dtypes_equal(str(type(end)), 'int'):
+            end += 1
     if begin is None and end is None:
         return [(None, None)]

@@ -670,10 +699,17 @@ def get_chunk_bounds(
     ### Run `verify pipes --workers 1` to sync chunks in series.
     chunk_bounds = []
     begin_cursor = begin
+    num_chunks = 0
+    max_chunks = 1_000_000
     while begin_cursor < end:
         end_cursor = begin_cursor + chunk_interval
         chunk_bounds.append((begin_cursor, end_cursor))
         begin_cursor = end_cursor
+        num_chunks += 1
+        if num_chunks >= max_chunks:
+            raise ValueError(
+                f"Too many chunks of size '{interval_str(chunk_interval)}' between '{begin}' and '{end}'."
+            )

     ### The chunk interval might be too large.
     if not chunk_bounds and end >= begin:
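Chunk bounds are built by stepping from `begin` to `end` in increments of the chunk interval; the new `max_chunks` guard (1,000,000) raises instead of looping indefinitely when the interval is tiny relative to the range. A self-contained sketch of the walk:

    from datetime import datetime, timedelta

    # A self-contained sketch of the chunk-bounds walk with the new guard.
    begin = datetime(2025, 1, 1)
    end = datetime(2025, 1, 2)
    chunk_interval = timedelta(hours=6)
    max_chunks = 1_000_000

    chunk_bounds, cursor, num_chunks = [], begin, 0
    while cursor < end:
        chunk_bounds.append((cursor, cursor + chunk_interval))
        cursor += chunk_interval
        num_chunks += 1
        if num_chunks >= max_chunks:
            raise ValueError(f"Too many chunks between '{begin}' and '{end}'.")

    print(len(chunk_bounds))  # 4 six-hour chunks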
@@ -695,6 +731,55 @@ def get_chunk_bounds(
     return chunk_bounds


+def get_chunk_bounds_batches(
+    self,
+    chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]],
+    batchsize: Optional[int] = None,
+    workers: Optional[int] = None,
+    debug: bool = False,
+) -> List[
+    Tuple[
+        Tuple[
+            Union[datetime, int, None],
+            Union[datetime, int, None],
+        ], ...
+    ]
+]:
+    """
+    Return a list of tuples of chunk bounds of size `batchsize`.
+
+    Parameters
+    ----------
+    chunk_bounds: List[Tuple[Union[datetime, int, None], Union[datetime, int, None]]]
+        A list of chunk_bounds (see `Pipe.get_chunk_bounds()`).
+
+    batchsize: Optional[int], default None
+        How many chunks to include in a batch. Defaults to `Pipe.get_num_workers()`.
+
+    workers: Optional[int], default None
+        If `batchsize` is `None`, use this as the desired number of workers.
+        Passed to `Pipe.get_num_workers()`.
+
+    Returns
+    -------
+    A list of tuples of chunk bound tuples.
+    """
+    from meerschaum.utils.misc import iterate_chunks
+
+    if batchsize is None:
+        batchsize = self.get_num_workers(workers=workers)
+
+    return [
+        tuple(
+            _batch_chunk_bounds
+            for _batch_chunk_bounds in batch
+            if _batch_chunk_bounds is not None
+        )
+        for batch in iterate_chunks(chunk_bounds, batchsize)
+        if batch
+    ]
+
+
 def parse_date_bounds(self, *dt_vals: Union[datetime, int, None]) -> Union[
     datetime,
     int,
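A usage sketch pairing `get_chunk_bounds()` with the new `get_chunk_bounds_batches()` to group chunks into worker-sized batches (for example, when verifying a pipe batch by batch):

    import meerschaum as mrsm

    # A sketch of batching chunk bounds; assumes the pipe has a datetime axis.
    pipe = mrsm.Pipe('sql:source', 'orders', instance='sql:main')

    chunk_bounds = pipe.get_chunk_bounds(bounded=True)
    batches = pipe.get_chunk_bounds_batches(chunk_bounds, batchsize=4)

    for batch in batches:
        # Each batch is a tuple of (begin, end) bounds, at most 4 per batch here.
        print(len(batch), batch[0])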
meerschaum/core/Pipe/_deduplicate.py
CHANGED
@@ -110,13 +110,12 @@ def deduplicate(
     )
     if bounded and end is None:
         end = self.get_sync_time(newest=True, debug=debug)
-
-
-
-
-
-
-        )
+        if end is not None:
+            end += (
+                timedelta(minutes=1)
+                if hasattr(end, 'tzinfo')
+                else 1
+            )

     chunk_bounds = self.get_chunk_bounds(
         bounded=bounded,
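Bumping `end` by one minute (or by 1 on an integer axis) makes the final chunk include the row whose datetime equals the sync time, since chunk bounds treat `end` as exclusive. A tiny illustration:

    from datetime import datetime, timedelta

    # Illustration: without the bump, a half-open [begin, end) chunk range
    # would exclude the row whose datetime equals the sync time exactly.
    sync_time = datetime(2025, 3, 1, 12, 30)
    end = sync_time + timedelta(minutes=1)   # datetime axis
    # integer axis: end = sync_time + 1
    print(sync_time < end)  # True -> the newest row falls inside the last chunk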
@@ -129,7 +128,6 @@ def deduplicate(
     indices = [col for col in self.columns.values() if col]
     if not indices:
         return False, "Cannot deduplicate without index columns."
-    dt_col = self.columns.get('datetime', None)

     def process_chunk_bounds(bounds) -> Tuple[
         Tuple[