meerschaum 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +36 -3
- meerschaum/actions/show.py +15 -4
- meerschaum/actions/sql.py +1 -1
- meerschaum/api/routes/_pipes.py +38 -38
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +3 -3
- meerschaum/connectors/sql/_SQLConnector.py +1 -1
- meerschaum/connectors/sql/_instance.py +12 -12
- meerschaum/connectors/sql/_pipes.py +75 -52
- meerschaum/connectors/sql/_sql.py +3 -1
- meerschaum/core/Pipe/_data.py +12 -13
- meerschaum/core/Pipe/_sync.py +1 -1
- meerschaum/utils/dataframe.py +34 -23
- meerschaum/utils/dtypes/sql.py +32 -18
- meerschaum/utils/formatting/_pipes.py +4 -4
- meerschaum/utils/misc.py +4 -4
- meerschaum/utils/packages/_packages.py +2 -1
- meerschaum/utils/sql.py +11 -7
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/METADATA +7 -4
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/RECORD +26 -26
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/LICENSE +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/NOTICE +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/WHEEL +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/top_level.txt +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -459,6 +459,11 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
+        elif self.flavor == 'mssql':
+            dt_query = (
+                f"CREATE CLUSTERED INDEX {_datetime_index_name} "
+                f"ON {_pipe_name} ({_datetime_name})"
+            )
         else: ### mssql, sqlite, etc.
             dt_query = (
                 f"CREATE INDEX {_datetime_index_name} "
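For mssql, the new branch issues a CREATE CLUSTERED INDEX on the datetime column (which physically orders the table's rows), while TimescaleDB keeps converting the table into a hypertable and the remaining flavors fall through to a plain index. A minimal sketch of the dispatch; the function and variable names are illustrative, not meerschaum's internals:

def build_datetime_index_query(flavor: str, table: str, dt_col: str, index_name: str) -> str:
    # TimescaleDB chunks the table on the datetime column via a hypertable.
    if flavor == 'timescaledb':
        return (
            f"SELECT public.create_hypertable('{table}', '{dt_col}', "
            "if_not_exists => true, migrate_data => true);"
        )
    # SQL Server: a clustered index determines the physical row order.
    if flavor == 'mssql':
        return f"CREATE CLUSTERED INDEX {index_name} ON {table} ({dt_col})"
    # sqlite, postgresql, etc. get an ordinary secondary index.
    return f"CREATE INDEX {index_name} ON {table} ({dt_col})"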
@@ -563,7 +568,12 @@ def get_drop_index_queries(
         return {}
     if not pipe.exists(debug=debug):
         return {}
-    from meerschaum.utils.sql import
+    from meerschaum.utils.sql import (
+        sql_item_name,
+        table_exists,
+        hypertable_queries,
+        DROP_IF_EXISTS_FLAVORS,
+    )
     drop_queries = {}
     schema = self.get_pipe_schema(pipe)
     schema_prefix = (schema + '_') if schema else ''
@@ -580,16 +590,17 @@ def get_drop_index_queries(
     is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name)
     is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None
 
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     if is_hypertable:
         nuke_queries = []
         temp_table = '_' + pipe.target + '_temp_migration'
         temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))
 
         if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
-            nuke_queries.append(f"DROP TABLE {temp_table_name}")
+            nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}")
         nuke_queries += [
             f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
-            f"DROP TABLE {pipe_name}",
+            f"DROP TABLE {if_exists_str} {pipe_name}",
             f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}",
         ]
         nuke_ix_keys = ('datetime', 'id')
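The if_exists_str gating seen here recurs throughout this release: flavors in DROP_IF_EXISTS_FLAVORS accept the DROP TABLE IF EXISTS qualifier, and everything else gets a plain DROP TABLE. A hedged sketch of the pattern (the real set lives in meerschaum.utils.sql; the members below are illustrative only):

# Illustrative stand-in for meerschaum.utils.sql.DROP_IF_EXISTS_FLAVORS.
DROP_IF_EXISTS_FLAVORS = {'postgresql', 'timescaledb', 'citus', 'sqlite', 'mssql'}

def build_drop_query(flavor: str, quoted_table: str) -> str:
    # Only interpolate IF EXISTS where the flavor's dialect supports it.
    if_exists_str = "IF EXISTS" if flavor in DROP_IF_EXISTS_FLAVORS else ""
    return f"DROP TABLE {if_exists_str} {quoted_table}"

print(build_drop_query('sqlite', '"tbl"'))  # DROP TABLE IF EXISTS "tbl"
print(build_drop_query('oracle', '"tbl"'))  # DROP TABLE  "tbl"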
@@ -811,7 +822,7 @@ def get_pipe_data(
             parse_df_datetimes(
                 c,
                 ignore_cols=ignore_dt_cols,
-                chunksize
+                chunksize=kw.get('chunksize', None),
                 debug=debug,
             )
             for c in df
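This hunk and several below convert bare positional arguments into keyword arguments. The failure mode being fixed is silent mis-binding: a positional value lands in whatever parameter happens to occupy that slot. A contrived illustration (not meerschaum code):

def parse(df, ignore_cols=None, chunksize=None, debug=False):
    return {'ignore_cols': ignore_cols, 'chunksize': chunksize, 'debug': debug}

# Positionally, 900 binds to ignore_cols, not chunksize:
print(parse('df', 900))            # {'ignore_cols': 900, 'chunksize': None, 'debug': False}
# With a keyword, the binding is explicit:
print(parse('df', chunksize=900))  # {'ignore_cols': None, 'chunksize': 900, 'debug': False}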
@@ -1017,7 +1028,7 @@ def get_pipe_data_query(
     if _dt and _dt in existing_cols:
         order_by += dt + ' ' + order + ','
     for key, quoted_col_name in quoted_indices.items():
-        if
+        if dt == quoted_col_name:
             continue
         order_by += ' ' + quoted_col_name + ' ' + order + ','
     order_by = order_by[:-1]
@@ -1034,7 +1045,7 @@ def get_pipe_data_query(
         )
     else:
         query += f"\nLIMIT {limit}"
-
+
     if debug:
         to_print = (
             []
@@ -1315,7 +1326,7 @@ def sync_pipe(
     ) if dt_col else None
 
     transact_id = generate_password(3)
-    temp_target = '
+    temp_target = '##' + transact_id + '_' + pipe.target
     self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
     temp_pipe = Pipe(
         pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
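The temporary target now begins with '##': on Microsoft SQL Server a single leading '#' names a session-local temporary table and '##' a global one, while on other flavors the prefix is simply part of the quoted table name. A sketch of the naming scheme with illustrative values:

# Illustrative values; in meerschaum, transact_id comes from generate_password(3)
# and the suffix is pipe.target.
transact_id = 'abc'
target = 'weather'
temp_target = '##' + transact_id + '_' + target
print(temp_target)  # ##abc_weather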
@@ -1721,7 +1732,7 @@ def sync_pipe_inplace(
 
     delta_cols_types = get_table_cols_types(
         temp_tables['delta'],
-        connectable
+        connectable=connectable,
         flavor=self.flavor,
         schema=internal_schema,
         database=database,
@@ -1779,7 +1790,7 @@ def sync_pipe_inplace(
     create_joined_success, create_joined_msg = session_execute(
         session,
         create_joined_query,
-        debug
+        debug=debug,
     ) if on_cols and not upsert else (True, "Success")
     if not create_joined_success:
         _ = clean_up_temp_tables()
@@ -1790,14 +1801,14 @@ def sync_pipe_inplace(
         + (', '.join([
             (
                 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
-                + " != " + get_null_replacement(typ, self.flavor)
+                + " != " + get_null_replacement(typ, self.flavor)
                 + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
                 + "\n ELSE NULL\nEND "
                 + " AS " + sql_item_name(c, self.flavor, None)
             ) for c, typ in delta_cols.items()
         ]))
         + f"\nFROM {temp_table_names['joined']}\n"
-        +
+        + "WHERE "
         + '\nAND\n'.join([
             (
                 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
@@ -1813,8 +1824,8 @@ def sync_pipe_inplace(
     (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
         session,
         create_unseen_query,
-        with_results
-        debug
+        with_results=True,
+        debug=debug
     ) if not upsert else (True, "Success"), None
     if not create_unseen_success:
         _ = clean_up_temp_tables()
@@ -1832,7 +1843,7 @@ def sync_pipe_inplace(
             ) for c, typ in delta_cols.items()
         ]))
         + f"\nFROM {temp_table_names['joined']}\n"
-        +
+        + "WHERE "
         + '\nOR\n'.join([
             (
                 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
@@ -1849,8 +1860,8 @@ def sync_pipe_inplace(
     (create_update_success, create_update_msg), create_update_results = session_execute(
         session,
         create_update_query,
-        with_results
-        debug
+        with_results=True,
+        debug=debug,
     ) if on_cols and not upsert else ((True, "Success"), [])
     apply_update_queries = (
         get_update_queries(
@@ -1858,12 +1869,12 @@ def sync_pipe_inplace(
             temp_tables['update'],
             session,
             on_cols,
-            upsert
-            schema
-            patch_schema
-            datetime_col
-            flavor
-            debug
+            upsert=upsert,
+            schema=self.get_pipe_schema(pipe),
+            patch_schema=internal_schema,
+            datetime_col=pipe.columns.get('datetime', None),
+            flavor=self.flavor,
+            debug=debug,
         )
         if on_cols else []
     )
@@ -1883,8 +1894,8 @@ def sync_pipe_inplace(
     (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute(
         session,
         apply_unseen_queries,
-        with_results
-        debug
+        with_results=True,
+        debug=debug,
     ) if not upsert else (True, "Success"), None
     if not apply_unseen_success:
         _ = clean_up_temp_tables()
@@ -1894,8 +1905,8 @@ def sync_pipe_inplace(
     (apply_update_success, apply_update_msg), apply_update_results = session_execute(
         session,
         apply_update_queries,
-        with_results
-        debug
+        with_results=True,
+        debug=debug,
     )
     if not apply_update_success:
         _ = clean_up_temp_tables()
@@ -2064,7 +2075,7 @@ def get_pipe_rowcount(
     ----------
     pipe: mrsm.Pipe
         The pipe to query with.
-
+
     begin: Union[datetime, int, None], default None
         The begin datetime value.
 
@@ -2113,14 +2124,14 @@ def get_pipe_rowcount(
             warn(
                 f"No datetime could be determined for {pipe}."
                 + "\n Ignoring begin and end...",
-                stack
+                stack=False,
             )
             begin, end = None, None
         else:
             warn(
                 f"A datetime wasn't specified for {pipe}.\n"
                 + f" Using column \"{_dt}\" for datetime bounds...",
-                stack
+                stack=False,
             )
 
 
@@ -2176,6 +2187,8 @@ def get_pipe_rowcount(
         FROM ({src}) AS src
         """
     )
+    print(f"{begin=}")
+    print(f"{end=}")
     if begin is not None or end is not None:
         query += "WHERE"
     if begin is not None:
@@ -2198,7 +2211,7 @@ def get_pipe_rowcount(
             else 'WHERE'
         )
     )
-
+
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
@@ -2207,11 +2220,11 @@ def get_pipe_rowcount(
 
 
 def drop_pipe(
-
-
-
-
-
+    self,
+    pipe: mrsm.Pipe,
+    debug: bool = False,
+    **kw
+) -> SuccessTuple:
     """
     Drop a pipe's tables but maintain its registration.
 
@@ -2219,30 +2232,36 @@ def drop_pipe(
     ----------
     pipe: mrsm.Pipe
         The pipe to drop.
-
+
+    Returns
+    -------
+    A `SuccessTuple` indicated success.
     """
-    from meerschaum.utils.sql import table_exists, sql_item_name
+    from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
     target_name = (
         sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
     )
     if table_exists(target, self, debug=debug):
-
+        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
+        success = self.exec(
+            f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
+        ) is not None
 
     msg = "Success" if success else f"Failed to drop {pipe}."
     return success, msg
 
 
 def clear_pipe(
-
-
-
-
-
-
-
-
+    self,
+    pipe: mrsm.Pipe,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    debug: bool = False,
+    **kw
+) -> SuccessTuple:
     """
     Delete a pipe's data within a bounded or unbounded interval without dropping the table.
 
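Both restored signatures return a SuccessTuple, meerschaum's (bool, str) pair. A hedged usage sketch, assuming a configured 'sql:main' instance and a registered pipe:

import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
conn = mrsm.get_connector('sql:main')

# Drop the pipe's table but keep its registration.
success, msg = conn.drop_pipe(pipe)
print(success, msg)

# Delete rows within an interval without dropping the table.
success, msg = conn.clear_pipe(pipe, begin=None, end=None)
print(success, msg)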
@@ -2535,7 +2554,7 @@ def get_alter_columns_queries(
     """
    if not pipe.exists(debug=debug):
         return []
-    from meerschaum.utils.sql import sql_item_name
+    from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS
     from meerschaum.utils.dataframe import get_numeric_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
@@ -2691,7 +2710,9 @@ def get_alter_columns_queries(
         f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
     )
 
-
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
+
+    drop_query = f"DROP TABLE {if_exists_str}" + sql_item_name(
         temp_table_name, self.flavor, self.get_pipe_schema(pipe)
     )
     return [
@@ -2882,6 +2903,7 @@ def deduplicate_pipe(
         NO_CTE_FLAVORS,
         get_rename_table_queries,
         NO_SELECT_INTO_FLAVORS,
+        DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
         get_null_replacement,
@@ -3012,6 +3034,7 @@ def deduplicate_pipe(
     ) + f"""
     ORDER BY {index_list_str_ordered}
     """
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     alter_queries = flatten_list([
         get_rename_table_queries(
             pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe)
@@ -3020,7 +3043,7 @@ def deduplicate_pipe(
             dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
         ),
         f"""
-        DROP TABLE {temp_old_table_name}
+        DROP TABLE {if_exists_str} {temp_old_table_name}
         """,
     ])
 
@@ -3030,9 +3053,9 @@ def deduplicate_pipe(
 
     results = self.exec_queries(
         alter_queries,
-        break_on_error
-        rollback
-        debug
+        break_on_error=True,
+        rollback=True,
+        debug=debug,
     )
 
     fail_query = None
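Naming break_on_error and rollback makes deduplicate_pipe's rename-swap-drop sequence behave atomically: execution stops at the first failed statement and the batch is rolled back. A generic sketch of that contract (not meerschaum's implementation), using SQLAlchemy-style transactions:

import sqlalchemy

def exec_queries_atomically(engine, queries):
    # engine.begin() commits when the block exits cleanly and rolls the
    # whole batch back if any statement raises.
    results = []
    with engine.begin() as connection:
        for query in queries:
            results.append(connection.execute(sqlalchemy.text(query)))
    return results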
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -753,6 +753,7 @@ def to_sql(
         table_exists,
         json_flavors,
         truncate_item_name,
+        DROP_IF_EXISTS_FLAVORS,
     )
     from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
@@ -827,12 +828,13 @@ def to_sql(
         'parallel': True,
     })
 
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     if self.flavor == 'oracle':
         ### For some reason 'replace' doesn't work properly in pandas,
         ### so try dropping first.
         if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug):
             success = self.exec(
-                "DROP TABLE " + sql_item_name(name, 'oracle', schema)
+                f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema)
             ) is not None
             if not success:
                 warn(f"Unable to drop {name}")
meerschaum/core/Pipe/_data.py
CHANGED
@@ -515,15 +515,14 @@ def get_backtrack_data(
     )
 
 
-
 def get_rowcount(
-
-
-
-
-
-
-
+    self,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    remote: bool = False,
+    debug: bool = False
+) -> int:
     """
     Get a Pipe's instance or remote rowcount.
 
@@ -556,11 +555,11 @@ def get_rowcount(
     with Venv(get_connector_plugin(connector)):
         rowcount = connector.get_pipe_rowcount(
             self,
-            begin
-            end
-            params
-            remote
-            debug
+            begin=begin,
+            end=end,
+            params=params,
+            remote=remote,
+            debug=debug,
         )
     if rowcount is None:
         return 0
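With the bounds now forwarded as keywords, a call like the following counts rows within a datetime window (a hedged usage sketch; assumes a registered pipe with a datetime index column):

from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
count = pipe.get_rowcount(
    begin=datetime(2024, 1, 1),
    end=datetime(2024, 2, 1),
)
print(count)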
meerschaum/core/Pipe/_sync.py
CHANGED
meerschaum/utils/dataframe.py
CHANGED
@@ -61,12 +61,10 @@ def add_missing_cols_to_df(
     if set(df.columns) == set(dtypes):
         return df
 
-    import
-    from meerschaum.utils.packages import import_pandas, attempt_import
-    from meerschaum.utils.warnings import warn
+    from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.dtypes import to_pandas_dtype
     pandas = attempt_import('pandas')
-
+
     def build_series(dtype: str):
         return pandas.Series([], dtype=to_pandas_dtype(dtype))
 
@@ -75,7 +73,10 @@ def add_missing_cols_to_df(
         for col, typ in dtypes.items()
         if col not in df.columns
     }
-
+    df_with_cols = df.assign(**assign_kwargs)
+    for col in assign_kwargs:
+        df_with_cols[col] = df_with_cols[col].fillna(pandas.NA)
+    return df_with_cols
 
 
 def filter_unseen_df(
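The new body materializes the missing columns as empty, correctly-typed Series (so extension dtypes survive the assignment) and then normalizes the fill values to pandas.NA. A standalone sketch of the same idea:

import pandas as pd

df = pd.DataFrame({'a': [1, 2]})
dtypes = {'a': 'int64', 'b': 'Float64', 'c': 'string'}

# Empty Series carry the intended dtype; assign() aligns them on the index.
assign_kwargs = {
    col: pd.Series([], dtype=typ)
    for col, typ in dtypes.items()
    if col not in df.columns
}
df_with_cols = df.assign(**assign_kwargs)
for col in assign_kwargs:
    df_with_cols[col] = df_with_cols[col].fillna(pd.NA)

print(df_with_cols.dtypes)  # 'b' stays Float64 and 'c' stays string, filled with <NA>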
@@ -152,6 +153,7 @@ def filter_unseen_df(
     is_dask = 'dask' in new_df.__module__
     if is_dask:
         pandas = attempt_import('pandas')
+        _ = attempt_import('partd', lazy=False)
         dd = attempt_import('dask.dataframe')
         merge = dd.merge
         NA = pandas.NA
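partd is a dependency dask's dataframe shuffle machinery relies on; importing it eagerly surfaces a missing package before the join starts rather than mid-computation. attempt_import is meerschaum's installer-aware import helper, and lazy=False forces the import (and any auto-install) to happen immediately:

from meerschaum.utils.packages import attempt_import

_ = attempt_import('partd', lazy=False)  # fail fast if dask's shuffle backend is absent
dd = attempt_import('dask.dataframe')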
@@ -301,21 +303,28 @@ def filter_unseen_df(
         lambda x: f'{x:f}' if isinstance(x, Decimal) else x
     )
 
+    old_dt_cols = [
+        col
+        for col, typ in old_df.dtypes.items()
+        if are_dtypes_equal(str(typ), 'datetime')
+    ]
+    for col in old_dt_cols:
+        old_df[col] = coerce_timezone(old_df[col])
+
+    new_dt_cols = [
+        col
+        for col, typ in old_df.dtypes.items()
+        if are_dtypes_equal(str(typ), 'datetime')
+    ]
+    for col in new_dt_cols:
+        new_df[col] = coerce_timezone(new_df[col])
+
     old_uuid_cols = get_uuid_cols(old_df)
     new_uuid_cols = get_uuid_cols(new_df)
     uuid_cols = set(new_uuid_cols + old_uuid_cols)
-    for uuid_col in old_uuid_cols:
-        old_df[uuid_col] = old_df[uuid_col].apply(
-            lambda x: f'{x}' if isinstance(x, UUID) else x
-        )
-    for uuid_col in new_uuid_cols:
-        new_df[uuid_col] = new_df[uuid_col].apply(
-            lambda x: f'{x}' if isinstance(x, UUID) else x
-        )
-
     joined_df = merge(
-        new_df.fillna(NA),
-        old_df.fillna(NA),
+        new_df.infer_objects(copy=False).fillna(NA),
+        old_df.infer_objects(copy=False).fillna(NA),
         how='left',
         on=None,
         indicator=True,
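The new coerce_timezone pass exists because pandas refuses to order-compare tz-aware and tz-naive datetimes, which can surface in the merge in this hunk when one frame came from the database with timezones and the other did not. A minimal demonstration of the failure mode (coerce_timezone itself lives in meerschaum.utils.dtypes):

import pandas as pd

aware = pd.Series(pd.to_datetime(['2024-01-01'], utc=True))
naive = pd.Series(pd.to_datetime(['2024-01-01']))

try:
    aware < naive
except TypeError as exc:
    print(f"TypeError: {exc}")  # cannot compare tz-naive and tz-aware values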
@@ -558,10 +567,10 @@ def get_json_cols(df: 'pd.DataFrame') -> List[str]:
     -------
     A list of columns to be encoded as JSON.
     """
-    is_dask = 'dask' in df.__module__
+    is_dask = 'dask' in df.__module__ if hasattr(df, '__module__') else False
     if is_dask:
         df = get_first_valid_dask_partition(df)
-
+
     if len(df) == 0:
         return []
 
@@ -618,12 +627,12 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
 
 def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
     """
-    Get the columns which contain `
+    Get the columns which contain `uuid.UUID` objects from a Pandas DataFrame.
 
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame which may contain
+        The DataFrame which may contain UUID objects.
 
     Returns
     -------
@@ -699,6 +708,7 @@ def enforce_dtypes(
         is_dtype_numeric,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        coerce_timezone,
     )
     if safe_copy:
         df = df.copy()
@@ -1065,6 +1075,7 @@ def get_first_valid_dask_partition(ddf: 'dask.dataframe.DataFrame') -> Union['pd
             continue
         if len(pdf) > 0:
             return pdf
+    _ = mrsm.attempt_import('partd', lazy=False)
     return ddf.compute()
 
 
@@ -1171,9 +1182,9 @@ def query_df(
     dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
 
     if inplace:
-        df.fillna(NA, inplace=True)
+        df.infer_objects(copy=False).fillna(NA, inplace=True)
     else:
-        df = df.fillna(NA)
+        df = df.infer_objects(copy=False).fillna(NA)
 
     if isinstance(begin, str):
         begin = dateutil_parser.parse(begin)
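The infer_objects(copy=False).fillna(...) idiom appears in several hunks here: pandas 2.1+ deprecates the implicit downcasting that fillna used to perform on object-dtype columns, and doing the dtype inference explicitly avoids the FutureWarning. A minimal sketch:

import pandas as pd

s = pd.Series([True, None], dtype='object')

# Plain s.fillna(False) warns on pandas >= 2.1 about deprecated implicit
# downcasting; inferring dtypes explicitly keeps the conversion visible.
out = s.infer_objects(copy=False).fillna(False)
print(out)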
@@ -1346,7 +1357,7 @@ def to_json(
     df = df.copy()
     for col in uuid_cols:
         df[col] = df[col].astype(str)
-    return df.fillna(pd.NA).to_json(
+    return df.infer_objects(copy=False).fillna(pd.NA).to_json(
         date_format=date_format,
         date_unit=date_unit,
         orient=orient,