meerschaum 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +2 -0
- meerschaum/_internal/arguments/_parser.py +17 -11
- meerschaum/actions/clear.py +1 -1
- meerschaum/actions/edit.py +1 -1
- meerschaum/actions/verify.py +18 -21
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_fetch.py +45 -26
- meerschaum/connectors/sql/_instance.py +4 -4
- meerschaum/connectors/sql/_pipes.py +135 -103
- meerschaum/core/Pipe/_attributes.py +1 -1
- meerschaum/core/Pipe/_dtypes.py +9 -9
- meerschaum/core/Pipe/_fetch.py +2 -3
- meerschaum/core/Pipe/_sync.py +11 -3
- meerschaum/core/Pipe/_verify.py +9 -5
- meerschaum/jobs/__init__.py +1 -1
- meerschaum/utils/dataframe.py +10 -2
- meerschaum/utils/dtypes/sql.py +1 -1
- meerschaum/utils/formatting/__init__.py +5 -25
- meerschaum/utils/formatting/_pipes.py +9 -6
- meerschaum/utils/sql.py +156 -87
- meerschaum/utils/venv/__init__.py +44 -6
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/METADATA +1 -1
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/RECORD +29 -29
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/zip-safe +0 -0
@@ -391,7 +391,7 @@ def get_create_index_queries(
|
|
391
391
|
from meerschaum.utils.sql import (
|
392
392
|
sql_item_name,
|
393
393
|
get_distinct_col_count,
|
394
|
-
|
394
|
+
UPDATE_QUERIES,
|
395
395
|
get_null_replacement,
|
396
396
|
get_create_table_queries,
|
397
397
|
get_rename_table_queries,
|
@@ -405,7 +405,7 @@ def get_create_index_queries(
|
|
405
405
|
from meerschaum.config import get_config
|
406
406
|
index_queries = {}
|
407
407
|
|
408
|
-
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in
|
408
|
+
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
|
409
409
|
static = pipe.parameters.get('static', False)
|
410
410
|
index_names = pipe.get_indices()
|
411
411
|
indices = pipe.indices
|
@@ -512,7 +512,7 @@ def get_create_index_queries(
|
|
512
512
|
+ 'if_not_exists => true, '
|
513
513
|
+ "migrate_data => true);"
|
514
514
|
)
|
515
|
-
elif _datetime_index_name:
|
515
|
+
elif _datetime_index_name and _datetime != primary_key:
|
516
516
|
if self.flavor == 'mssql':
|
517
517
|
dt_query = (
|
518
518
|
f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
|
@@ -1105,12 +1105,13 @@ def get_pipe_data_query(
|
|
1105
1105
|
from meerschaum.utils.misc import items_str
|
1106
1106
|
from meerschaum.utils.sql import sql_item_name, dateadd_str
|
1107
1107
|
from meerschaum.utils.dtypes import coerce_timezone
|
1108
|
-
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
1108
|
+
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
|
1109
1109
|
|
1110
1110
|
dt_col = pipe.columns.get('datetime', None)
|
1111
1111
|
existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
|
1112
1112
|
skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
|
1113
1113
|
dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
|
1114
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
1114
1115
|
select_columns = (
|
1115
1116
|
[col for col in existing_cols]
|
1116
1117
|
if not select_columns
|
@@ -1200,6 +1201,7 @@ def get_pipe_data_query(
|
|
1200
1201
|
datepart='minute',
|
1201
1202
|
number=begin_add_minutes,
|
1202
1203
|
begin=begin,
|
1204
|
+
db_type=dt_db_type,
|
1203
1205
|
)
|
1204
1206
|
where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "")
|
1205
1207
|
is_dt_bound = True
|
@@ -1211,7 +1213,8 @@ def get_pipe_data_query(
|
|
1211
1213
|
flavor=self.flavor,
|
1212
1214
|
datepart='minute',
|
1213
1215
|
number=end_add_minutes,
|
1214
|
-
begin=end
|
1216
|
+
begin=end,
|
1217
|
+
db_type=dt_db_type,
|
1215
1218
|
)
|
1216
1219
|
where += f"{dt} < {end_da}"
|
1217
1220
|
is_dt_bound = True
|
@@ -1362,7 +1365,18 @@ def create_pipe_table_from_df(
|
|
1362
1365
|
get_bytes_cols,
|
1363
1366
|
)
|
1364
1367
|
from meerschaum.utils.sql import get_create_table_queries, sql_item_name
|
1368
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1365
1369
|
primary_key = pipe.columns.get('primary', None)
|
1370
|
+
primary_key_typ = (
|
1371
|
+
pipe.dtypes.get(primary_key, str(df.dtypes.get(primary_key)))
|
1372
|
+
if primary_key
|
1373
|
+
else None
|
1374
|
+
)
|
1375
|
+
primary_key_db_type = (
|
1376
|
+
get_db_type_from_pd_type(primary_key_typ, self.flavor)
|
1377
|
+
if primary_key
|
1378
|
+
else None
|
1379
|
+
)
|
1366
1380
|
dt_col = pipe.columns.get('datetime', None)
|
1367
1381
|
new_dtypes = {
|
1368
1382
|
**{
|
@@ -1413,6 +1427,7 @@ def create_pipe_table_from_df(
|
|
1413
1427
|
self.flavor,
|
1414
1428
|
schema=self.get_pipe_schema(pipe),
|
1415
1429
|
primary_key=primary_key,
|
1430
|
+
primary_key_db_type=primary_key_db_type,
|
1416
1431
|
datetime_column=dt_col,
|
1417
1432
|
)
|
1418
1433
|
success = all(
|
@@ -1487,7 +1502,7 @@ def sync_pipe(
|
|
1487
1502
|
from meerschaum.utils.sql import (
|
1488
1503
|
get_update_queries,
|
1489
1504
|
sql_item_name,
|
1490
|
-
|
1505
|
+
UPDATE_QUERIES,
|
1491
1506
|
get_reset_autoincrement_queries,
|
1492
1507
|
)
|
1493
1508
|
from meerschaum.utils.misc import generate_password
|
@@ -1563,7 +1578,7 @@ def sync_pipe(
|
|
1563
1578
|
if not infer_bool_success:
|
1564
1579
|
return infer_bool_success, infer_bool_msg
|
1565
1580
|
|
1566
|
-
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in
|
1581
|
+
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
|
1567
1582
|
if upsert:
|
1568
1583
|
check_existing = False
|
1569
1584
|
kw['safe_copy'] = kw.get('safe_copy', False)
|
@@ -1794,12 +1809,12 @@ def sync_pipe(
|
|
1794
1809
|
update_count = len(update_df.index) if update_df is not None else 0
|
1795
1810
|
msg = (
|
1796
1811
|
(
|
1797
|
-
f"Inserted {unseen_count}, "
|
1798
|
-
+ f"updated {update_count} rows."
|
1812
|
+
f"Inserted {unseen_count:,}, "
|
1813
|
+
+ f"updated {update_count:,} rows."
|
1799
1814
|
)
|
1800
1815
|
if not upsert
|
1801
1816
|
else (
|
1802
|
-
f"Upserted {update_count} row"
|
1817
|
+
f"Upserted {update_count:,} row"
|
1803
1818
|
+ ('s' if update_count != 1 else '')
|
1804
1819
|
+ "."
|
1805
1820
|
)
|
@@ -1886,10 +1901,12 @@ def sync_pipe_inplace(
|
|
1886
1901
|
get_create_table_queries,
|
1887
1902
|
get_table_cols_types,
|
1888
1903
|
session_execute,
|
1889
|
-
|
1904
|
+
dateadd_str,
|
1905
|
+
UPDATE_QUERIES,
|
1890
1906
|
)
|
1891
1907
|
from meerschaum.utils.dtypes.sql import (
|
1892
1908
|
get_pd_type_from_db_type,
|
1909
|
+
get_db_type_from_pd_type,
|
1893
1910
|
)
|
1894
1911
|
from meerschaum.utils.misc import generate_password
|
1895
1912
|
|
@@ -1905,13 +1922,14 @@ def sync_pipe_inplace(
|
|
1905
1922
|
for table_root in temp_table_roots
|
1906
1923
|
}
|
1907
1924
|
temp_table_names = {
|
1908
|
-
table_root: sql_item_name(
|
1909
|
-
table_name_raw,
|
1910
|
-
self.flavor,
|
1911
|
-
internal_schema,
|
1912
|
-
)
|
1925
|
+
table_root: sql_item_name(table_name_raw, self.flavor, internal_schema)
|
1913
1926
|
for table_root, table_name_raw in temp_tables.items()
|
1914
1927
|
}
|
1928
|
+
temp_table_aliases = {
|
1929
|
+
table_root: sql_item_name(table_root, self.flavor)
|
1930
|
+
for table_root in temp_table_roots
|
1931
|
+
}
|
1932
|
+
table_alias_as = " AS" if self.flavor != 'oracle' else ''
|
1915
1933
|
metadef = self.get_pipe_metadef(
|
1916
1934
|
pipe,
|
1917
1935
|
params=params,
|
@@ -1921,13 +1939,21 @@ def sync_pipe_inplace(
|
|
1921
1939
|
debug=debug,
|
1922
1940
|
)
|
1923
1941
|
pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
1924
|
-
upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in
|
1942
|
+
upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in UPDATE_QUERIES
|
1925
1943
|
static = pipe.parameters.get('static', False)
|
1926
1944
|
database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
|
1927
1945
|
primary_key = pipe.columns.get('primary', None)
|
1946
|
+
primary_key_typ = pipe.dtypes.get(primary_key, None) if primary_key else None
|
1947
|
+
primary_key_db_type = (
|
1948
|
+
get_db_type_from_pd_type(primary_key_typ, self.flavor)
|
1949
|
+
if primary_key_typ
|
1950
|
+
else None
|
1951
|
+
)
|
1928
1952
|
autoincrement = pipe.parameters.get('autoincrement', False)
|
1929
1953
|
dt_col = pipe.columns.get('datetime', None)
|
1930
1954
|
dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
1955
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
|
1956
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
1931
1957
|
|
1932
1958
|
def clean_up_temp_tables(ready_to_drop: bool = False):
|
1933
1959
|
log_success, log_msg = self._log_temporary_tables_creation(
|
@@ -1957,6 +1983,7 @@ def sync_pipe_inplace(
|
|
1957
1983
|
self.flavor,
|
1958
1984
|
schema=self.get_pipe_schema(pipe),
|
1959
1985
|
primary_key=primary_key,
|
1986
|
+
primary_key_db_type=primary_key_db_type,
|
1960
1987
|
autoincrement=autoincrement,
|
1961
1988
|
datetime_column=dt_col,
|
1962
1989
|
)
|
@@ -1970,7 +1997,7 @@ def sync_pipe_inplace(
|
|
1970
1997
|
|
1971
1998
|
rowcount = pipe.get_rowcount(debug=debug)
|
1972
1999
|
_ = clean_up_temp_tables()
|
1973
|
-
return True, f"Inserted {rowcount}, updated 0 rows."
|
2000
|
+
return True, f"Inserted {rowcount:,}, updated 0 rows."
|
1974
2001
|
|
1975
2002
|
session = sqlalchemy_orm.Session(self.engine)
|
1976
2003
|
connectable = session if self.flavor != 'duckdb' else self
|
@@ -2007,7 +2034,7 @@ def sync_pipe_inplace(
|
|
2007
2034
|
str(col_name): get_pd_type_from_db_type(str(col_type))
|
2008
2035
|
for col_name, col_type in new_cols_types.items()
|
2009
2036
|
}
|
2010
|
-
new_cols_str = '
|
2037
|
+
new_cols_str = '\n ' + ',\n '.join([
|
2011
2038
|
sql_item_name(col, self.flavor)
|
2012
2039
|
for col in new_cols
|
2013
2040
|
])
|
@@ -2030,7 +2057,8 @@ def sync_pipe_inplace(
|
|
2030
2057
|
insert_queries = [
|
2031
2058
|
(
|
2032
2059
|
f"INSERT INTO {pipe_name} ({new_cols_str})\n"
|
2033
|
-
|
2060
|
+
f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}{table_alias_as}"
|
2061
|
+
f" {temp_table_aliases['new']}"
|
2034
2062
|
)
|
2035
2063
|
] if not check_existing and not upsert else []
|
2036
2064
|
|
@@ -2049,12 +2077,13 @@ def sync_pipe_inplace(
|
|
2049
2077
|
_ = clean_up_temp_tables()
|
2050
2078
|
return True, f"Inserted {new_count}, updated 0 rows."
|
2051
2079
|
|
2080
|
+
dt_col_name_da = dateadd_str(flavor=self.flavor, begin=dt_col_name, db_type=dt_db_type)
|
2052
2081
|
(new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
|
2053
2082
|
session,
|
2054
2083
|
[
|
2055
2084
|
"SELECT\n"
|
2056
|
-
f" MIN({
|
2057
|
-
f" MAX({
|
2085
|
+
f" MIN({dt_col_name_da}) AS {sql_item_name('min_dt', self.flavor)},\n"
|
2086
|
+
f" MAX({dt_col_name_da}) AS {sql_item_name('max_dt', self.flavor)}\n"
|
2058
2087
|
f"FROM {temp_table_names['new' if not upsert else 'update']}\n"
|
2059
2088
|
f"WHERE {dt_col_name} IS NOT NULL"
|
2060
2089
|
],
|
@@ -2121,9 +2150,9 @@ def sync_pipe_inplace(
|
|
2121
2150
|
} if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
|
2122
2151
|
|
2123
2152
|
null_replace_new_cols_str = (
|
2124
|
-
'
|
2125
|
-
f"COALESCE({
|
2126
|
-
+ get_null_replacement(get_col_typ(col,
|
2153
|
+
'\n ' + ',\n '.join([
|
2154
|
+
f"COALESCE({temp_table_aliases['new']}.{sql_item_name(col, self.flavor)}, "
|
2155
|
+
+ get_null_replacement(get_col_typ(col, new_cols_types), self.flavor)
|
2127
2156
|
+ ") AS "
|
2128
2157
|
+ sql_item_name(col, self.flavor, None)
|
2129
2158
|
for col, typ in new_cols.items()
|
@@ -2131,29 +2160,30 @@ def sync_pipe_inplace(
|
|
2131
2160
|
)
|
2132
2161
|
|
2133
2162
|
select_delta_query = (
|
2134
|
-
"SELECT
|
2135
|
-
+ null_replace_new_cols_str
|
2136
|
-
+ f"\nFROM {temp_table_names['new']}\n"
|
2137
|
-
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}
|
2138
|
-
+
|
2163
|
+
"SELECT"
|
2164
|
+
+ null_replace_new_cols_str
|
2165
|
+
+ f"\nFROM {temp_table_names['new']}{table_alias_as} {temp_table_aliases['new']}\n"
|
2166
|
+
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as} {temp_table_aliases['backtrack']}"
|
2167
|
+
+ "\n ON\n "
|
2168
|
+
+ '\n AND\n '.join([
|
2139
2169
|
(
|
2140
|
-
f"COALESCE({
|
2170
|
+
f" COALESCE({temp_table_aliases['new']}."
|
2141
2171
|
+ sql_item_name(c, self.flavor, None)
|
2142
2172
|
+ ", "
|
2143
|
-
+ get_null_replacement(get_col_typ(c,
|
2144
|
-
+ ")
|
2145
|
-
+ '
|
2146
|
-
+ f"COALESCE({
|
2173
|
+
+ get_null_replacement(get_col_typ(c, new_cols_types), self.flavor)
|
2174
|
+
+ ")"
|
2175
|
+
+ '\n =\n '
|
2176
|
+
+ f" COALESCE({temp_table_aliases['backtrack']}."
|
2147
2177
|
+ sql_item_name(c, self.flavor, None)
|
2148
2178
|
+ ", "
|
2149
|
-
+ get_null_replacement(
|
2179
|
+
+ get_null_replacement(get_col_typ(c, backtrack_cols_types), self.flavor)
|
2150
2180
|
+ ") "
|
2151
2181
|
) for c in common_cols
|
2152
2182
|
])
|
2153
|
-
+ "\nWHERE\n"
|
2154
|
-
+ '\
|
2183
|
+
+ "\nWHERE\n "
|
2184
|
+
+ '\n AND\n '.join([
|
2155
2185
|
(
|
2156
|
-
f"{
|
2186
|
+
f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor) + ' IS NULL'
|
2157
2187
|
) for c in common_cols
|
2158
2188
|
])
|
2159
2189
|
)
|
@@ -2196,37 +2226,33 @@ def sync_pipe_inplace(
|
|
2196
2226
|
])
|
2197
2227
|
|
2198
2228
|
select_joined_query = (
|
2199
|
-
"SELECT
|
2200
|
-
+ ('
|
2229
|
+
"SELECT\n "
|
2230
|
+
+ (',\n '.join([
|
2201
2231
|
(
|
2202
|
-
f"{
|
2232
|
+
f"{temp_table_aliases['delta']}." + sql_item_name(c, self.flavor, None)
|
2203
2233
|
+ " AS " + sql_item_name(c + '_delta', self.flavor, None)
|
2204
2234
|
) for c in delta_cols
|
2205
2235
|
]))
|
2206
|
-
+ "
|
2207
|
-
+ ('
|
2236
|
+
+ ",\n "
|
2237
|
+
+ (',\n '.join([
|
2208
2238
|
(
|
2209
|
-
f"{
|
2239
|
+
f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor, None)
|
2210
2240
|
+ " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
|
2211
2241
|
) for c in backtrack_cols_types
|
2212
2242
|
]))
|
2213
|
-
+ f"\nFROM {temp_table_names['delta']}\n"
|
2214
|
-
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}
|
2215
|
-
+ '
|
2243
|
+
+ f"\nFROM {temp_table_names['delta']}{table_alias_as} {temp_table_aliases['delta']}\n"
|
2244
|
+
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as}"
|
2245
|
+
+ f" {temp_table_aliases['backtrack']}"
|
2246
|
+
+ "\n ON\n "
|
2247
|
+
+ '\n AND\n '.join([
|
2216
2248
|
(
|
2217
|
-
f"COALESCE({
|
2249
|
+
f" COALESCE({temp_table_aliases['delta']}." + sql_item_name(c, self.flavor)
|
2218
2250
|
+ ", "
|
2219
|
-
+ get_null_replacement(
|
2220
|
-
|
2221
|
-
|
2222
|
-
) + ")"
|
2223
|
-
+ ' = '
|
2224
|
-
+ f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
|
2251
|
+
+ get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
|
2252
|
+
+ '\n =\n '
|
2253
|
+
+ f" COALESCE({temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor)
|
2225
2254
|
+ ", "
|
2226
|
-
+ get_null_replacement(
|
2227
|
-
get_col_typ(c, on_cols),
|
2228
|
-
self.flavor
|
2229
|
-
) + ")"
|
2255
|
+
+ get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
|
2230
2256
|
) for c, typ in on_cols.items()
|
2231
2257
|
])
|
2232
2258
|
)
|
@@ -2247,19 +2273,19 @@ def sync_pipe_inplace(
|
|
2247
2273
|
return create_joined_success, create_joined_msg
|
2248
2274
|
|
2249
2275
|
select_unseen_query = (
|
2250
|
-
"SELECT
|
2251
|
-
+ ('
|
2276
|
+
"SELECT\n "
|
2277
|
+
+ (',\n '.join([
|
2252
2278
|
(
|
2253
|
-
"CASE\n
|
2254
|
-
+ " != " + get_null_replacement(get_col_typ(c,
|
2279
|
+
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2280
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
|
2255
2281
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2256
|
-
+ "\n
|
2282
|
+
+ "\n ELSE NULL\n END"
|
2257
2283
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
2258
2284
|
) for c, typ in delta_cols.items()
|
2259
2285
|
]))
|
2260
|
-
+ f"\nFROM {temp_table_names['joined']}\n"
|
2261
|
-
+ "WHERE
|
2262
|
-
+ '\
|
2286
|
+
+ f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
|
2287
|
+
+ "WHERE\n "
|
2288
|
+
+ '\n AND\n '.join([
|
2263
2289
|
(
|
2264
2290
|
sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
|
2265
2291
|
) for c in delta_cols
|
@@ -2282,19 +2308,19 @@ def sync_pipe_inplace(
|
|
2282
2308
|
return create_unseen_success, create_unseen_msg
|
2283
2309
|
|
2284
2310
|
select_update_query = (
|
2285
|
-
"SELECT
|
2286
|
-
+ ('
|
2311
|
+
"SELECT\n "
|
2312
|
+
+ (',\n '.join([
|
2287
2313
|
(
|
2288
|
-
"CASE\n
|
2289
|
-
+ " != " + get_null_replacement(get_col_typ(c,
|
2314
|
+
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2315
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
|
2290
2316
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2291
|
-
+ "\n
|
2317
|
+
+ "\n ELSE NULL\n END"
|
2292
2318
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
2293
2319
|
) for c, typ in delta_cols.items()
|
2294
2320
|
]))
|
2295
|
-
+ f"\nFROM {temp_table_names['joined']}\n"
|
2296
|
-
+ "WHERE
|
2297
|
-
+ '\
|
2321
|
+
+ f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
|
2322
|
+
+ "WHERE\n "
|
2323
|
+
+ '\n OR\n '.join([
|
2298
2324
|
(
|
2299
2325
|
sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
|
2300
2326
|
) for c in delta_cols
|
@@ -2366,9 +2392,9 @@ def sync_pipe_inplace(
|
|
2366
2392
|
session.commit()
|
2367
2393
|
|
2368
2394
|
msg = (
|
2369
|
-
f"Inserted {unseen_count}, updated {update_count} rows."
|
2395
|
+
f"Inserted {unseen_count:,}, updated {update_count:,} rows."
|
2370
2396
|
if not upsert
|
2371
|
-
else f"Upserted {update_count} row" + ('s' if update_count != 1 else '') + "."
|
2397
|
+
else f"Upserted {update_count:,} row" + ('s' if update_count != 1 else '') + "."
|
2372
2398
|
)
|
2373
2399
|
_ = clean_up_temp_tables(ready_to_drop=True)
|
2374
2400
|
|
@@ -2540,6 +2566,7 @@ def get_pipe_rowcount(
|
|
2540
2566
|
"""
|
2541
2567
|
from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
|
2542
2568
|
from meerschaum.connectors.sql._fetch import get_pipe_query
|
2569
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
2543
2570
|
if remote:
|
2544
2571
|
msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
|
2545
2572
|
if 'fetch' not in pipe.parameters:
|
@@ -2551,18 +2578,21 @@ def get_pipe_rowcount(
|
|
2551
2578
|
|
2552
2579
|
_pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
2553
2580
|
|
2554
|
-
|
2555
|
-
|
2556
|
-
|
2581
|
+
dt_col = pipe.columns.get('datetime', None)
|
2582
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
|
2583
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
2584
|
+
if not dt_col:
|
2585
|
+
dt_col = pipe.guess_datetime()
|
2586
|
+
dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
2557
2587
|
is_guess = True
|
2558
2588
|
else:
|
2559
|
-
|
2560
|
-
|
2589
|
+
dt_col = pipe.get_columns('datetime')
|
2590
|
+
dt_name = sql_item_name(dt_col, self.flavor, None)
|
2561
2591
|
is_guess = False
|
2562
2592
|
|
2563
2593
|
if begin is not None or end is not None:
|
2564
2594
|
if is_guess:
|
2565
|
-
if
|
2595
|
+
if dt_col is None:
|
2566
2596
|
warn(
|
2567
2597
|
f"No datetime could be determined for {pipe}."
|
2568
2598
|
+ "\n Ignoring begin and end...",
|
@@ -2572,13 +2602,13 @@ def get_pipe_rowcount(
|
|
2572
2602
|
else:
|
2573
2603
|
warn(
|
2574
2604
|
f"A datetime wasn't specified for {pipe}.\n"
|
2575
|
-
+ f" Using column \"{
|
2605
|
+
+ f" Using column \"{dt_col}\" for datetime bounds...",
|
2576
2606
|
stack=False,
|
2577
2607
|
)
|
2578
2608
|
|
2579
2609
|
|
2580
2610
|
_datetime_name = sql_item_name(
|
2581
|
-
|
2611
|
+
dt_col,
|
2582
2612
|
(
|
2583
2613
|
pipe.instance_connector.flavor
|
2584
2614
|
if not remote
|
@@ -2598,8 +2628,8 @@ def get_pipe_rowcount(
|
|
2598
2628
|
)
|
2599
2629
|
for col in set(
|
2600
2630
|
(
|
2601
|
-
[
|
2602
|
-
if
|
2631
|
+
[dt_col]
|
2632
|
+
if dt_col
|
2603
2633
|
else []
|
2604
2634
|
)
|
2605
2635
|
+ (
|
@@ -2623,13 +2653,13 @@ def get_pipe_rowcount(
|
|
2623
2653
|
query += "\nWHERE"
|
2624
2654
|
if begin is not None:
|
2625
2655
|
query += f"""
|
2626
|
-
{
|
2656
|
+
{dt_name} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)}
|
2627
2657
|
"""
|
2628
2658
|
if end is not None and begin is not None:
|
2629
2659
|
query += "AND"
|
2630
2660
|
if end is not None:
|
2631
2661
|
query += f"""
|
2632
|
-
{
|
2662
|
+
{dt_name} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)}
|
2633
2663
|
"""
|
2634
2664
|
if params is not None:
|
2635
2665
|
from meerschaum.utils.sql import build_where
|
@@ -2715,31 +2745,35 @@ def clear_pipe(
|
|
2715
2745
|
return True, f"{pipe} does not exist, so nothing was cleared."
|
2716
2746
|
|
2717
2747
|
from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
|
2748
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
2718
2749
|
pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
2719
2750
|
|
2751
|
+
dt_col = pipe.columns.get('datetime', None)
|
2752
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
|
2753
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
2720
2754
|
if not pipe.columns.get('datetime', None):
|
2721
|
-
|
2722
|
-
dt_name = sql_item_name(
|
2755
|
+
dt_col = pipe.guess_datetime()
|
2756
|
+
dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
2723
2757
|
is_guess = True
|
2724
2758
|
else:
|
2725
|
-
|
2726
|
-
dt_name = sql_item_name(
|
2759
|
+
dt_col = pipe.get_columns('datetime')
|
2760
|
+
dt_name = sql_item_name(dt_col, self.flavor, None)
|
2727
2761
|
is_guess = False
|
2728
2762
|
|
2729
2763
|
if begin is not None or end is not None:
|
2730
2764
|
if is_guess:
|
2731
|
-
if
|
2765
|
+
if dt_col is None:
|
2732
2766
|
warn(
|
2733
2767
|
f"No datetime could be determined for {pipe}."
|
2734
2768
|
+ "\n Ignoring datetime bounds...",
|
2735
|
-
stack
|
2769
|
+
stack=False,
|
2736
2770
|
)
|
2737
2771
|
begin, end = None, None
|
2738
2772
|
else:
|
2739
2773
|
warn(
|
2740
2774
|
f"A datetime wasn't specified for {pipe}.\n"
|
2741
|
-
+ f" Using column \"{
|
2742
|
-
stack
|
2775
|
+
+ f" Using column \"{dt_col}\" for datetime bounds...",
|
2776
|
+
stack=False,
|
2743
2777
|
)
|
2744
2778
|
|
2745
2779
|
valid_params = {}
|
@@ -2750,10 +2784,10 @@ def clear_pipe(
|
|
2750
2784
|
f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n"
|
2751
2785
|
+ (' AND ' + build_where(valid_params, self, with_where=False) if valid_params else '')
|
2752
2786
|
+ (
|
2753
|
-
f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin)
|
2787
|
+
f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin, db_type=dt_db_type)
|
2754
2788
|
if begin is not None else ''
|
2755
2789
|
) + (
|
2756
|
-
f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end)
|
2790
|
+
f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end, db_type=dt_db_type)
|
2757
2791
|
if end is not None else ''
|
2758
2792
|
)
|
2759
2793
|
)
|
@@ -3400,7 +3434,6 @@ def deduplicate_pipe(
|
|
3400
3434
|
|
3401
3435
|
### TODO: Handle deleting duplicates without a datetime axis.
|
3402
3436
|
dt_col = pipe.columns.get('datetime', None)
|
3403
|
-
dt_col_name = sql_item_name(dt_col, self.flavor, None)
|
3404
3437
|
cols_types = pipe.get_columns_types(debug=debug)
|
3405
3438
|
existing_cols = pipe.get_columns_types(debug=debug)
|
3406
3439
|
|
@@ -3417,7 +3450,6 @@ def deduplicate_pipe(
|
|
3417
3450
|
]
|
3418
3451
|
indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
|
3419
3452
|
existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
|
3420
|
-
duplicates_cte_name = sql_item_name('dups', self.flavor, None)
|
3421
3453
|
duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
|
3422
3454
|
previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
|
3423
3455
|
|
@@ -3450,7 +3482,7 @@ def deduplicate_pipe(
|
|
3450
3482
|
and
|
3451
3483
|
int(self.db_version.split('.')[0]) < 8
|
3452
3484
|
)
|
3453
|
-
except Exception
|
3485
|
+
except Exception:
|
3454
3486
|
is_old_mysql = False
|
3455
3487
|
|
3456
3488
|
src_query = f"""
|
@@ -3557,7 +3589,7 @@ def deduplicate_pipe(
|
|
3557
3589
|
(
|
3558
3590
|
f"Successfully deduplicated table {pipe_table_name}"
|
3559
3591
|
+ (
|
3560
|
-
f"\nfrom {old_rowcount} to {new_rowcount} rows"
|
3592
|
+
f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
|
3561
3593
|
if old_rowcount != new_rowcount
|
3562
3594
|
else ''
|
3563
3595
|
)
|
@@ -310,7 +310,7 @@ def enforce(self, _enforce: bool) -> None:
|
|
310
310
|
"""
|
311
311
|
Set the `enforce` parameter for the pipe.
|
312
312
|
"""
|
313
|
-
self.parameters['
|
313
|
+
self.parameters['enforce'] = _enforce
|
314
314
|
|
315
315
|
|
316
316
|
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -20,6 +20,7 @@ def enforce_dtypes(
|
|
20
20
|
self,
|
21
21
|
df: 'pd.DataFrame',
|
22
22
|
chunksize: Optional[int] = -1,
|
23
|
+
enforce: bool = True,
|
23
24
|
safe_copy: bool = True,
|
24
25
|
debug: bool = False,
|
25
26
|
) -> 'pd.DataFrame':
|
@@ -31,7 +32,7 @@ def enforce_dtypes(
|
|
31
32
|
from meerschaum.utils.warnings import warn
|
32
33
|
from meerschaum.utils.debug import dprint
|
33
34
|
from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
|
34
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
35
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
35
36
|
from meerschaum.utils.packages import import_pandas
|
36
37
|
pd = import_pandas(debug=debug)
|
37
38
|
if df is None:
|
@@ -42,11 +43,9 @@ def enforce_dtypes(
|
|
42
43
|
)
|
43
44
|
return df
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if typ in MRSM_PD_DTYPES
|
49
|
-
}
|
46
|
+
if not self.enforce:
|
47
|
+
enforce = False
|
48
|
+
pipe_dtypes = self.dtypes if enforce else {}
|
50
49
|
|
51
50
|
try:
|
52
51
|
if isinstance(df, str):
|
@@ -55,8 +54,9 @@ def enforce_dtypes(
|
|
55
54
|
ignore_cols=[
|
56
55
|
col
|
57
56
|
for col, dtype in pipe_dtypes.items()
|
58
|
-
if
|
57
|
+
if (not enforce or not are_dtypes_equal(dtype, 'datetime'))
|
59
58
|
],
|
59
|
+
ignore_all=(not enforce),
|
60
60
|
strip_timezone=(self.tzinfo is None),
|
61
61
|
chunksize=chunksize,
|
62
62
|
debug=debug,
|
@@ -67,7 +67,7 @@ def enforce_dtypes(
|
|
67
67
|
ignore_cols=[
|
68
68
|
col
|
69
69
|
for col, dtype in pipe_dtypes.items()
|
70
|
-
if not are_dtypes_equal(str(dtype), 'datetime')
|
70
|
+
if (not enforce or not are_dtypes_equal(str(dtype), 'datetime'))
|
71
71
|
],
|
72
72
|
strip_timezone=(self.tzinfo is None),
|
73
73
|
chunksize=chunksize,
|
@@ -90,7 +90,7 @@ def enforce_dtypes(
|
|
90
90
|
pipe_dtypes,
|
91
91
|
safe_copy=safe_copy,
|
92
92
|
strip_timezone=(self.tzinfo is None),
|
93
|
-
coerce_timezone=
|
93
|
+
coerce_timezone=enforce,
|
94
94
|
debug=debug,
|
95
95
|
)
|
96
96
|
|
meerschaum/core/Pipe/_fetch.py
CHANGED
@@ -10,7 +10,7 @@ from __future__ import annotations
|
|
10
10
|
from datetime import timedelta, datetime
|
11
11
|
|
12
12
|
import meerschaum as mrsm
|
13
|
-
from meerschaum.utils.typing import
|
13
|
+
from meerschaum.utils.typing import Any, Union, SuccessTuple, Iterator, TYPE_CHECKING
|
14
14
|
from meerschaum.config import get_config
|
15
15
|
from meerschaum.utils.warnings import warn
|
16
16
|
|
@@ -56,8 +56,7 @@ def fetch(
|
|
56
56
|
warn(f"No `fetch()` function defined for connector '{self.connector}'")
|
57
57
|
return None
|
58
58
|
|
59
|
-
from meerschaum.connectors import
|
60
|
-
from meerschaum.utils.debug import dprint, _checkpoint
|
59
|
+
from meerschaum.connectors import get_connector_plugin
|
61
60
|
from meerschaum.utils.misc import filter_arguments
|
62
61
|
|
63
62
|
_chunk_hook = kw.pop('chunk_hook', None)
|