meerschaum 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/_internal/arguments/_parse_arguments.py +2 -0
- meerschaum/_internal/arguments/_parser.py +17 -11
- meerschaum/actions/clear.py +1 -1
- meerschaum/actions/edit.py +1 -1
- meerschaum/actions/verify.py +18 -21
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_fetch.py +45 -26
- meerschaum/connectors/sql/_instance.py +4 -4
- meerschaum/connectors/sql/_pipes.py +135 -103
- meerschaum/core/Pipe/_attributes.py +1 -1
- meerschaum/core/Pipe/_dtypes.py +9 -9
- meerschaum/core/Pipe/_fetch.py +2 -3
- meerschaum/core/Pipe/_sync.py +11 -3
- meerschaum/core/Pipe/_verify.py +9 -5
- meerschaum/jobs/__init__.py +1 -1
- meerschaum/utils/dataframe.py +10 -2
- meerschaum/utils/dtypes/sql.py +1 -1
- meerschaum/utils/formatting/__init__.py +5 -25
- meerschaum/utils/formatting/_pipes.py +9 -6
- meerschaum/utils/sql.py +156 -87
- meerschaum/utils/venv/__init__.py +44 -6
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/METADATA +1 -1
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/RECORD +29 -29
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/zip-safe +0 -0
@@ -391,7 +391,7 @@ def get_create_index_queries(
|
|
391
391
|
from meerschaum.utils.sql import (
|
392
392
|
sql_item_name,
|
393
393
|
get_distinct_col_count,
|
394
|
-
|
394
|
+
UPDATE_QUERIES,
|
395
395
|
get_null_replacement,
|
396
396
|
get_create_table_queries,
|
397
397
|
get_rename_table_queries,
|
@@ -405,7 +405,7 @@ def get_create_index_queries(
|
|
405
405
|
from meerschaum.config import get_config
|
406
406
|
index_queries = {}
|
407
407
|
|
408
|
-
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in
|
408
|
+
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
|
409
409
|
static = pipe.parameters.get('static', False)
|
410
410
|
index_names = pipe.get_indices()
|
411
411
|
indices = pipe.indices
|
@@ -512,7 +512,7 @@ def get_create_index_queries(
|
|
512
512
|
+ 'if_not_exists => true, '
|
513
513
|
+ "migrate_data => true);"
|
514
514
|
)
|
515
|
-
elif _datetime_index_name:
|
515
|
+
elif _datetime_index_name and _datetime != primary_key:
|
516
516
|
if self.flavor == 'mssql':
|
517
517
|
dt_query = (
|
518
518
|
f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
|
@@ -1105,12 +1105,13 @@ def get_pipe_data_query(
|
|
1105
1105
|
from meerschaum.utils.misc import items_str
|
1106
1106
|
from meerschaum.utils.sql import sql_item_name, dateadd_str
|
1107
1107
|
from meerschaum.utils.dtypes import coerce_timezone
|
1108
|
-
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
|
1108
|
+
from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
|
1109
1109
|
|
1110
1110
|
dt_col = pipe.columns.get('datetime', None)
|
1111
1111
|
existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
|
1112
1112
|
skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
|
1113
1113
|
dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
|
1114
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
1114
1115
|
select_columns = (
|
1115
1116
|
[col for col in existing_cols]
|
1116
1117
|
if not select_columns
|
@@ -1200,6 +1201,7 @@ def get_pipe_data_query(
|
|
1200
1201
|
datepart='minute',
|
1201
1202
|
number=begin_add_minutes,
|
1202
1203
|
begin=begin,
|
1204
|
+
db_type=dt_db_type,
|
1203
1205
|
)
|
1204
1206
|
where += f"\n {dt} >= {begin_da}" + ("\n AND\n " if end is not None else "")
|
1205
1207
|
is_dt_bound = True
|
@@ -1211,7 +1213,8 @@ def get_pipe_data_query(
|
|
1211
1213
|
flavor=self.flavor,
|
1212
1214
|
datepart='minute',
|
1213
1215
|
number=end_add_minutes,
|
1214
|
-
begin=end
|
1216
|
+
begin=end,
|
1217
|
+
db_type=dt_db_type,
|
1215
1218
|
)
|
1216
1219
|
where += f"{dt} < {end_da}"
|
1217
1220
|
is_dt_bound = True
|
@@ -1362,7 +1365,18 @@ def create_pipe_table_from_df(
|
|
1362
1365
|
get_bytes_cols,
|
1363
1366
|
)
|
1364
1367
|
from meerschaum.utils.sql import get_create_table_queries, sql_item_name
|
1368
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
1365
1369
|
primary_key = pipe.columns.get('primary', None)
|
1370
|
+
primary_key_typ = (
|
1371
|
+
pipe.dtypes.get(primary_key, str(df.dtypes.get(primary_key)))
|
1372
|
+
if primary_key
|
1373
|
+
else None
|
1374
|
+
)
|
1375
|
+
primary_key_db_type = (
|
1376
|
+
get_db_type_from_pd_type(primary_key_typ, self.flavor)
|
1377
|
+
if primary_key
|
1378
|
+
else None
|
1379
|
+
)
|
1366
1380
|
dt_col = pipe.columns.get('datetime', None)
|
1367
1381
|
new_dtypes = {
|
1368
1382
|
**{
|
@@ -1413,6 +1427,7 @@ def create_pipe_table_from_df(
|
|
1413
1427
|
self.flavor,
|
1414
1428
|
schema=self.get_pipe_schema(pipe),
|
1415
1429
|
primary_key=primary_key,
|
1430
|
+
primary_key_db_type=primary_key_db_type,
|
1416
1431
|
datetime_column=dt_col,
|
1417
1432
|
)
|
1418
1433
|
success = all(
|
@@ -1487,7 +1502,7 @@ def sync_pipe(
|
|
1487
1502
|
from meerschaum.utils.sql import (
|
1488
1503
|
get_update_queries,
|
1489
1504
|
sql_item_name,
|
1490
|
-
|
1505
|
+
UPDATE_QUERIES,
|
1491
1506
|
get_reset_autoincrement_queries,
|
1492
1507
|
)
|
1493
1508
|
from meerschaum.utils.misc import generate_password
|
@@ -1563,7 +1578,7 @@ def sync_pipe(
|
|
1563
1578
|
if not infer_bool_success:
|
1564
1579
|
return infer_bool_success, infer_bool_msg
|
1565
1580
|
|
1566
|
-
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in
|
1581
|
+
upsert = pipe.parameters.get('upsert', False) and (self.flavor + '-upsert') in UPDATE_QUERIES
|
1567
1582
|
if upsert:
|
1568
1583
|
check_existing = False
|
1569
1584
|
kw['safe_copy'] = kw.get('safe_copy', False)
|
@@ -1794,12 +1809,12 @@ def sync_pipe(
|
|
1794
1809
|
update_count = len(update_df.index) if update_df is not None else 0
|
1795
1810
|
msg = (
|
1796
1811
|
(
|
1797
|
-
f"Inserted {unseen_count}, "
|
1798
|
-
+ f"updated {update_count} rows."
|
1812
|
+
f"Inserted {unseen_count:,}, "
|
1813
|
+
+ f"updated {update_count:,} rows."
|
1799
1814
|
)
|
1800
1815
|
if not upsert
|
1801
1816
|
else (
|
1802
|
-
f"Upserted {update_count} row"
|
1817
|
+
f"Upserted {update_count:,} row"
|
1803
1818
|
+ ('s' if update_count != 1 else '')
|
1804
1819
|
+ "."
|
1805
1820
|
)
|
@@ -1886,10 +1901,12 @@ def sync_pipe_inplace(
|
|
1886
1901
|
get_create_table_queries,
|
1887
1902
|
get_table_cols_types,
|
1888
1903
|
session_execute,
|
1889
|
-
|
1904
|
+
dateadd_str,
|
1905
|
+
UPDATE_QUERIES,
|
1890
1906
|
)
|
1891
1907
|
from meerschaum.utils.dtypes.sql import (
|
1892
1908
|
get_pd_type_from_db_type,
|
1909
|
+
get_db_type_from_pd_type,
|
1893
1910
|
)
|
1894
1911
|
from meerschaum.utils.misc import generate_password
|
1895
1912
|
|
@@ -1905,13 +1922,14 @@ def sync_pipe_inplace(
|
|
1905
1922
|
for table_root in temp_table_roots
|
1906
1923
|
}
|
1907
1924
|
temp_table_names = {
|
1908
|
-
table_root: sql_item_name(
|
1909
|
-
table_name_raw,
|
1910
|
-
self.flavor,
|
1911
|
-
internal_schema,
|
1912
|
-
)
|
1925
|
+
table_root: sql_item_name(table_name_raw, self.flavor, internal_schema)
|
1913
1926
|
for table_root, table_name_raw in temp_tables.items()
|
1914
1927
|
}
|
1928
|
+
temp_table_aliases = {
|
1929
|
+
table_root: sql_item_name(table_root, self.flavor)
|
1930
|
+
for table_root in temp_table_roots
|
1931
|
+
}
|
1932
|
+
table_alias_as = " AS" if self.flavor != 'oracle' else ''
|
1915
1933
|
metadef = self.get_pipe_metadef(
|
1916
1934
|
pipe,
|
1917
1935
|
params=params,
|
@@ -1921,13 +1939,21 @@ def sync_pipe_inplace(
|
|
1921
1939
|
debug=debug,
|
1922
1940
|
)
|
1923
1941
|
pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
1924
|
-
upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in
|
1942
|
+
upsert = pipe.parameters.get('upsert', False) and f'{self.flavor}-upsert' in UPDATE_QUERIES
|
1925
1943
|
static = pipe.parameters.get('static', False)
|
1926
1944
|
database = getattr(self, 'database', self.parse_uri(self.URI).get('database', None))
|
1927
1945
|
primary_key = pipe.columns.get('primary', None)
|
1946
|
+
primary_key_typ = pipe.dtypes.get(primary_key, None) if primary_key else None
|
1947
|
+
primary_key_db_type = (
|
1948
|
+
get_db_type_from_pd_type(primary_key_typ, self.flavor)
|
1949
|
+
if primary_key_typ
|
1950
|
+
else None
|
1951
|
+
)
|
1928
1952
|
autoincrement = pipe.parameters.get('autoincrement', False)
|
1929
1953
|
dt_col = pipe.columns.get('datetime', None)
|
1930
1954
|
dt_col_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
1955
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
|
1956
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
1931
1957
|
|
1932
1958
|
def clean_up_temp_tables(ready_to_drop: bool = False):
|
1933
1959
|
log_success, log_msg = self._log_temporary_tables_creation(
|
@@ -1957,6 +1983,7 @@ def sync_pipe_inplace(
|
|
1957
1983
|
self.flavor,
|
1958
1984
|
schema=self.get_pipe_schema(pipe),
|
1959
1985
|
primary_key=primary_key,
|
1986
|
+
primary_key_db_type=primary_key_db_type,
|
1960
1987
|
autoincrement=autoincrement,
|
1961
1988
|
datetime_column=dt_col,
|
1962
1989
|
)
|
@@ -1970,7 +1997,7 @@ def sync_pipe_inplace(
|
|
1970
1997
|
|
1971
1998
|
rowcount = pipe.get_rowcount(debug=debug)
|
1972
1999
|
_ = clean_up_temp_tables()
|
1973
|
-
return True, f"Inserted {rowcount}, updated 0 rows."
|
2000
|
+
return True, f"Inserted {rowcount:,}, updated 0 rows."
|
1974
2001
|
|
1975
2002
|
session = sqlalchemy_orm.Session(self.engine)
|
1976
2003
|
connectable = session if self.flavor != 'duckdb' else self
|
@@ -2007,7 +2034,7 @@ def sync_pipe_inplace(
|
|
2007
2034
|
str(col_name): get_pd_type_from_db_type(str(col_type))
|
2008
2035
|
for col_name, col_type in new_cols_types.items()
|
2009
2036
|
}
|
2010
|
-
new_cols_str = '
|
2037
|
+
new_cols_str = '\n ' + ',\n '.join([
|
2011
2038
|
sql_item_name(col, self.flavor)
|
2012
2039
|
for col in new_cols
|
2013
2040
|
])
|
@@ -2030,7 +2057,8 @@ def sync_pipe_inplace(
|
|
2030
2057
|
insert_queries = [
|
2031
2058
|
(
|
2032
2059
|
f"INSERT INTO {pipe_name} ({new_cols_str})\n"
|
2033
|
-
|
2060
|
+
f"SELECT {new_cols_str}\nFROM {temp_table_names['new']}{table_alias_as}"
|
2061
|
+
f" {temp_table_aliases['new']}"
|
2034
2062
|
)
|
2035
2063
|
] if not check_existing and not upsert else []
|
2036
2064
|
|
@@ -2049,12 +2077,13 @@ def sync_pipe_inplace(
|
|
2049
2077
|
_ = clean_up_temp_tables()
|
2050
2078
|
return True, f"Inserted {new_count}, updated 0 rows."
|
2051
2079
|
|
2080
|
+
dt_col_name_da = dateadd_str(flavor=self.flavor, begin=dt_col_name, db_type=dt_db_type)
|
2052
2081
|
(new_dt_bounds_success, new_dt_bounds_msg), new_dt_bounds_results = session_execute(
|
2053
2082
|
session,
|
2054
2083
|
[
|
2055
2084
|
"SELECT\n"
|
2056
|
-
f" MIN({
|
2057
|
-
f" MAX({
|
2085
|
+
f" MIN({dt_col_name_da}) AS {sql_item_name('min_dt', self.flavor)},\n"
|
2086
|
+
f" MAX({dt_col_name_da}) AS {sql_item_name('max_dt', self.flavor)}\n"
|
2058
2087
|
f"FROM {temp_table_names['new' if not upsert else 'update']}\n"
|
2059
2088
|
f"WHERE {dt_col_name} IS NOT NULL"
|
2060
2089
|
],
|
@@ -2121,9 +2150,9 @@ def sync_pipe_inplace(
|
|
2121
2150
|
} if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}
|
2122
2151
|
|
2123
2152
|
null_replace_new_cols_str = (
|
2124
|
-
'
|
2125
|
-
f"COALESCE({
|
2126
|
-
+ get_null_replacement(get_col_typ(col,
|
2153
|
+
'\n ' + ',\n '.join([
|
2154
|
+
f"COALESCE({temp_table_aliases['new']}.{sql_item_name(col, self.flavor)}, "
|
2155
|
+
+ get_null_replacement(get_col_typ(col, new_cols_types), self.flavor)
|
2127
2156
|
+ ") AS "
|
2128
2157
|
+ sql_item_name(col, self.flavor, None)
|
2129
2158
|
for col, typ in new_cols.items()
|
@@ -2131,29 +2160,30 @@ def sync_pipe_inplace(
|
|
2131
2160
|
)
|
2132
2161
|
|
2133
2162
|
select_delta_query = (
|
2134
|
-
"SELECT
|
2135
|
-
+ null_replace_new_cols_str
|
2136
|
-
+ f"\nFROM {temp_table_names['new']}\n"
|
2137
|
-
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}
|
2138
|
-
+
|
2163
|
+
"SELECT"
|
2164
|
+
+ null_replace_new_cols_str
|
2165
|
+
+ f"\nFROM {temp_table_names['new']}{table_alias_as} {temp_table_aliases['new']}\n"
|
2166
|
+
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as} {temp_table_aliases['backtrack']}"
|
2167
|
+
+ "\n ON\n "
|
2168
|
+
+ '\n AND\n '.join([
|
2139
2169
|
(
|
2140
|
-
f"COALESCE({
|
2170
|
+
f" COALESCE({temp_table_aliases['new']}."
|
2141
2171
|
+ sql_item_name(c, self.flavor, None)
|
2142
2172
|
+ ", "
|
2143
|
-
+ get_null_replacement(get_col_typ(c,
|
2144
|
-
+ ")
|
2145
|
-
+ '
|
2146
|
-
+ f"COALESCE({
|
2173
|
+
+ get_null_replacement(get_col_typ(c, new_cols_types), self.flavor)
|
2174
|
+
+ ")"
|
2175
|
+
+ '\n =\n '
|
2176
|
+
+ f" COALESCE({temp_table_aliases['backtrack']}."
|
2147
2177
|
+ sql_item_name(c, self.flavor, None)
|
2148
2178
|
+ ", "
|
2149
|
-
+ get_null_replacement(
|
2179
|
+
+ get_null_replacement(get_col_typ(c, backtrack_cols_types), self.flavor)
|
2150
2180
|
+ ") "
|
2151
2181
|
) for c in common_cols
|
2152
2182
|
])
|
2153
|
-
+ "\nWHERE\n"
|
2154
|
-
+ '\
|
2183
|
+
+ "\nWHERE\n "
|
2184
|
+
+ '\n AND\n '.join([
|
2155
2185
|
(
|
2156
|
-
f"{
|
2186
|
+
f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor) + ' IS NULL'
|
2157
2187
|
) for c in common_cols
|
2158
2188
|
])
|
2159
2189
|
)
|
@@ -2196,37 +2226,33 @@ def sync_pipe_inplace(
|
|
2196
2226
|
])
|
2197
2227
|
|
2198
2228
|
select_joined_query = (
|
2199
|
-
"SELECT
|
2200
|
-
+ ('
|
2229
|
+
"SELECT\n "
|
2230
|
+
+ (',\n '.join([
|
2201
2231
|
(
|
2202
|
-
f"{
|
2232
|
+
f"{temp_table_aliases['delta']}." + sql_item_name(c, self.flavor, None)
|
2203
2233
|
+ " AS " + sql_item_name(c + '_delta', self.flavor, None)
|
2204
2234
|
) for c in delta_cols
|
2205
2235
|
]))
|
2206
|
-
+ "
|
2207
|
-
+ ('
|
2236
|
+
+ ",\n "
|
2237
|
+
+ (',\n '.join([
|
2208
2238
|
(
|
2209
|
-
f"{
|
2239
|
+
f"{temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor, None)
|
2210
2240
|
+ " AS " + sql_item_name(c + '_backtrack', self.flavor, None)
|
2211
2241
|
) for c in backtrack_cols_types
|
2212
2242
|
]))
|
2213
|
-
+ f"\nFROM {temp_table_names['delta']}\n"
|
2214
|
-
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}
|
2215
|
-
+ '
|
2243
|
+
+ f"\nFROM {temp_table_names['delta']}{table_alias_as} {temp_table_aliases['delta']}\n"
|
2244
|
+
+ f"LEFT OUTER JOIN {temp_table_names['backtrack']}{table_alias_as}"
|
2245
|
+
+ f" {temp_table_aliases['backtrack']}"
|
2246
|
+
+ "\n ON\n "
|
2247
|
+
+ '\n AND\n '.join([
|
2216
2248
|
(
|
2217
|
-
f"COALESCE({
|
2249
|
+
f" COALESCE({temp_table_aliases['delta']}." + sql_item_name(c, self.flavor)
|
2218
2250
|
+ ", "
|
2219
|
-
+ get_null_replacement(
|
2220
|
-
|
2221
|
-
|
2222
|
-
) + ")"
|
2223
|
-
+ ' = '
|
2224
|
-
+ f"COALESCE({temp_table_names['backtrack']}." + sql_item_name(c, self.flavor, None)
|
2251
|
+
+ get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
|
2252
|
+
+ '\n =\n '
|
2253
|
+
+ f" COALESCE({temp_table_aliases['backtrack']}." + sql_item_name(c, self.flavor)
|
2225
2254
|
+ ", "
|
2226
|
-
+ get_null_replacement(
|
2227
|
-
get_col_typ(c, on_cols),
|
2228
|
-
self.flavor
|
2229
|
-
) + ")"
|
2255
|
+
+ get_null_replacement(get_col_typ(c, new_cols_types), self.flavor) + ")"
|
2230
2256
|
) for c, typ in on_cols.items()
|
2231
2257
|
])
|
2232
2258
|
)
|
@@ -2247,19 +2273,19 @@ def sync_pipe_inplace(
|
|
2247
2273
|
return create_joined_success, create_joined_msg
|
2248
2274
|
|
2249
2275
|
select_unseen_query = (
|
2250
|
-
"SELECT
|
2251
|
-
+ ('
|
2276
|
+
"SELECT\n "
|
2277
|
+
+ (',\n '.join([
|
2252
2278
|
(
|
2253
|
-
"CASE\n
|
2254
|
-
+ " != " + get_null_replacement(get_col_typ(c,
|
2279
|
+
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2280
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
|
2255
2281
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2256
|
-
+ "\n
|
2282
|
+
+ "\n ELSE NULL\n END"
|
2257
2283
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
2258
2284
|
) for c, typ in delta_cols.items()
|
2259
2285
|
]))
|
2260
|
-
+ f"\nFROM {temp_table_names['joined']}\n"
|
2261
|
-
+ "WHERE
|
2262
|
-
+ '\
|
2286
|
+
+ f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
|
2287
|
+
+ "WHERE\n "
|
2288
|
+
+ '\n AND\n '.join([
|
2263
2289
|
(
|
2264
2290
|
sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
|
2265
2291
|
) for c in delta_cols
|
@@ -2282,19 +2308,19 @@ def sync_pipe_inplace(
|
|
2282
2308
|
return create_unseen_success, create_unseen_msg
|
2283
2309
|
|
2284
2310
|
select_update_query = (
|
2285
|
-
"SELECT
|
2286
|
-
+ ('
|
2311
|
+
"SELECT\n "
|
2312
|
+
+ (',\n '.join([
|
2287
2313
|
(
|
2288
|
-
"CASE\n
|
2289
|
-
+ " != " + get_null_replacement(get_col_typ(c,
|
2314
|
+
"CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2315
|
+
+ " != " + get_null_replacement(get_col_typ(c, delta_cols_types), self.flavor)
|
2290
2316
|
+ " THEN " + sql_item_name(c + '_delta', self.flavor, None)
|
2291
|
-
+ "\n
|
2317
|
+
+ "\n ELSE NULL\n END"
|
2292
2318
|
+ " AS " + sql_item_name(c, self.flavor, None)
|
2293
2319
|
) for c, typ in delta_cols.items()
|
2294
2320
|
]))
|
2295
|
-
+ f"\nFROM {temp_table_names['joined']}\n"
|
2296
|
-
+ "WHERE
|
2297
|
-
+ '\
|
2321
|
+
+ f"\nFROM {temp_table_names['joined']}{table_alias_as} {temp_table_aliases['joined']}\n"
|
2322
|
+
+ "WHERE\n "
|
2323
|
+
+ '\n OR\n '.join([
|
2298
2324
|
(
|
2299
2325
|
sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
|
2300
2326
|
) for c in delta_cols
|
@@ -2366,9 +2392,9 @@ def sync_pipe_inplace(
|
|
2366
2392
|
session.commit()
|
2367
2393
|
|
2368
2394
|
msg = (
|
2369
|
-
f"Inserted {unseen_count}, updated {update_count} rows."
|
2395
|
+
f"Inserted {unseen_count:,}, updated {update_count:,} rows."
|
2370
2396
|
if not upsert
|
2371
|
-
else f"Upserted {update_count} row" + ('s' if update_count != 1 else '') + "."
|
2397
|
+
else f"Upserted {update_count:,} row" + ('s' if update_count != 1 else '') + "."
|
2372
2398
|
)
|
2373
2399
|
_ = clean_up_temp_tables(ready_to_drop=True)
|
2374
2400
|
|
@@ -2540,6 +2566,7 @@ def get_pipe_rowcount(
|
|
2540
2566
|
"""
|
2541
2567
|
from meerschaum.utils.sql import dateadd_str, sql_item_name, wrap_query_with_cte
|
2542
2568
|
from meerschaum.connectors.sql._fetch import get_pipe_query
|
2569
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
2543
2570
|
if remote:
|
2544
2571
|
msg = f"'fetch:definition' must be an attribute of {pipe} to get a remote rowcount."
|
2545
2572
|
if 'fetch' not in pipe.parameters:
|
@@ -2551,18 +2578,21 @@ def get_pipe_rowcount(
|
|
2551
2578
|
|
2552
2579
|
_pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
2553
2580
|
|
2554
|
-
|
2555
|
-
|
2556
|
-
|
2581
|
+
dt_col = pipe.columns.get('datetime', None)
|
2582
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
|
2583
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
2584
|
+
if not dt_col:
|
2585
|
+
dt_col = pipe.guess_datetime()
|
2586
|
+
dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
2557
2587
|
is_guess = True
|
2558
2588
|
else:
|
2559
|
-
|
2560
|
-
|
2589
|
+
dt_col = pipe.get_columns('datetime')
|
2590
|
+
dt_name = sql_item_name(dt_col, self.flavor, None)
|
2561
2591
|
is_guess = False
|
2562
2592
|
|
2563
2593
|
if begin is not None or end is not None:
|
2564
2594
|
if is_guess:
|
2565
|
-
if
|
2595
|
+
if dt_col is None:
|
2566
2596
|
warn(
|
2567
2597
|
f"No datetime could be determined for {pipe}."
|
2568
2598
|
+ "\n Ignoring begin and end...",
|
@@ -2572,13 +2602,13 @@ def get_pipe_rowcount(
|
|
2572
2602
|
else:
|
2573
2603
|
warn(
|
2574
2604
|
f"A datetime wasn't specified for {pipe}.\n"
|
2575
|
-
+ f" Using column \"{
|
2605
|
+
+ f" Using column \"{dt_col}\" for datetime bounds...",
|
2576
2606
|
stack=False,
|
2577
2607
|
)
|
2578
2608
|
|
2579
2609
|
|
2580
2610
|
_datetime_name = sql_item_name(
|
2581
|
-
|
2611
|
+
dt_col,
|
2582
2612
|
(
|
2583
2613
|
pipe.instance_connector.flavor
|
2584
2614
|
if not remote
|
@@ -2598,8 +2628,8 @@ def get_pipe_rowcount(
|
|
2598
2628
|
)
|
2599
2629
|
for col in set(
|
2600
2630
|
(
|
2601
|
-
[
|
2602
|
-
if
|
2631
|
+
[dt_col]
|
2632
|
+
if dt_col
|
2603
2633
|
else []
|
2604
2634
|
)
|
2605
2635
|
+ (
|
@@ -2623,13 +2653,13 @@ def get_pipe_rowcount(
|
|
2623
2653
|
query += "\nWHERE"
|
2624
2654
|
if begin is not None:
|
2625
2655
|
query += f"""
|
2626
|
-
{
|
2656
|
+
{dt_name} >= {dateadd_str(self.flavor, datepart='minute', number=0, begin=begin, db_type=dt_db_type)}
|
2627
2657
|
"""
|
2628
2658
|
if end is not None and begin is not None:
|
2629
2659
|
query += "AND"
|
2630
2660
|
if end is not None:
|
2631
2661
|
query += f"""
|
2632
|
-
{
|
2662
|
+
{dt_name} < {dateadd_str(self.flavor, datepart='minute', number=0, begin=end, db_type=dt_db_type)}
|
2633
2663
|
"""
|
2634
2664
|
if params is not None:
|
2635
2665
|
from meerschaum.utils.sql import build_where
|
@@ -2715,31 +2745,35 @@ def clear_pipe(
|
|
2715
2745
|
return True, f"{pipe} does not exist, so nothing was cleared."
|
2716
2746
|
|
2717
2747
|
from meerschaum.utils.sql import sql_item_name, build_where, dateadd_str
|
2748
|
+
from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
|
2718
2749
|
pipe_name = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
2719
2750
|
|
2751
|
+
dt_col = pipe.columns.get('datetime', None)
|
2752
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime') if dt_col else None
|
2753
|
+
dt_db_type = get_db_type_from_pd_type(dt_typ, self.flavor) if dt_typ else None
|
2720
2754
|
if not pipe.columns.get('datetime', None):
|
2721
|
-
|
2722
|
-
dt_name = sql_item_name(
|
2755
|
+
dt_col = pipe.guess_datetime()
|
2756
|
+
dt_name = sql_item_name(dt_col, self.flavor, None) if dt_col else None
|
2723
2757
|
is_guess = True
|
2724
2758
|
else:
|
2725
|
-
|
2726
|
-
dt_name = sql_item_name(
|
2759
|
+
dt_col = pipe.get_columns('datetime')
|
2760
|
+
dt_name = sql_item_name(dt_col, self.flavor, None)
|
2727
2761
|
is_guess = False
|
2728
2762
|
|
2729
2763
|
if begin is not None or end is not None:
|
2730
2764
|
if is_guess:
|
2731
|
-
if
|
2765
|
+
if dt_col is None:
|
2732
2766
|
warn(
|
2733
2767
|
f"No datetime could be determined for {pipe}."
|
2734
2768
|
+ "\n Ignoring datetime bounds...",
|
2735
|
-
stack
|
2769
|
+
stack=False,
|
2736
2770
|
)
|
2737
2771
|
begin, end = None, None
|
2738
2772
|
else:
|
2739
2773
|
warn(
|
2740
2774
|
f"A datetime wasn't specified for {pipe}.\n"
|
2741
|
-
+ f" Using column \"{
|
2742
|
-
stack
|
2775
|
+
+ f" Using column \"{dt_col}\" for datetime bounds...",
|
2776
|
+
stack=False,
|
2743
2777
|
)
|
2744
2778
|
|
2745
2779
|
valid_params = {}
|
@@ -2750,10 +2784,10 @@ def clear_pipe(
|
|
2750
2784
|
f"DELETE FROM {pipe_name}\nWHERE 1 = 1\n"
|
2751
2785
|
+ (' AND ' + build_where(valid_params, self, with_where=False) if valid_params else '')
|
2752
2786
|
+ (
|
2753
|
-
f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin)
|
2787
|
+
f' AND {dt_name} >= ' + dateadd_str(self.flavor, 'day', 0, begin, db_type=dt_db_type)
|
2754
2788
|
if begin is not None else ''
|
2755
2789
|
) + (
|
2756
|
-
f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end)
|
2790
|
+
f' AND {dt_name} < ' + dateadd_str(self.flavor, 'day', 0, end, db_type=dt_db_type)
|
2757
2791
|
if end is not None else ''
|
2758
2792
|
)
|
2759
2793
|
)
|
@@ -3400,7 +3434,6 @@ def deduplicate_pipe(
|
|
3400
3434
|
|
3401
3435
|
### TODO: Handle deleting duplicates without a datetime axis.
|
3402
3436
|
dt_col = pipe.columns.get('datetime', None)
|
3403
|
-
dt_col_name = sql_item_name(dt_col, self.flavor, None)
|
3404
3437
|
cols_types = pipe.get_columns_types(debug=debug)
|
3405
3438
|
existing_cols = pipe.get_columns_types(debug=debug)
|
3406
3439
|
|
@@ -3417,7 +3450,6 @@ def deduplicate_pipe(
|
|
3417
3450
|
]
|
3418
3451
|
indices_names = [sql_item_name(index_col, self.flavor, None) for index_col in indices]
|
3419
3452
|
existing_cols_names = [sql_item_name(col, self.flavor, None) for col in existing_cols]
|
3420
|
-
duplicates_cte_name = sql_item_name('dups', self.flavor, None)
|
3421
3453
|
duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
|
3422
3454
|
previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
|
3423
3455
|
|
@@ -3450,7 +3482,7 @@ def deduplicate_pipe(
|
|
3450
3482
|
and
|
3451
3483
|
int(self.db_version.split('.')[0]) < 8
|
3452
3484
|
)
|
3453
|
-
except Exception
|
3485
|
+
except Exception:
|
3454
3486
|
is_old_mysql = False
|
3455
3487
|
|
3456
3488
|
src_query = f"""
|
@@ -3557,7 +3589,7 @@ def deduplicate_pipe(
|
|
3557
3589
|
(
|
3558
3590
|
f"Successfully deduplicated table {pipe_table_name}"
|
3559
3591
|
+ (
|
3560
|
-
f"\nfrom {old_rowcount} to {new_rowcount} rows"
|
3592
|
+
f"\nfrom {old_rowcount:,} to {new_rowcount:,} rows"
|
3561
3593
|
if old_rowcount != new_rowcount
|
3562
3594
|
else ''
|
3563
3595
|
)
|
@@ -310,7 +310,7 @@ def enforce(self, _enforce: bool) -> None:
|
|
310
310
|
"""
|
311
311
|
Set the `enforce` parameter for the pipe.
|
312
312
|
"""
|
313
|
-
self.parameters['
|
313
|
+
self.parameters['enforce'] = _enforce
|
314
314
|
|
315
315
|
|
316
316
|
def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
|
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -20,6 +20,7 @@ def enforce_dtypes(
|
|
20
20
|
self,
|
21
21
|
df: 'pd.DataFrame',
|
22
22
|
chunksize: Optional[int] = -1,
|
23
|
+
enforce: bool = True,
|
23
24
|
safe_copy: bool = True,
|
24
25
|
debug: bool = False,
|
25
26
|
) -> 'pd.DataFrame':
|
@@ -31,7 +32,7 @@ def enforce_dtypes(
|
|
31
32
|
from meerschaum.utils.warnings import warn
|
32
33
|
from meerschaum.utils.debug import dprint
|
33
34
|
from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
|
34
|
-
from meerschaum.utils.dtypes import are_dtypes_equal
|
35
|
+
from meerschaum.utils.dtypes import are_dtypes_equal
|
35
36
|
from meerschaum.utils.packages import import_pandas
|
36
37
|
pd = import_pandas(debug=debug)
|
37
38
|
if df is None:
|
@@ -42,11 +43,9 @@ def enforce_dtypes(
|
|
42
43
|
)
|
43
44
|
return df
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
if typ in MRSM_PD_DTYPES
|
49
|
-
}
|
46
|
+
if not self.enforce:
|
47
|
+
enforce = False
|
48
|
+
pipe_dtypes = self.dtypes if enforce else {}
|
50
49
|
|
51
50
|
try:
|
52
51
|
if isinstance(df, str):
|
@@ -55,8 +54,9 @@ def enforce_dtypes(
|
|
55
54
|
ignore_cols=[
|
56
55
|
col
|
57
56
|
for col, dtype in pipe_dtypes.items()
|
58
|
-
if
|
57
|
+
if (not enforce or not are_dtypes_equal(dtype, 'datetime'))
|
59
58
|
],
|
59
|
+
ignore_all=(not enforce),
|
60
60
|
strip_timezone=(self.tzinfo is None),
|
61
61
|
chunksize=chunksize,
|
62
62
|
debug=debug,
|
@@ -67,7 +67,7 @@ def enforce_dtypes(
|
|
67
67
|
ignore_cols=[
|
68
68
|
col
|
69
69
|
for col, dtype in pipe_dtypes.items()
|
70
|
-
if not are_dtypes_equal(str(dtype), 'datetime')
|
70
|
+
if (not enforce or not are_dtypes_equal(str(dtype), 'datetime'))
|
71
71
|
],
|
72
72
|
strip_timezone=(self.tzinfo is None),
|
73
73
|
chunksize=chunksize,
|
@@ -90,7 +90,7 @@ def enforce_dtypes(
|
|
90
90
|
pipe_dtypes,
|
91
91
|
safe_copy=safe_copy,
|
92
92
|
strip_timezone=(self.tzinfo is None),
|
93
|
-
coerce_timezone=
|
93
|
+
coerce_timezone=enforce,
|
94
94
|
debug=debug,
|
95
95
|
)
|
96
96
|
|
meerschaum/core/Pipe/_fetch.py
CHANGED
@@ -10,7 +10,7 @@ from __future__ import annotations
|
|
10
10
|
from datetime import timedelta, datetime
|
11
11
|
|
12
12
|
import meerschaum as mrsm
|
13
|
-
from meerschaum.utils.typing import
|
13
|
+
from meerschaum.utils.typing import Any, Union, SuccessTuple, Iterator, TYPE_CHECKING
|
14
14
|
from meerschaum.config import get_config
|
15
15
|
from meerschaum.utils.warnings import warn
|
16
16
|
|
@@ -56,8 +56,7 @@ def fetch(
|
|
56
56
|
warn(f"No `fetch()` function defined for connector '{self.connector}'")
|
57
57
|
return None
|
58
58
|
|
59
|
-
from meerschaum.connectors import
|
60
|
-
from meerschaum.utils.debug import dprint, _checkpoint
|
59
|
+
from meerschaum.connectors import get_connector_plugin
|
61
60
|
from meerschaum.utils.misc import filter_arguments
|
62
61
|
|
63
62
|
_chunk_hook = kw.pop('chunk_hook', None)
|