meerschaum 2.5.1__py3-none-any.whl → 2.6.0.dev1__py3-none-any.whl
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +6 -1
- meerschaum/actions/edit.py +6 -6
- meerschaum/actions/sql.py +12 -11
- meerschaum/config/_edit.py +46 -19
- meerschaum/config/_read_config.py +20 -9
- meerschaum/config/_version.py +1 -1
- meerschaum/config/stack/__init__.py +1 -1
- meerschaum/connectors/sql/_pipes.py +80 -24
- meerschaum/connectors/sql/_sql.py +29 -10
- meerschaum/connectors/valkey/_pipes.py +1 -1
- meerschaum/core/Pipe/__init__.py +8 -9
- meerschaum/core/Pipe/_attributes.py +33 -11
- meerschaum/core/Pipe/_data.py +26 -7
- meerschaum/core/Pipe/_dtypes.py +4 -4
- meerschaum/core/Pipe/_fetch.py +1 -1
- meerschaum/core/Pipe/_sync.py +16 -4
- meerschaum/core/Pipe/_verify.py +1 -1
- meerschaum/utils/dataframe.py +56 -29
- meerschaum/utils/dtypes/__init__.py +16 -5
- meerschaum/utils/dtypes/sql.py +58 -28
- meerschaum/utils/misc.py +49 -16
- meerschaum/utils/sql.py +224 -40
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/METADATA +1 -1
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/RECORD +30 -30
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/WHEEL +1 -1
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/LICENSE +0 -0
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/NOTICE +0 -0
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/zip-safe +0 -0
@@ -90,8 +90,13 @@ def parse_datetime(dt_str: str) -> Union[datetime, int, str]:
|
|
90
90
|
except Exception as e:
|
91
91
|
dt = None
|
92
92
|
if dt is None:
|
93
|
-
from meerschaum.utils.warnings import
|
93
|
+
from meerschaum.utils.warnings import error
|
94
94
|
error(f"'{dt_str}' is not a valid datetime format.", stack=False)
|
95
|
+
|
96
|
+
if isinstance(dt, datetime):
|
97
|
+
from meerschaum.utils.dtypes import coerce_timezone
|
98
|
+
dt = coerce_timezone(dt)
|
99
|
+
|
95
100
|
return dt
|
96
101
|
|
97
102
|
|
meerschaum/actions/edit.py
CHANGED
@@ -65,24 +65,24 @@ def _complete_edit(
|
|
65
65
|
from meerschaum._internal.shell import default_action_completer
|
66
66
|
return default_action_completer(action=(['edit'] + action), **kw)
|
67
67
|
|
68
|
-
|
68
|
+
|
69
|
+
def _edit_config(action: Optional[List[str]] = None, **kw : Any) -> SuccessTuple:
|
69
70
|
"""
|
70
71
|
Edit Meerschaum configuration files.
|
71
|
-
|
72
|
+
|
72
73
|
Specify a specific configuration key to edit.
|
73
74
|
Defaults to editing `meerschaum` configuration (connectors, instance, etc.).
|
74
|
-
|
75
|
+
|
75
76
|
Examples:
|
76
77
|
```
|
77
78
|
### Edit the main 'meerschaum' configuration.
|
78
79
|
edit config
|
79
|
-
|
80
|
+
|
80
81
|
### Edit 'system' configuration.
|
81
82
|
edit config system
|
82
|
-
|
83
|
+
|
83
84
|
### Create a new configuration file called 'myconfig'.
|
84
85
|
edit config myconfig
|
85
|
-
|
86
86
|
```
|
87
87
|
"""
|
88
88
|
from meerschaum.config._edit import edit_config
|
meerschaum/actions/sql.py
CHANGED
@@ -14,6 +14,7 @@ exec_methods = {
|
|
14
14
|
'exec',
|
15
15
|
}
|
16
16
|
|
17
|
+
|
17
18
|
def sql(
|
18
19
|
action: Optional[List[str]] = None,
|
19
20
|
gui: bool = False,
|
@@ -22,40 +23,40 @@ def sql(
|
|
22
23
|
**kw: Any
|
23
24
|
):
|
24
25
|
"""Execute a SQL query or launch an interactive CLI. All positional arguments are optional.
|
25
|
-
|
26
|
+
|
26
27
|
Usage:
|
27
28
|
`sql {label} {method} {query / table}`
|
28
|
-
|
29
|
+
|
29
30
|
Options:
|
30
31
|
- `sql {label}`
|
31
32
|
Launch an interactive CLI. If {label} is omitted, use 'main'.
|
32
|
-
|
33
|
+
|
33
34
|
- `sql {label} read [query / table]`
|
34
35
|
Read a table or query as a pandas DataFrame and print the result.
|
35
|
-
|
36
|
+
|
36
37
|
- `sql {label} exec [query]`
|
37
38
|
Execute a query and print the success status.
|
38
|
-
|
39
|
+
|
39
40
|
Examples:
|
40
41
|
- `sql`
|
41
42
|
Open an interactive CLI for `sql:main`.
|
42
|
-
|
43
|
+
|
43
44
|
- `sql local`
|
44
45
|
Open an interactive CLI for `sql:local`.
|
45
|
-
|
46
|
+
|
46
47
|
- `sql table`
|
47
48
|
Read from `table` on `sql:main`
|
48
49
|
(translates to `SELECT * FROM table`).
|
49
|
-
|
50
|
+
|
50
51
|
- `sql local table`
|
51
52
|
Read from `table` on `sql:local`.
|
52
|
-
|
53
|
+
|
53
54
|
- `sql local read table`
|
54
55
|
Read from `table` on `sql:local`.
|
55
|
-
|
56
|
+
|
56
57
|
- `sql "SELECT * FROM table WHERE id = 1"`
|
57
58
|
Execute the above query on `sql:main` and print the results.
|
58
|
-
|
59
|
+
|
59
60
|
- `sql local exec "INSERT INTO table (id) VALUES (1)"
|
60
61
|
Execute the above query on `sql:local`.
|
61
62
|
"""
|
meerschaum/config/_edit.py
CHANGED
@@ -7,44 +7,71 @@ Functions for editing the configuration file
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
import sys
|
11
10
|
import pathlib
|
12
11
|
from meerschaum.utils.typing import Optional, Any, SuccessTuple, Mapping, Dict, List, Union
|
13
12
|
|
13
|
+
|
14
14
|
def edit_config(
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
15
|
+
keys: Optional[List[str]] = None,
|
16
|
+
params: Optional[Dict[str, Any]] = None,
|
17
|
+
debug: bool = False,
|
18
|
+
**kw: Any
|
19
|
+
) -> SuccessTuple:
|
20
20
|
"""Edit the configuration files."""
|
21
21
|
from meerschaum.config import get_config, config
|
22
|
-
from meerschaum.config._read_config import get_keyfile_path
|
22
|
+
from meerschaum.config._read_config import get_keyfile_path, read_config
|
23
23
|
from meerschaum.config._paths import CONFIG_DIR_PATH
|
24
24
|
from meerschaum.utils.packages import reload_meerschaum
|
25
25
|
from meerschaum.utils.misc import edit_file
|
26
|
-
from meerschaum.utils.
|
26
|
+
from meerschaum.utils.warnings import warn, dprint
|
27
|
+
from meerschaum.utils.prompt import prompt
|
27
28
|
|
28
29
|
if keys is None:
|
29
30
|
keys = []
|
30
31
|
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
32
|
+
def _edit_key(key: str):
|
33
|
+
while True:
|
34
|
+
### If defined in default, create the config file.
|
35
|
+
key_config = config.pop(key, None)
|
36
|
+
keyfile_path = get_keyfile_path(key, create_new=True)
|
37
|
+
get_config(key, write_missing=True, warn=False)
|
38
|
+
|
39
|
+
edit_file(get_keyfile_path(key, create_new=True))
|
40
|
+
|
41
|
+
### TODO: verify that the file is valid. Retry if not.
|
42
|
+
try:
|
43
|
+
new_key_config = read_config(
|
44
|
+
CONFIG_DIR_PATH,
|
45
|
+
[key],
|
46
|
+
write_missing=False,
|
47
|
+
raise_parsing_errors=True,
|
48
|
+
)
|
49
|
+
except Exception:
|
50
|
+
if key_config:
|
51
|
+
config[key] = key_config
|
52
|
+
warn(f"Could not parse key '{key}'.", stack=False)
|
53
|
+
_ = prompt(f"Press [Enter] to edit '{keyfile_path}', [CTRL+C] to exit.")
|
54
|
+
continue
|
55
|
+
|
56
|
+
if new_key_config:
|
57
|
+
break
|
58
|
+
|
59
|
+
try:
|
60
|
+
for k in keys:
|
61
|
+
_edit_key(k)
|
62
|
+
except KeyboardInterrupt:
|
63
|
+
return False, f""
|
37
64
|
|
38
65
|
reload_meerschaum(debug=debug)
|
39
66
|
return (True, "Success")
|
40
67
|
|
41
68
|
|
42
69
|
def write_config(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
70
|
+
config_dict: Optional[Dict[str, Any]] = None,
|
71
|
+
directory: Union[str, pathlib.Path, None] = None,
|
72
|
+
debug: bool = False,
|
73
|
+
**kw: Any
|
74
|
+
) -> bool:
|
48
75
|
"""Write YAML and JSON files to the configuration directory.
|
49
76
|
|
50
77
|
Parameters
|
@@ -6,22 +6,26 @@ Import the config yaml file
|
|
6
6
|
"""
|
7
7
|
|
8
8
|
from __future__ import annotations
|
9
|
+
import pathlib
|
10
|
+
|
9
11
|
from meerschaum.utils.typing import Optional, Dict, Any, List, Tuple, Union
|
10
12
|
from meerschaum.config import get_config
|
11
13
|
|
14
|
+
|
12
15
|
def read_config(
|
13
|
-
directory:
|
16
|
+
directory: Union[pathlib.Path, str, None] = None,
|
14
17
|
keys: Optional[List[str]] = None,
|
15
|
-
write_missing
|
16
|
-
substitute
|
17
|
-
with_filenames
|
18
|
+
write_missing: bool = True,
|
19
|
+
substitute: bool = True,
|
20
|
+
with_filenames: bool = False,
|
21
|
+
raise_parsing_errors: bool = False,
|
18
22
|
) -> Union[Dict[str, Any], Tuple[Dict[str, Any], List[str]]]:
|
19
23
|
"""
|
20
24
|
Read the configuration directory.
|
21
25
|
|
22
26
|
Parameters
|
23
27
|
----------
|
24
|
-
directory:
|
28
|
+
directory: Union[pathlib.Path, str, None], default None
|
25
29
|
The directory with configuration files (.json and .yaml).
|
26
30
|
|
27
31
|
keys: Optional[List[str]], default None
|
@@ -36,7 +40,10 @@ def read_config(
|
|
36
40
|
|
37
41
|
with_filename: bool, default False
|
38
42
|
If `True`, return a tuple of the configuration dictionary with a list of read filenames.
|
39
|
-
|
43
|
+
|
44
|
+
raise_parsing_errors: bool, default False
|
45
|
+
If `True`, re-raise parsing exceptions.
|
46
|
+
|
40
47
|
Examples
|
41
48
|
--------
|
42
49
|
>>> read_config(keys=['meerschaum'], with_filename=True)
|
@@ -63,9 +70,9 @@ def read_config(
|
|
63
70
|
|
64
71
|
default_filetype = STATIC_CONFIG['config']['default_filetype']
|
65
72
|
filetype_loaders = {
|
66
|
-
'yml'
|
67
|
-
'yaml'
|
68
|
-
'json'
|
73
|
+
'yml': yaml.load,
|
74
|
+
'yaml': yaml.load,
|
75
|
+
'json': json.load,
|
69
76
|
}
|
70
77
|
|
71
78
|
### Construct filekeys (files to parse).
|
@@ -167,6 +174,8 @@ def read_config(
|
|
167
174
|
_config_key = filetype_loaders[_type](f)
|
168
175
|
except Exception as e:
|
169
176
|
print(f"Error processing file: {filepath}")
|
177
|
+
if raise_parsing_errors:
|
178
|
+
raise e
|
170
179
|
import traceback
|
171
180
|
traceback.print_exc()
|
172
181
|
_config_key = {}
|
@@ -184,6 +193,8 @@ def read_config(
|
|
184
193
|
config[symlinks_key][key] = _single_key_config[symlinks_key][key]
|
185
194
|
break
|
186
195
|
except Exception as e:
|
196
|
+
if raise_parsing_errors:
|
197
|
+
raise e
|
187
198
|
print(f"Unable to parse {filename}!")
|
188
199
|
import traceback
|
189
200
|
traceback.print_exc()
|
meerschaum/config/_version.py
CHANGED
@@ -39,7 +39,7 @@ valkey_password = 'MRSM{meerschaum:connectors:valkey:main:password}'
|
|
39
39
|
|
40
40
|
env_dict = {
|
41
41
|
'COMPOSE_PROJECT_NAME': 'mrsm',
|
42
|
-
'TIMESCALEDB_VERSION': 'latest-pg16
|
42
|
+
'TIMESCALEDB_VERSION': 'latest-pg16',
|
43
43
|
'POSTGRES_USER': db_user,
|
44
44
|
'POSTGRES_PASSWORD': db_pass,
|
45
45
|
'POSTGRES_DB': db_base,
|
@@ -404,7 +404,7 @@ def get_create_index_queries(
|
|
404
404
|
indices = pipe.indices
|
405
405
|
|
406
406
|
_datetime = pipe.get_columns('datetime', error=False)
|
407
|
-
_datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
|
407
|
+
_datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns, UTC]')
|
408
408
|
_datetime_name = (
|
409
409
|
sql_item_name(_datetime, self.flavor, None)
|
410
410
|
if _datetime is not None else None
|
@@ -738,7 +738,7 @@ def get_pipe_data(
|
|
738
738
|
dt_type = dtypes.get(_dt, 'object').lower()
|
739
739
|
if 'datetime' not in dt_type:
|
740
740
|
if 'int' not in dt_type:
|
741
|
-
dtypes[_dt] = 'datetime64[ns]'
|
741
|
+
dtypes[_dt] = 'datetime64[ns, UTC]'
|
742
742
|
existing_cols = pipe.get_columns_types(debug=debug)
|
743
743
|
select_columns = (
|
744
744
|
[
|
@@ -1197,7 +1197,12 @@ def sync_pipe(
|
|
1197
1197
|
A `SuccessTuple` of success (`bool`) and message (`str`).
|
1198
1198
|
"""
|
1199
1199
|
from meerschaum.utils.packages import import_pandas
|
1200
|
-
from meerschaum.utils.sql import
|
1200
|
+
from meerschaum.utils.sql import (
|
1201
|
+
get_update_queries,
|
1202
|
+
sql_item_name,
|
1203
|
+
update_queries,
|
1204
|
+
get_create_table_queries,
|
1205
|
+
)
|
1201
1206
|
from meerschaum.utils.misc import generate_password
|
1202
1207
|
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
|
1203
1208
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
@@ -1232,7 +1237,6 @@ def sync_pipe(
|
|
1232
1237
|
|
1233
1238
|
### if table does not exist, create it with indices
|
1234
1239
|
is_new = False
|
1235
|
-
add_cols_query = None
|
1236
1240
|
if not pipe.exists(debug=debug):
|
1237
1241
|
check_existing = False
|
1238
1242
|
is_new = True
|
@@ -1252,9 +1256,7 @@ def sync_pipe(
|
|
1252
1256
|
|
1253
1257
|
### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
|
1254
1258
|
### so infer bools and persist them to `dtypes`.
|
1255
|
-
|
1256
|
-
### to avoid merge issues.
|
1257
|
-
if self.flavor in ('oracle', 'sqlite', 'mssql', 'mysql', 'mariadb'):
|
1259
|
+
if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
|
1258
1260
|
pipe_dtypes = pipe.dtypes
|
1259
1261
|
new_bool_cols = {
|
1260
1262
|
col: 'bool[pyarrow]'
|
@@ -1309,7 +1311,60 @@ def sync_pipe(
|
|
1309
1311
|
'schema': self.get_pipe_schema(pipe),
|
1310
1312
|
})
|
1311
1313
|
|
1314
|
+
primary_key = pipe.columns.get('primary', None)
|
1315
|
+
new_dtypes = {
|
1316
|
+
**{
|
1317
|
+
col: str(typ)
|
1318
|
+
for col, typ in unseen_df.dtypes.items()
|
1319
|
+
},
|
1320
|
+
**{
|
1321
|
+
col: 'int'
|
1322
|
+
for col_ix, col in pipe.columns.items()
|
1323
|
+
if col_ix != 'primary'
|
1324
|
+
},
|
1325
|
+
**pipe.dtypes
|
1326
|
+
} if is_new else {}
|
1327
|
+
autoincrement = (
|
1328
|
+
pipe.parameters.get('autoincrement', False)
|
1329
|
+
or (is_new and primary_key and primary_key not in new_dtypes)
|
1330
|
+
)
|
1331
|
+
if autoincrement and autoincrement not in pipe.parameters:
|
1332
|
+
pipe.parameters['autoincrement'] = autoincrement
|
1333
|
+
edit_success, edit_msg = pipe.edit(debug=debug)
|
1334
|
+
if not edit_success:
|
1335
|
+
return edit_success, edit_msg
|
1336
|
+
|
1337
|
+
if autoincrement and primary_key and primary_key not in df.columns:
|
1338
|
+
if unseen_df is not None and primary_key in unseen_df.columns:
|
1339
|
+
del unseen_df[primary_key]
|
1340
|
+
if update_df is not None and primary_key in update_df.columns:
|
1341
|
+
del update_df[primary_key]
|
1342
|
+
if delta_df is not None and primary_key in delta_df.columns:
|
1343
|
+
del delta_df[primary_key]
|
1344
|
+
|
1345
|
+
if is_new:
|
1346
|
+
if autoincrement:
|
1347
|
+
_ = new_dtypes.pop(primary_key, None)
|
1348
|
+
|
1349
|
+
### TODO: see if this can be removed
|
1350
|
+
if 'datetime' in pipe.columns and self.flavor == 'timescaledb':
|
1351
|
+
primary_key = None
|
1352
|
+
|
1353
|
+
create_table_queries = get_create_table_queries(
|
1354
|
+
new_dtypes,
|
1355
|
+
pipe.target,
|
1356
|
+
self.flavor,
|
1357
|
+
schema=self.get_pipe_schema(pipe),
|
1358
|
+
primary_key=primary_key,
|
1359
|
+
)
|
1360
|
+
create_success = all(
|
1361
|
+
self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
|
1362
|
+
)
|
1363
|
+
if not create_success:
|
1364
|
+
warn(f"Failed to create '{pipe.target}'. Continuing...")
|
1365
|
+
|
1312
1366
|
stats = self.to_sql(unseen_df, **unseen_kw)
|
1367
|
+
|
1313
1368
|
if is_new:
|
1314
1369
|
if not self.create_indices(pipe, debug=debug):
|
1315
1370
|
warn(f"Failed to create indices for {pipe}. Continuing...")
|
@@ -1358,7 +1413,7 @@ def sync_pipe(
|
|
1358
1413
|
]
|
1359
1414
|
update_queries = get_update_queries(
|
1360
1415
|
pipe.target,
|
1361
|
-
temp_target,
|
1416
|
+
temp_target,
|
1362
1417
|
self,
|
1363
1418
|
join_cols,
|
1364
1419
|
upsert=upsert,
|
@@ -1960,7 +2015,7 @@ def get_sync_time(
|
|
1960
2015
|
table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
|
1961
2016
|
|
1962
2017
|
dt_col = pipe.columns.get('datetime', None)
|
1963
|
-
dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns]')
|
2018
|
+
dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
|
1964
2019
|
if not dt_col:
|
1965
2020
|
_dt = pipe.guess_datetime()
|
1966
2021
|
dt = sql_item_name(_dt, self.flavor, None) if _dt else None
|
@@ -2366,7 +2421,7 @@ def get_pipe_columns_types(
|
|
2366
2421
|
----------
|
2367
2422
|
pipe: mrsm.Pipe:
|
2368
2423
|
The pipe to get the columns for.
|
2369
|
-
|
2424
|
+
|
2370
2425
|
Returns
|
2371
2426
|
-------
|
2372
2427
|
A dictionary of columns names (`str`) and types (`str`).
|
@@ -2381,17 +2436,18 @@ def get_pipe_columns_types(
|
|
2381
2436
|
}
|
2382
2437
|
>>>
|
2383
2438
|
"""
|
2439
|
+
from meerschaum.utils.sql import get_table_cols_types
|
2384
2440
|
if not pipe.exists(debug=debug):
|
2385
2441
|
return {}
|
2386
2442
|
|
2387
|
-
if self.flavor
|
2388
|
-
|
2389
|
-
|
2390
|
-
|
2391
|
-
|
2392
|
-
|
2393
|
-
|
2394
|
-
|
2443
|
+
# if self.flavor not in ('oracle', 'mysql', 'mariadb'):
|
2444
|
+
return get_table_cols_types(
|
2445
|
+
pipe.target,
|
2446
|
+
self,
|
2447
|
+
flavor=self.flavor,
|
2448
|
+
schema=self.get_pipe_schema(pipe),
|
2449
|
+
debug=debug,
|
2450
|
+
)
|
2395
2451
|
|
2396
2452
|
table_columns = {}
|
2397
2453
|
try:
|
@@ -2823,11 +2879,11 @@ def get_alter_columns_queries(
|
|
2823
2879
|
|
2824
2880
|
|
2825
2881
|
def get_to_sql_dtype(
|
2826
|
-
|
2827
|
-
|
2828
|
-
|
2829
|
-
|
2830
|
-
|
2882
|
+
self,
|
2883
|
+
pipe: 'mrsm.Pipe',
|
2884
|
+
df: 'pd.DataFrame',
|
2885
|
+
update_dtypes: bool = True,
|
2886
|
+
) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
|
2831
2887
|
"""
|
2832
2888
|
Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
|
2833
2889
|
|
@@ -2947,7 +3003,7 @@ def deduplicate_pipe(
|
|
2947
3003
|
duplicates_cte_name = sql_item_name('dups', self.flavor, None)
|
2948
3004
|
duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
|
2949
3005
|
previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
|
2950
|
-
|
3006
|
+
|
2951
3007
|
index_list_str = (
|
2952
3008
|
sql_item_name(dt_col, self.flavor, None)
|
2953
3009
|
if dt_col
|
@@ -17,8 +17,8 @@ from meerschaum.utils.warnings import warn
|
|
17
17
|
### database flavors that can use bulk insert
|
18
18
|
_bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
|
19
19
|
### flavors that do not support chunks
|
20
|
-
_disallow_chunks_flavors = [
|
21
|
-
_max_chunks_flavors = {'sqlite': 1000
|
20
|
+
_disallow_chunks_flavors = []
|
21
|
+
_max_chunks_flavors = {'sqlite': 1000}
|
22
22
|
SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
|
23
23
|
|
24
24
|
|
@@ -123,7 +123,8 @@ def read(
|
|
123
123
|
if chunks is not None and chunks <= 0:
|
124
124
|
return []
|
125
125
|
from meerschaum.utils.sql import sql_item_name, truncate_item_name
|
126
|
-
from meerschaum.utils.dtypes
|
126
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
|
127
|
+
from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
|
127
128
|
from meerschaum.utils.packages import attempt_import, import_pandas
|
128
129
|
from meerschaum.utils.pool import get_pool
|
129
130
|
from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
|
@@ -139,6 +140,16 @@ def read(
|
|
139
140
|
if is_dask:
|
140
141
|
chunksize = None
|
141
142
|
schema = schema or self.schema
|
143
|
+
utc_dt_cols = [
|
144
|
+
col
|
145
|
+
for col, typ in dtype.items()
|
146
|
+
if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
|
147
|
+
] if dtype else []
|
148
|
+
|
149
|
+
if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
|
150
|
+
dtype = dtype.copy()
|
151
|
+
for col in utc_dt_cols:
|
152
|
+
dtype[col] = 'datetime64[ns]'
|
142
153
|
|
143
154
|
pool = get_pool(workers=workers)
|
144
155
|
sqlalchemy = attempt_import("sqlalchemy")
|
@@ -162,7 +173,6 @@ def read(
|
|
162
173
|
)
|
163
174
|
chunksize = _max_chunks_flavors[self.flavor]
|
164
175
|
|
165
|
-
### NOTE: A bug in duckdb_engine does not allow for chunks.
|
166
176
|
if chunksize is not None and self.flavor in _disallow_chunks_flavors:
|
167
177
|
chunksize = None
|
168
178
|
|
@@ -206,6 +216,9 @@ def read(
|
|
206
216
|
chunk_list = []
|
207
217
|
chunk_hook_results = []
|
208
218
|
def _process_chunk(_chunk, _retry_on_failure: bool = True):
|
219
|
+
if self.flavor in TIMEZONE_NAIVE_FLAVORS:
|
220
|
+
for col in utc_dt_cols:
|
221
|
+
_chunk[col] = coerce_timezone(_chunk[col], strip_timezone=False)
|
209
222
|
if not as_hook_results:
|
210
223
|
chunk_list.append(_chunk)
|
211
224
|
if chunk_hook is None:
|
@@ -765,7 +778,7 @@ def to_sql(
|
|
765
778
|
DROP_IF_EXISTS_FLAVORS,
|
766
779
|
)
|
767
780
|
from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
|
768
|
-
from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
|
781
|
+
from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
|
769
782
|
from meerschaum.utils.dtypes.sql import (
|
770
783
|
NUMERIC_PRECISION_FLAVORS,
|
771
784
|
PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
|
@@ -848,7 +861,6 @@ def to_sql(
|
|
848
861
|
if not success:
|
849
862
|
warn(f"Unable to drop {name}")
|
850
863
|
|
851
|
-
|
852
864
|
### Enforce NVARCHAR(2000) as text instead of CLOB.
|
853
865
|
dtype = to_sql_kw.get('dtype', {})
|
854
866
|
for col, typ in df.dtypes.items():
|
@@ -858,11 +870,18 @@ def to_sql(
|
|
858
870
|
dtype[col] = sqlalchemy.types.INTEGER
|
859
871
|
to_sql_kw['dtype'] = dtype
|
860
872
|
elif self.flavor == 'mssql':
|
873
|
+
pass
|
874
|
+
### TODO clean this up
|
875
|
+
# dtype = to_sql_kw.get('dtype', {})
|
876
|
+
# for col, typ in df.dtypes.items():
|
877
|
+
# if are_dtypes_equal(str(typ), 'bool'):
|
878
|
+
# dtype[col] = sqlalchemy.types.INTEGER
|
879
|
+
# to_sql_kw['dtype'] = dtype
|
880
|
+
elif self.flavor == 'duckdb':
|
861
881
|
dtype = to_sql_kw.get('dtype', {})
|
862
|
-
for col, typ in df.dtypes.items()
|
863
|
-
|
864
|
-
|
865
|
-
to_sql_kw['dtype'] = dtype
|
882
|
+
dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
|
883
|
+
for col in dt_cols:
|
884
|
+
df[col] = coerce_timezone(df[col], strip_utc=False)
|
866
885
|
|
867
886
|
### Check for JSON columns.
|
868
887
|
if self.flavor not in json_flavors:
|
@@ -706,7 +706,7 @@ def get_sync_time(
|
|
706
706
|
"""
|
707
707
|
from meerschaum.utils.dtypes import are_dtypes_equal
|
708
708
|
dt_col = pipe.columns.get('datetime', None)
|
709
|
-
dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
|
709
|
+
dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
|
710
710
|
if not dt_col:
|
711
711
|
return None
|
712
712
|
|
meerschaum/core/Pipe/__init__.py
CHANGED
@@ -153,6 +153,7 @@ class Pipe:
|
|
153
153
|
dtypes: Optional[Dict[str, str]] = None,
|
154
154
|
instance: Optional[Union[str, InstanceConnector]] = None,
|
155
155
|
temporary: bool = False,
|
156
|
+
upsert: Optional[bool] = None,
|
156
157
|
mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
|
157
158
|
cache: bool = False,
|
158
159
|
debug: bool = False,
|
@@ -201,6 +202,9 @@ class Pipe:
|
|
201
202
|
instance: Optional[Union[str, InstanceConnector]], default None
|
202
203
|
Alias for `mrsm_instance`. If `mrsm_instance` is supplied, this value is ignored.
|
203
204
|
|
205
|
+
upsert: Optional[bool], default None
|
206
|
+
If `True`, set `upsert` to `True` in the parameters.
|
207
|
+
|
204
208
|
temporary: bool, default False
|
205
209
|
If `True`, prevent instance tables (pipes, users, plugins) from being created.
|
206
210
|
|
@@ -268,7 +272,7 @@ class Pipe:
|
|
268
272
|
or indexes
|
269
273
|
or self._attributes.get('parameters', {}).get('indices', None)
|
270
274
|
or self._attributes.get('parameters', {}).get('indexes', None)
|
271
|
-
)
|
275
|
+
)
|
272
276
|
if isinstance(indices, dict):
|
273
277
|
indices_key = (
|
274
278
|
'indexes'
|
@@ -292,6 +296,9 @@ class Pipe:
|
|
292
296
|
elif dtypes is not None:
|
293
297
|
warn(f"The provided dtypes are of invalid type '{type(dtypes)}'.")
|
294
298
|
|
299
|
+
if isinstance(upsert, bool):
|
300
|
+
self._attributes['parameters']['upsert'] = upsert
|
301
|
+
|
295
302
|
### NOTE: The parameters dictionary is {} by default.
|
296
303
|
### A Pipe may be registered without parameters, then edited,
|
297
304
|
### or a Pipe may be registered with parameters set in-memory first.
|
@@ -308,7 +315,6 @@ class Pipe:
|
|
308
315
|
|
309
316
|
self._cache = cache and get_config('system', 'experimental', 'cache')
|
310
317
|
|
311
|
-
|
312
318
|
@property
|
313
319
|
def meta(self):
|
314
320
|
"""
|
@@ -321,7 +327,6 @@ class Pipe:
|
|
321
327
|
'instance': self.instance_keys,
|
322
328
|
}
|
323
329
|
|
324
|
-
|
325
330
|
def keys(self) -> List[str]:
|
326
331
|
"""
|
327
332
|
Return the ordered keys for this pipe.
|
@@ -332,7 +337,6 @@ class Pipe:
|
|
332
337
|
if key != 'instance'
|
333
338
|
}
|
334
339
|
|
335
|
-
|
336
340
|
@property
|
337
341
|
def instance_connector(self) -> Union[InstanceConnector, None]:
|
338
342
|
"""
|
@@ -369,7 +373,6 @@ class Pipe:
|
|
369
373
|
return None
|
370
374
|
return self._connector
|
371
375
|
|
372
|
-
|
373
376
|
@property
|
374
377
|
def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
|
375
378
|
"""
|
@@ -391,7 +394,6 @@ class Pipe:
|
|
391
394
|
|
392
395
|
return self._cache_connector
|
393
396
|
|
394
|
-
|
395
397
|
@property
|
396
398
|
def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
|
397
399
|
"""
|
@@ -433,11 +435,9 @@ class Pipe:
|
|
433
435
|
|
434
436
|
return self._cache_pipe
|
435
437
|
|
436
|
-
|
437
438
|
def __str__(self, ansi: bool=False):
|
438
439
|
return pipe_repr(self, ansi=ansi)
|
439
440
|
|
440
|
-
|
441
441
|
def __eq__(self, other):
|
442
442
|
try:
|
443
443
|
return (
|
@@ -489,7 +489,6 @@ class Pipe:
|
|
489
489
|
"""
|
490
490
|
self.__init__(**_state)
|
491
491
|
|
492
|
-
|
493
492
|
def __getitem__(self, key: str) -> Any:
|
494
493
|
"""
|
495
494
|
Index the pipe's attributes.
|