meerschaum 2.5.1__py3-none-any.whl → 2.6.0.dev1__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (30)
  1. meerschaum/_internal/arguments/_parser.py +6 -1
  2. meerschaum/actions/edit.py +6 -6
  3. meerschaum/actions/sql.py +12 -11
  4. meerschaum/config/_edit.py +46 -19
  5. meerschaum/config/_read_config.py +20 -9
  6. meerschaum/config/_version.py +1 -1
  7. meerschaum/config/stack/__init__.py +1 -1
  8. meerschaum/connectors/sql/_pipes.py +80 -24
  9. meerschaum/connectors/sql/_sql.py +29 -10
  10. meerschaum/connectors/valkey/_pipes.py +1 -1
  11. meerschaum/core/Pipe/__init__.py +8 -9
  12. meerschaum/core/Pipe/_attributes.py +33 -11
  13. meerschaum/core/Pipe/_data.py +26 -7
  14. meerschaum/core/Pipe/_dtypes.py +4 -4
  15. meerschaum/core/Pipe/_fetch.py +1 -1
  16. meerschaum/core/Pipe/_sync.py +16 -4
  17. meerschaum/core/Pipe/_verify.py +1 -1
  18. meerschaum/utils/dataframe.py +56 -29
  19. meerschaum/utils/dtypes/__init__.py +16 -5
  20. meerschaum/utils/dtypes/sql.py +58 -28
  21. meerschaum/utils/misc.py +49 -16
  22. meerschaum/utils/sql.py +224 -40
  23. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/METADATA +1 -1
  24. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/RECORD +30 -30
  25. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/WHEEL +1 -1
  26. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/LICENSE +0 -0
  27. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/NOTICE +0 -0
  28. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/entry_points.txt +0 -0
  29. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/top_level.txt +0 -0
  30. {meerschaum-2.5.1.dist-info → meerschaum-2.6.0.dev1.dist-info}/zip-safe +0 -0
meerschaum/_internal/arguments/_parser.py CHANGED
@@ -90,8 +90,13 @@ def parse_datetime(dt_str: str) -> Union[datetime, int, str]:
90
90
  except Exception as e:
91
91
  dt = None
92
92
  if dt is None:
93
- from meerschaum.utils.warnings import warn, error
93
+ from meerschaum.utils.warnings import error
94
94
  error(f"'{dt_str}' is not a valid datetime format.", stack=False)
95
+
96
+ if isinstance(dt, datetime):
97
+ from meerschaum.utils.dtypes import coerce_timezone
98
+ dt = coerce_timezone(dt)
99
+
95
100
  return dt
96
101
 
97
102
 
meerschaum/actions/edit.py CHANGED
@@ -65,24 +65,24 @@ def _complete_edit(
65
65
  from meerschaum._internal.shell import default_action_completer
66
66
  return default_action_completer(action=(['edit'] + action), **kw)
67
67
 
68
- def _edit_config(action : Optional[List[str]] = None, **kw : Any) -> SuccessTuple:
68
+
69
+ def _edit_config(action: Optional[List[str]] = None, **kw : Any) -> SuccessTuple:
69
70
  """
70
71
  Edit Meerschaum configuration files.
71
-
72
+
72
73
  Specify a specific configuration key to edit.
73
74
  Defaults to editing `meerschaum` configuration (connectors, instance, etc.).
74
-
75
+
75
76
  Examples:
76
77
  ```
77
78
  ### Edit the main 'meerschaum' configuration.
78
79
  edit config
79
-
80
+
80
81
  ### Edit 'system' configuration.
81
82
  edit config system
82
-
83
+
83
84
  ### Create a new configuration file called 'myconfig'.
84
85
  edit config myconfig
85
-
86
86
  ```
87
87
  """
88
88
  from meerschaum.config._edit import edit_config
meerschaum/actions/sql.py CHANGED
@@ -14,6 +14,7 @@ exec_methods = {
14
14
  'exec',
15
15
  }
16
16
 
17
+
17
18
  def sql(
18
19
  action: Optional[List[str]] = None,
19
20
  gui: bool = False,
@@ -22,40 +23,40 @@ def sql(
22
23
  **kw: Any
23
24
  ):
24
25
  """Execute a SQL query or launch an interactive CLI. All positional arguments are optional.
25
-
26
+
26
27
  Usage:
27
28
  `sql {label} {method} {query / table}`
28
-
29
+
29
30
  Options:
30
31
  - `sql {label}`
31
32
  Launch an interactive CLI. If {label} is omitted, use 'main'.
32
-
33
+
33
34
  - `sql {label} read [query / table]`
34
35
  Read a table or query as a pandas DataFrame and print the result.
35
-
36
+
36
37
  - `sql {label} exec [query]`
37
38
  Execute a query and print the success status.
38
-
39
+
39
40
  Examples:
40
41
  - `sql`
41
42
  Open an interactive CLI for `sql:main`.
42
-
43
+
43
44
  - `sql local`
44
45
  Open an interactive CLI for `sql:local`.
45
-
46
+
46
47
  - `sql table`
47
48
  Read from `table` on `sql:main`
48
49
  (translates to `SELECT * FROM table`).
49
-
50
+
50
51
  - `sql local table`
51
52
  Read from `table` on `sql:local`.
52
-
53
+
53
54
  - `sql local read table`
54
55
  Read from `table` on `sql:local`.
55
-
56
+
56
57
  - `sql "SELECT * FROM table WHERE id = 1"`
57
58
  Execute the above query on `sql:main` and print the results.
58
-
59
+
59
60
  - `sql local exec "INSERT INTO table (id) VALUES (1)"
60
61
  Execute the above query on `sql:local`.
61
62
  """
meerschaum/config/_edit.py CHANGED
@@ -7,44 +7,71 @@ Functions for editing the configuration file
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- import sys
11
10
  import pathlib
12
11
  from meerschaum.utils.typing import Optional, Any, SuccessTuple, Mapping, Dict, List, Union
13
12
 
13
+
14
14
  def edit_config(
15
- keys : Optional[List[str]] = None,
16
- params : Optional[Dict[str, Any]] = None,
17
- debug : bool = False,
18
- **kw : Any
19
- ) -> SuccessTuple:
15
+ keys: Optional[List[str]] = None,
16
+ params: Optional[Dict[str, Any]] = None,
17
+ debug: bool = False,
18
+ **kw: Any
19
+ ) -> SuccessTuple:
20
20
  """Edit the configuration files."""
21
21
  from meerschaum.config import get_config, config
22
- from meerschaum.config._read_config import get_keyfile_path
22
+ from meerschaum.config._read_config import get_keyfile_path, read_config
23
23
  from meerschaum.config._paths import CONFIG_DIR_PATH
24
24
  from meerschaum.utils.packages import reload_meerschaum
25
25
  from meerschaum.utils.misc import edit_file
26
- from meerschaum.utils.debug import dprint
26
+ from meerschaum.utils.warnings import warn, dprint
27
+ from meerschaum.utils.prompt import prompt
27
28
 
28
29
  if keys is None:
29
30
  keys = []
30
31
 
31
- for k in keys:
32
- ### If defined in default, create the config file.
33
- if isinstance(config, dict) and k in config:
34
- del config[k]
35
- get_config(k, write_missing=True, warn=False)
36
- edit_file(get_keyfile_path(k, create_new=True))
32
+ def _edit_key(key: str):
33
+ while True:
34
+ ### If defined in default, create the config file.
35
+ key_config = config.pop(key, None)
36
+ keyfile_path = get_keyfile_path(key, create_new=True)
37
+ get_config(key, write_missing=True, warn=False)
38
+
39
+ edit_file(get_keyfile_path(key, create_new=True))
40
+
41
+ ### TODO: verify that the file is valid. Retry if not.
42
+ try:
43
+ new_key_config = read_config(
44
+ CONFIG_DIR_PATH,
45
+ [key],
46
+ write_missing=False,
47
+ raise_parsing_errors=True,
48
+ )
49
+ except Exception:
50
+ if key_config:
51
+ config[key] = key_config
52
+ warn(f"Could not parse key '{key}'.", stack=False)
53
+ _ = prompt(f"Press [Enter] to edit '{keyfile_path}', [CTRL+C] to exit.")
54
+ continue
55
+
56
+ if new_key_config:
57
+ break
58
+
59
+ try:
60
+ for k in keys:
61
+ _edit_key(k)
62
+ except KeyboardInterrupt:
63
+ return False, f""
37
64
 
38
65
  reload_meerschaum(debug=debug)
39
66
  return (True, "Success")
40
67
 
41
68
 
42
69
  def write_config(
43
- config_dict: Optional[Dict[str, Any]] = None,
44
- directory: Union[str, pathlib.Path, None] = None,
45
- debug: bool = False,
46
- **kw : Any
47
- ) -> bool:
70
+ config_dict: Optional[Dict[str, Any]] = None,
71
+ directory: Union[str, pathlib.Path, None] = None,
72
+ debug: bool = False,
73
+ **kw: Any
74
+ ) -> bool:
48
75
  """Write YAML and JSON files to the configuration directory.
49
76
 
50
77
  Parameters
meerschaum/config/_read_config.py CHANGED
@@ -6,22 +6,26 @@ Import the config yaml file
6
6
  """
7
7
 
8
8
  from __future__ import annotations
9
+ import pathlib
10
+
9
11
  from meerschaum.utils.typing import Optional, Dict, Any, List, Tuple, Union
10
12
  from meerschaum.config import get_config
11
13
 
14
+
12
15
  def read_config(
13
- directory: Optional[str] = None,
16
+ directory: Union[pathlib.Path, str, None] = None,
14
17
  keys: Optional[List[str]] = None,
15
- write_missing : bool = True,
16
- substitute : bool = True,
17
- with_filenames : bool = False,
18
+ write_missing: bool = True,
19
+ substitute: bool = True,
20
+ with_filenames: bool = False,
21
+ raise_parsing_errors: bool = False,
18
22
  ) -> Union[Dict[str, Any], Tuple[Dict[str, Any], List[str]]]:
19
23
  """
20
24
  Read the configuration directory.
21
25
 
22
26
  Parameters
23
27
  ----------
24
- directory: Optional[str], default None
28
+ directory: Union[pathlib.Path, str, None], default None
25
29
  The directory with configuration files (.json and .yaml).
26
30
 
27
31
  keys: Optional[List[str]], default None
@@ -36,7 +40,10 @@ def read_config(
36
40
 
37
41
  with_filename: bool, default False
38
42
  If `True`, return a tuple of the configuration dictionary with a list of read filenames.
39
-
43
+
44
+ raise_parsing_errors: bool, default False
45
+ If `True`, re-raise parsing exceptions.
46
+
40
47
  Examples
41
48
  --------
42
49
  >>> read_config(keys=['meerschaum'], with_filename=True)
@@ -63,9 +70,9 @@ def read_config(
63
70
 
64
71
  default_filetype = STATIC_CONFIG['config']['default_filetype']
65
72
  filetype_loaders = {
66
- 'yml' : yaml.load,
67
- 'yaml' : yaml.load,
68
- 'json' : json.load,
73
+ 'yml': yaml.load,
74
+ 'yaml': yaml.load,
75
+ 'json': json.load,
69
76
  }
70
77
 
71
78
  ### Construct filekeys (files to parse).
@@ -167,6 +174,8 @@ def read_config(
167
174
  _config_key = filetype_loaders[_type](f)
168
175
  except Exception as e:
169
176
  print(f"Error processing file: {filepath}")
177
+ if raise_parsing_errors:
178
+ raise e
170
179
  import traceback
171
180
  traceback.print_exc()
172
181
  _config_key = {}
@@ -184,6 +193,8 @@ def read_config(
184
193
  config[symlinks_key][key] = _single_key_config[symlinks_key][key]
185
194
  break
186
195
  except Exception as e:
196
+ if raise_parsing_errors:
197
+ raise e
187
198
  print(f"Unable to parse {filename}!")
188
199
  import traceback
189
200
  traceback.print_exc()
meerschaum/config/_version.py CHANGED
@@ -2,4 +2,4 @@
2
2
  Specify the Meerschaum release version.
3
3
  """
4
4
 
5
- __version__ = "2.5.1"
5
+ __version__ = "2.6.0.dev1"
meerschaum/config/stack/__init__.py CHANGED
@@ -39,7 +39,7 @@ valkey_password = 'MRSM{meerschaum:connectors:valkey:main:password}'
39
39
 
40
40
  env_dict = {
41
41
  'COMPOSE_PROJECT_NAME': 'mrsm',
42
- 'TIMESCALEDB_VERSION': 'latest-pg16-oss',
42
+ 'TIMESCALEDB_VERSION': 'latest-pg16',
43
43
  'POSTGRES_USER': db_user,
44
44
  'POSTGRES_PASSWORD': db_pass,
45
45
  'POSTGRES_DB': db_base,
meerschaum/connectors/sql/_pipes.py CHANGED
@@ -404,7 +404,7 @@ def get_create_index_queries(
404
404
  indices = pipe.indices
405
405
 
406
406
  _datetime = pipe.get_columns('datetime', error=False)
407
- _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
407
+ _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns, UTC]')
408
408
  _datetime_name = (
409
409
  sql_item_name(_datetime, self.flavor, None)
410
410
  if _datetime is not None else None
@@ -738,7 +738,7 @@ def get_pipe_data(
738
738
  dt_type = dtypes.get(_dt, 'object').lower()
739
739
  if 'datetime' not in dt_type:
740
740
  if 'int' not in dt_type:
741
- dtypes[_dt] = 'datetime64[ns]'
741
+ dtypes[_dt] = 'datetime64[ns, UTC]'
742
742
  existing_cols = pipe.get_columns_types(debug=debug)
743
743
  select_columns = (
744
744
  [
@@ -1197,7 +1197,12 @@ def sync_pipe(
1197
1197
  A `SuccessTuple` of success (`bool`) and message (`str`).
1198
1198
  """
1199
1199
  from meerschaum.utils.packages import import_pandas
1200
- from meerschaum.utils.sql import get_update_queries, sql_item_name, json_flavors, update_queries
1200
+ from meerschaum.utils.sql import (
1201
+ get_update_queries,
1202
+ sql_item_name,
1203
+ update_queries,
1204
+ get_create_table_queries,
1205
+ )
1201
1206
  from meerschaum.utils.misc import generate_password
1202
1207
  from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
1203
1208
  from meerschaum.utils.dtypes import are_dtypes_equal
@@ -1232,7 +1237,6 @@ def sync_pipe(
1232
1237
 
1233
1238
  ### if table does not exist, create it with indices
1234
1239
  is_new = False
1235
- add_cols_query = None
1236
1240
  if not pipe.exists(debug=debug):
1237
1241
  check_existing = False
1238
1242
  is_new = True
@@ -1252,9 +1256,7 @@ def sync_pipe(
1252
1256
 
1253
1257
  ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
1254
1258
  ### so infer bools and persist them to `dtypes`.
1255
- ### MSSQL supports `BIT` for booleans, but we coerce bools to int for MSSQL
1256
- ### to avoid merge issues.
1257
- if self.flavor in ('oracle', 'sqlite', 'mssql', 'mysql', 'mariadb'):
1259
+ if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
1258
1260
  pipe_dtypes = pipe.dtypes
1259
1261
  new_bool_cols = {
1260
1262
  col: 'bool[pyarrow]'
@@ -1309,7 +1311,60 @@ def sync_pipe(
1309
1311
  'schema': self.get_pipe_schema(pipe),
1310
1312
  })
1311
1313
 
1314
+ primary_key = pipe.columns.get('primary', None)
1315
+ new_dtypes = {
1316
+ **{
1317
+ col: str(typ)
1318
+ for col, typ in unseen_df.dtypes.items()
1319
+ },
1320
+ **{
1321
+ col: 'int'
1322
+ for col_ix, col in pipe.columns.items()
1323
+ if col_ix != 'primary'
1324
+ },
1325
+ **pipe.dtypes
1326
+ } if is_new else {}
1327
+ autoincrement = (
1328
+ pipe.parameters.get('autoincrement', False)
1329
+ or (is_new and primary_key and primary_key not in new_dtypes)
1330
+ )
1331
+ if autoincrement and autoincrement not in pipe.parameters:
1332
+ pipe.parameters['autoincrement'] = autoincrement
1333
+ edit_success, edit_msg = pipe.edit(debug=debug)
1334
+ if not edit_success:
1335
+ return edit_success, edit_msg
1336
+
1337
+ if autoincrement and primary_key and primary_key not in df.columns:
1338
+ if unseen_df is not None and primary_key in unseen_df.columns:
1339
+ del unseen_df[primary_key]
1340
+ if update_df is not None and primary_key in update_df.columns:
1341
+ del update_df[primary_key]
1342
+ if delta_df is not None and primary_key in delta_df.columns:
1343
+ del delta_df[primary_key]
1344
+
1345
+ if is_new:
1346
+ if autoincrement:
1347
+ _ = new_dtypes.pop(primary_key, None)
1348
+
1349
+ ### TODO: see if this can be removed
1350
+ if 'datetime' in pipe.columns and self.flavor == 'timescaledb':
1351
+ primary_key = None
1352
+
1353
+ create_table_queries = get_create_table_queries(
1354
+ new_dtypes,
1355
+ pipe.target,
1356
+ self.flavor,
1357
+ schema=self.get_pipe_schema(pipe),
1358
+ primary_key=primary_key,
1359
+ )
1360
+ create_success = all(
1361
+ self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
1362
+ )
1363
+ if not create_success:
1364
+ warn(f"Failed to create '{pipe.target}'. Continuing...")
1365
+
1312
1366
  stats = self.to_sql(unseen_df, **unseen_kw)
1367
+
1313
1368
  if is_new:
1314
1369
  if not self.create_indices(pipe, debug=debug):
1315
1370
  warn(f"Failed to create indices for {pipe}. Continuing...")
@@ -1358,7 +1413,7 @@ def sync_pipe(
1358
1413
  ]
1359
1414
  update_queries = get_update_queries(
1360
1415
  pipe.target,
1361
- temp_target,
1416
+ temp_target,
1362
1417
  self,
1363
1418
  join_cols,
1364
1419
  upsert=upsert,
@@ -1960,7 +2015,7 @@ def get_sync_time(
1960
2015
  table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
1961
2016
 
1962
2017
  dt_col = pipe.columns.get('datetime', None)
1963
- dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns]')
2018
+ dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
1964
2019
  if not dt_col:
1965
2020
  _dt = pipe.guess_datetime()
1966
2021
  dt = sql_item_name(_dt, self.flavor, None) if _dt else None
@@ -2366,7 +2421,7 @@ def get_pipe_columns_types(
2366
2421
  ----------
2367
2422
  pipe: mrsm.Pipe:
2368
2423
  The pipe to get the columns for.
2369
-
2424
+
2370
2425
  Returns
2371
2426
  -------
2372
2427
  A dictionary of columns names (`str`) and types (`str`).
@@ -2381,17 +2436,18 @@ def get_pipe_columns_types(
2381
2436
  }
2382
2437
  >>>
2383
2438
  """
2439
+ from meerschaum.utils.sql import get_table_cols_types
2384
2440
  if not pipe.exists(debug=debug):
2385
2441
  return {}
2386
2442
 
2387
- if self.flavor == 'duckdb':
2388
- from meerschaum.utils.sql import get_table_cols_types
2389
- return get_table_cols_types(
2390
- pipe.target,
2391
- self,
2392
- flavor=self.flavor,
2393
- schema=self.get_pipe_schema(pipe),
2394
- )
2443
+ # if self.flavor not in ('oracle', 'mysql', 'mariadb'):
2444
+ return get_table_cols_types(
2445
+ pipe.target,
2446
+ self,
2447
+ flavor=self.flavor,
2448
+ schema=self.get_pipe_schema(pipe),
2449
+ debug=debug,
2450
+ )
2395
2451
 
2396
2452
  table_columns = {}
2397
2453
  try:
@@ -2823,11 +2879,11 @@ def get_alter_columns_queries(
2823
2879
 
2824
2880
 
2825
2881
  def get_to_sql_dtype(
2826
- self,
2827
- pipe: 'mrsm.Pipe',
2828
- df: 'pd.DataFrame',
2829
- update_dtypes: bool = True,
2830
- ) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
2882
+ self,
2883
+ pipe: 'mrsm.Pipe',
2884
+ df: 'pd.DataFrame',
2885
+ update_dtypes: bool = True,
2886
+ ) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
2831
2887
  """
2832
2888
  Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
2833
2889
 
@@ -2947,7 +3003,7 @@ def deduplicate_pipe(
2947
3003
  duplicates_cte_name = sql_item_name('dups', self.flavor, None)
2948
3004
  duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
2949
3005
  previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
2950
-
3006
+
2951
3007
  index_list_str = (
2952
3008
  sql_item_name(dt_col, self.flavor, None)
2953
3009
  if dt_col
meerschaum/connectors/sql/_sql.py CHANGED
@@ -17,8 +17,8 @@ from meerschaum.utils.warnings import warn
17
17
  ### database flavors that can use bulk insert
18
18
  _bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
19
19
  ### flavors that do not support chunks
20
- _disallow_chunks_flavors = ['duckdb']
21
- _max_chunks_flavors = {'sqlite': 1000,}
20
+ _disallow_chunks_flavors = []
21
+ _max_chunks_flavors = {'sqlite': 1000}
22
22
  SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
23
23
 
24
24
 
@@ -123,7 +123,8 @@ def read(
123
123
  if chunks is not None and chunks <= 0:
124
124
  return []
125
125
  from meerschaum.utils.sql import sql_item_name, truncate_item_name
126
- from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS
126
+ from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
127
+ from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
127
128
  from meerschaum.utils.packages import attempt_import, import_pandas
128
129
  from meerschaum.utils.pool import get_pool
129
130
  from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
@@ -139,6 +140,16 @@ def read(
139
140
  if is_dask:
140
141
  chunksize = None
141
142
  schema = schema or self.schema
143
+ utc_dt_cols = [
144
+ col
145
+ for col, typ in dtype.items()
146
+ if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
147
+ ] if dtype else []
148
+
149
+ if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
150
+ dtype = dtype.copy()
151
+ for col in utc_dt_cols:
152
+ dtype[col] = 'datetime64[ns]'
142
153
 
143
154
  pool = get_pool(workers=workers)
144
155
  sqlalchemy = attempt_import("sqlalchemy")
@@ -162,7 +173,6 @@ def read(
162
173
  )
163
174
  chunksize = _max_chunks_flavors[self.flavor]
164
175
 
165
- ### NOTE: A bug in duckdb_engine does not allow for chunks.
166
176
  if chunksize is not None and self.flavor in _disallow_chunks_flavors:
167
177
  chunksize = None
168
178
 
@@ -206,6 +216,9 @@ def read(
206
216
  chunk_list = []
207
217
  chunk_hook_results = []
208
218
  def _process_chunk(_chunk, _retry_on_failure: bool = True):
219
+ if self.flavor in TIMEZONE_NAIVE_FLAVORS:
220
+ for col in utc_dt_cols:
221
+ _chunk[col] = coerce_timezone(_chunk[col], strip_timezone=False)
209
222
  if not as_hook_results:
210
223
  chunk_list.append(_chunk)
211
224
  if chunk_hook is None:
@@ -765,7 +778,7 @@ def to_sql(
765
778
  DROP_IF_EXISTS_FLAVORS,
766
779
  )
767
780
  from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
768
- from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
781
+ from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
769
782
  from meerschaum.utils.dtypes.sql import (
770
783
  NUMERIC_PRECISION_FLAVORS,
771
784
  PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
@@ -848,7 +861,6 @@ def to_sql(
848
861
  if not success:
849
862
  warn(f"Unable to drop {name}")
850
863
 
851
-
852
864
  ### Enforce NVARCHAR(2000) as text instead of CLOB.
853
865
  dtype = to_sql_kw.get('dtype', {})
854
866
  for col, typ in df.dtypes.items():
@@ -858,11 +870,18 @@ def to_sql(
858
870
  dtype[col] = sqlalchemy.types.INTEGER
859
871
  to_sql_kw['dtype'] = dtype
860
872
  elif self.flavor == 'mssql':
873
+ pass
874
+ ### TODO clean this up
875
+ # dtype = to_sql_kw.get('dtype', {})
876
+ # for col, typ in df.dtypes.items():
877
+ # if are_dtypes_equal(str(typ), 'bool'):
878
+ # dtype[col] = sqlalchemy.types.INTEGER
879
+ # to_sql_kw['dtype'] = dtype
880
+ elif self.flavor == 'duckdb':
861
881
  dtype = to_sql_kw.get('dtype', {})
862
- for col, typ in df.dtypes.items():
863
- if are_dtypes_equal(str(typ), 'bool'):
864
- dtype[col] = sqlalchemy.types.INTEGER
865
- to_sql_kw['dtype'] = dtype
882
+ dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
883
+ for col in dt_cols:
884
+ df[col] = coerce_timezone(df[col], strip_utc=False)
866
885
 
867
886
  ### Check for JSON columns.
868
887
  if self.flavor not in json_flavors:
meerschaum/connectors/valkey/_pipes.py CHANGED
@@ -706,7 +706,7 @@ def get_sync_time(
706
706
  """
707
707
  from meerschaum.utils.dtypes import are_dtypes_equal
708
708
  dt_col = pipe.columns.get('datetime', None)
709
- dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
709
+ dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
710
710
  if not dt_col:
711
711
  return None
712
712
 
meerschaum/core/Pipe/__init__.py CHANGED
@@ -153,6 +153,7 @@ class Pipe:
153
153
  dtypes: Optional[Dict[str, str]] = None,
154
154
  instance: Optional[Union[str, InstanceConnector]] = None,
155
155
  temporary: bool = False,
156
+ upsert: Optional[bool] = None,
156
157
  mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
157
158
  cache: bool = False,
158
159
  debug: bool = False,
@@ -201,6 +202,9 @@ class Pipe:
201
202
  instance: Optional[Union[str, InstanceConnector]], default None
202
203
  Alias for `mrsm_instance`. If `mrsm_instance` is supplied, this value is ignored.
203
204
 
205
+ upsert: Optional[bool], default None
206
+ If `True`, set `upsert` to `True` in the parameters.
207
+
204
208
  temporary: bool, default False
205
209
  If `True`, prevent instance tables (pipes, users, plugins) from being created.
206
210
 
@@ -268,7 +272,7 @@ class Pipe:
268
272
  or indexes
269
273
  or self._attributes.get('parameters', {}).get('indices', None)
270
274
  or self._attributes.get('parameters', {}).get('indexes', None)
271
- ) or columns
275
+ )
272
276
  if isinstance(indices, dict):
273
277
  indices_key = (
274
278
  'indexes'
@@ -292,6 +296,9 @@ class Pipe:
292
296
  elif dtypes is not None:
293
297
  warn(f"The provided dtypes are of invalid type '{type(dtypes)}'.")
294
298
 
299
+ if isinstance(upsert, bool):
300
+ self._attributes['parameters']['upsert'] = upsert
301
+
295
302
  ### NOTE: The parameters dictionary is {} by default.
296
303
  ### A Pipe may be registered without parameters, then edited,
297
304
  ### or a Pipe may be registered with parameters set in-memory first.
@@ -308,7 +315,6 @@ class Pipe:
308
315
 
309
316
  self._cache = cache and get_config('system', 'experimental', 'cache')
310
317
 
311
-
312
318
  @property
313
319
  def meta(self):
314
320
  """
@@ -321,7 +327,6 @@ class Pipe:
321
327
  'instance': self.instance_keys,
322
328
  }
323
329
 
324
-
325
330
  def keys(self) -> List[str]:
326
331
  """
327
332
  Return the ordered keys for this pipe.
@@ -332,7 +337,6 @@ class Pipe:
332
337
  if key != 'instance'
333
338
  }
334
339
 
335
-
336
340
  @property
337
341
  def instance_connector(self) -> Union[InstanceConnector, None]:
338
342
  """
@@ -369,7 +373,6 @@ class Pipe:
369
373
  return None
370
374
  return self._connector
371
375
 
372
-
373
376
  @property
374
377
  def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
375
378
  """
@@ -391,7 +394,6 @@ class Pipe:
391
394
 
392
395
  return self._cache_connector
393
396
 
394
-
395
397
  @property
396
398
  def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
397
399
  """
@@ -433,11 +435,9 @@ class Pipe:
433
435
 
434
436
  return self._cache_pipe
435
437
 
436
-
437
438
  def __str__(self, ansi: bool=False):
438
439
  return pipe_repr(self, ansi=ansi)
439
440
 
440
-
441
441
  def __eq__(self, other):
442
442
  try:
443
443
  return (
@@ -489,7 +489,6 @@ class Pipe:
489
489
  """
490
490
  self.__init__(**_state)
491
491
 
492
-
493
492
  def __getitem__(self, key: str) -> Any:
494
493
  """
495
494
  Index the pipe's attributes.