meerschaum-2.5.0-py3-none-any.whl → meerschaum-2.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. meerschaum/_internal/arguments/_parser.py +6 -1
  2. meerschaum/_internal/entry.py +16 -5
  3. meerschaum/actions/edit.py +6 -6
  4. meerschaum/actions/sql.py +12 -11
  5. meerschaum/api/dash/pages/login.py +17 -17
  6. meerschaum/api/dash/pipes.py +104 -13
  7. meerschaum/api/routes/_pipes.py +58 -40
  8. meerschaum/api/routes/_webterm.py +1 -0
  9. meerschaum/config/_edit.py +46 -19
  10. meerschaum/config/_read_config.py +20 -9
  11. meerschaum/config/_version.py +1 -1
  12. meerschaum/config/stack/__init__.py +1 -1
  13. meerschaum/config/static/__init__.py +1 -0
  14. meerschaum/connectors/api/_APIConnector.py +1 -0
  15. meerschaum/connectors/api/_pipes.py +39 -8
  16. meerschaum/connectors/sql/_SQLConnector.py +4 -3
  17. meerschaum/connectors/sql/_pipes.py +511 -118
  18. meerschaum/connectors/sql/_sql.py +55 -15
  19. meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
  20. meerschaum/connectors/valkey/_pipes.py +11 -5
  21. meerschaum/core/Pipe/__init__.py +27 -9
  22. meerschaum/core/Pipe/_attributes.py +181 -18
  23. meerschaum/core/Pipe/_clear.py +10 -8
  24. meerschaum/core/Pipe/_copy.py +2 -0
  25. meerschaum/core/Pipe/_data.py +65 -17
  26. meerschaum/core/Pipe/_deduplicate.py +30 -28
  27. meerschaum/core/Pipe/_dtypes.py +4 -4
  28. meerschaum/core/Pipe/_fetch.py +12 -10
  29. meerschaum/core/Pipe/_sync.py +28 -11
  30. meerschaum/core/Pipe/_verify.py +52 -49
  31. meerschaum/utils/dataframe.py +64 -34
  32. meerschaum/utils/dtypes/__init__.py +25 -6
  33. meerschaum/utils/dtypes/sql.py +76 -33
  34. meerschaum/utils/misc.py +57 -24
  35. meerschaum/utils/packages/_packages.py +2 -1
  36. meerschaum/utils/schedule.py +7 -5
  37. meerschaum/utils/sql.py +697 -44
  38. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
  39. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
  40. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
meerschaum/utils/dataframe.py CHANGED
@@ -94,14 +94,14 @@ def filter_unseen_df(
94
94
  ----------
95
95
  old_df: 'pd.DataFrame'
96
96
  The original (target) dataframe. Acts as a filter on the `new_df`.
97
-
97
+
98
98
  new_df: 'pd.DataFrame'
99
99
  The fetched (source) dataframe. Rows that are contained in `old_df` are removed.
100
100
 
101
101
  safe_copy: bool, default True
102
102
  If `True`, create a copy before comparing and modifying the dataframes.
103
103
  Setting to `False` may mutate the DataFrames.
104
-
104
+
105
105
  dtypes: Optional[Dict[str, Any]], default None
106
106
  Optionally specify the datatypes of the dataframe.
107
107
 
@@ -234,8 +234,10 @@ def filter_unseen_df(
234
234
  cast_dt_cols = True
235
235
  try:
236
236
  for col, typ in dt_dtypes.items():
237
- tz = typ.split(',')[-1].strip() if ',' in typ else None
238
- new_df[col] = coerce_timezone(pd.to_datetime(new_df[col], utc=True))
237
+ if col in old_df.columns:
238
+ old_df[col] = coerce_timezone(old_df[col])
239
+ if col in new_df.columns:
240
+ new_df[col] = coerce_timezone(new_df[col])
239
241
  cast_dt_cols = False
240
242
  except Exception as e:
241
243
  warn(f"Could not cast datetime columns:\n{e}")
@@ -363,6 +365,7 @@ def filter_unseen_df(
363
365
  def parse_df_datetimes(
364
366
  df: 'pd.DataFrame',
365
367
  ignore_cols: Optional[Iterable[str]] = None,
368
+ strip_timezone: bool = False,
366
369
  chunksize: Optional[int] = None,
367
370
  dtype_backend: str = 'numpy_nullable',
368
371
  debug: bool = False,
@@ -378,6 +381,9 @@ def parse_df_datetimes(
378
381
  ignore_cols: Optional[Iterable[str]], default None
379
382
  If provided, do not attempt to coerce these columns as datetimes.
380
383
 
384
+ strip_timezone: bool, default False
385
+ If `True`, remove the UTC `tzinfo` property.
386
+
381
387
  chunksize: Optional[int], default None
382
388
  If the pandas implementation is `'dask'`, use this chunksize for the distributed dataframe.
383
389
 
@@ -385,7 +391,7 @@ def parse_df_datetimes(
385
391
  If `df` is not a DataFrame and new one needs to be constructed,
386
392
  use this as the datatypes backend.
387
393
  Accepted values are 'numpy_nullable' and 'pyarrow'.
388
-
394
+
389
395
  debug: bool, default False
390
396
  Verbosity toggle.
391
397
 
@@ -447,7 +453,7 @@ def parse_df_datetimes(
447
453
  for doc in df
448
454
  ] for k in keys
449
455
  },
450
- npartitions = npartitions,
456
+ npartitions=npartitions,
451
457
  )
452
458
  elif isinstance(df, dict):
453
459
  df = pd.DataFrame.from_dict(df, npartitions=npartitions)
@@ -480,7 +486,7 @@ def parse_df_datetimes(
480
486
  if len(cols_to_inspect) == 0:
481
487
  if debug:
482
488
  dprint(f"All columns are ignored, skipping datetime detection...")
483
- return df
489
+ return df.fillna(pandas.NA)
484
490
 
485
491
  ### apply regex to columns to determine which are ISO datetimes
486
492
  iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
@@ -493,21 +499,25 @@ def parse_df_datetimes(
493
499
  if not datetime_cols:
494
500
  if debug:
495
501
  dprint("No columns detected as datetimes, returning...")
496
- return df
502
+ return df.fillna(pandas.NA)
497
503
 
498
504
  if debug:
499
505
  dprint("Converting columns to datetimes: " + str(datetime_cols))
500
506
 
501
507
  try:
502
508
  if not using_dask:
503
- df[datetime_cols] = df[datetime_cols].apply(pd.to_datetime, utc=True)
509
+ df[datetime_cols] = df[datetime_cols].apply(
510
+ pd.to_datetime,
511
+ utc=True,
512
+ format='ISO8601',
513
+ )
504
514
  else:
505
515
  df[datetime_cols] = df[datetime_cols].apply(
506
516
  pd.to_datetime,
507
517
  utc=True,
508
518
  axis=1,
509
519
  meta={
510
- col: 'datetime64[ns]'
520
+ col: 'datetime64[ns, UTC]'
511
521
  for col in datetime_cols
512
522
  }
513
523
  )
@@ -517,13 +527,17 @@ def parse_df_datetimes(
517
527
  + f"{traceback.format_exc()}"
518
528
  )
519
529
 
520
- for dt in datetime_cols:
521
- try:
522
- df[dt] = df[dt].dt.tz_localize(None)
523
- except Exception:
524
- warn(f"Unable to convert column '{dt}' to naive datetime:\n{traceback.format_exc()}")
530
+ if strip_timezone:
531
+ for dt in datetime_cols:
532
+ try:
533
+ df[dt] = df[dt].dt.tz_localize(None)
534
+ except Exception:
535
+ warn(
536
+ f"Unable to convert column '{dt}' to naive datetime:\n"
537
+ + f"{traceback.format_exc()}"
538
+ )
525
539
 
526
- return df
540
+ return df.fillna(pandas.NA)
527
541
 
528
542
 
529
543
  def get_unhashable_cols(df: 'pd.DataFrame') -> List[str]:
@@ -674,6 +688,7 @@ def enforce_dtypes(
674
688
  dtypes: Dict[str, str],
675
689
  safe_copy: bool = True,
676
690
  coerce_numeric: bool = True,
691
+ coerce_timezone: bool = True,
677
692
  debug: bool = False,
678
693
  ) -> 'pd.DataFrame':
679
694
  """
@@ -695,6 +710,9 @@ def enforce_dtypes(
695
710
  coerce_numeric: bool, default True
696
711
  If `True`, convert float and int collisions to numeric.
697
712
 
713
+ coerce_timezone: bool, default True
714
+ If `True`, convert datetimes to UTC.
715
+
698
716
  debug: bool, default False
699
717
  Verbosity toggle.
700
718
 
@@ -703,21 +721,18 @@ def enforce_dtypes(
703
721
  The Pandas DataFrame with the types enforced.
704
722
  """
705
723
  import json
706
- import traceback
707
- from decimal import Decimal
708
724
  from meerschaum.utils.debug import dprint
709
- from meerschaum.utils.warnings import warn
710
725
  from meerschaum.utils.formatting import pprint
711
- from meerschaum.config.static import STATIC_CONFIG
712
- from meerschaum.utils.packages import import_pandas
713
726
  from meerschaum.utils.dtypes import (
714
727
  are_dtypes_equal,
715
728
  to_pandas_dtype,
716
729
  is_dtype_numeric,
717
730
  attempt_cast_to_numeric,
718
731
  attempt_cast_to_uuid,
719
- coerce_timezone,
732
+ coerce_timezone as _coerce_timezone,
720
733
  )
734
+ pandas = mrsm.attempt_import('pandas')
735
+ is_dask = 'dask' in df.__module__
721
736
  if safe_copy:
722
737
  df = df.copy()
723
738
  if len(df.columns) == 0:
@@ -744,6 +759,11 @@ def enforce_dtypes(
744
759
  for col, typ in dtypes.items()
745
760
  if typ == 'uuid'
746
761
  ]
762
+ datetime_cols = [
763
+ col
764
+ for col, typ in dtypes.items()
765
+ if are_dtypes_equal(typ, 'datetime')
766
+ ]
747
767
  df_numeric_cols = get_numeric_cols(df)
748
768
  if debug:
749
769
  dprint("Desired data types:")
@@ -792,6 +812,13 @@ def enforce_dtypes(
792
812
  if debug:
793
813
  dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
794
814
 
815
+ if datetime_cols and coerce_timezone:
816
+ if debug:
817
+ dprint(f"Checking for datetime conversion: {datetime_cols}")
818
+ for col in datetime_cols:
819
+ if col in df.columns:
820
+ df[col] = _coerce_timezone(df[col])
821
+
795
822
  df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
796
823
  if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
797
824
  if debug:
@@ -826,8 +853,7 @@ def enforce_dtypes(
826
853
  if debug:
827
854
  dprint(
828
855
  "The incoming DataFrame has mostly the same types, skipping enforcement."
829
- + "The only detected difference was in the following datetime columns.\n"
830
- + " Timezone information may be stripped."
856
+ + "The only detected difference was in the following datetime columns."
831
857
  )
832
858
  pprint(detected_dt_cols)
833
859
  return df
@@ -930,11 +956,15 @@ def get_datetime_bound_from_df(
930
956
  if datetime_column not in df.columns:
931
957
  return None
932
958
 
933
- dt_val = (
934
- df[datetime_column].min(skipna=True)
935
- if minimum else df[datetime_column].max(skipna=True)
936
- )
937
- if is_dask and dt_val is not None:
959
+ try:
960
+ dt_val = (
961
+ df[datetime_column].min(skipna=True)
962
+ if minimum
963
+ else df[datetime_column].max(skipna=True)
964
+ )
965
+ except Exception:
966
+ dt_val = pandas.NA
967
+ if is_dask and dt_val is not None and dt_val is not pandas.NA:
938
968
  dt_val = dt_val.compute()
939
969
 
940
970
  return (
@@ -1194,9 +1224,9 @@ def query_df(
1194
1224
  dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
1195
1225
 
1196
1226
  if inplace:
1197
- df.infer_objects(copy=False).fillna(NA, inplace=True)
1227
+ df.fillna(NA, inplace=True)
1198
1228
  else:
1199
- df = df.infer_objects(copy=False).fillna(NA)
1229
+ df = df.infer_objects().fillna(NA)
1200
1230
 
1201
1231
  if isinstance(begin, str):
1202
1232
  begin = dateutil_parser.parse(begin)
@@ -1243,12 +1273,12 @@ def query_df(
1243
1273
  end_tz = end.tzinfo if end is not None else None
1244
1274
 
1245
1275
  if begin_tz is not None or end_tz is not None or df_tz is not None:
1246
- begin = coerce_timezone(begin)
1247
- end = coerce_timezone(end)
1276
+ begin = coerce_timezone(begin, strip_utc=False)
1277
+ end = coerce_timezone(end, strip_utc=False)
1248
1278
  if df_tz is not None:
1249
1279
  if debug:
1250
1280
  dprint(f"Casting column '{datetime_column}' to UTC...")
1251
- df[datetime_column] = coerce_timezone(df[datetime_column])
1281
+ df[datetime_column] = coerce_timezone(df[datetime_column], strip_utc=False)
1252
1282
  dprint(f"Using datetime bounds:\n{begin=}\n{end=}")
1253
1283
 
1254
1284
  in_ex_params = get_in_ex_params(params)
meerschaum/utils/dtypes/__init__.py CHANGED
@@ -19,7 +19,7 @@ MRSM_PD_DTYPES: Dict[str, str] = {
19
19
  'json': 'object',
20
20
  'numeric': 'object',
21
21
  'uuid': 'object',
22
- 'datetime': 'datetime64[ns]',
22
+ 'datetime': 'datetime64[ns, UTC]',
23
23
  'bool': 'bool[pyarrow]',
24
24
  'int': 'Int64',
25
25
  'int8': 'Int8',
@@ -245,7 +245,10 @@ def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
245
245
  return x
246
246
 
247
247
 
248
- def coerce_timezone(dt: Any) -> Any:
248
+ def coerce_timezone(
249
+ dt: Any,
250
+ strip_utc: bool = False,
251
+ ) -> Any:
249
252
  """
250
253
  Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
251
254
  return a naive datetime in terms of UTC.
@@ -256,13 +259,29 @@ def coerce_timezone(dt: Any) -> Any:
256
259
  if isinstance(dt, int):
257
260
  return dt
258
261
 
259
- dt_is_series = hasattr(dt, 'dtype')
262
+ if isinstance(dt, str):
263
+ dateutil_parser = mrsm.attempt_import('dateutil.parser')
264
+ dt = dateutil_parser.parse(dt)
265
+
266
+ dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
260
267
 
261
268
  if dt_is_series:
269
+ is_dask = 'dask' in dt.__module__
262
270
  pandas = mrsm.attempt_import('pandas')
263
- return pandas.to_datetime(dt, utc=True).apply(lambda x: x.replace(tzinfo=None))
271
+ dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
272
+ dt_series = (
273
+ pandas.to_datetime(dt, utc=True)
274
+ if dd is None
275
+ else dd.to_datetime(dt, utc=True)
276
+ )
277
+ if strip_utc:
278
+ dt_series = dt_series.apply(lambda x: x.replace(tzinfo=None))
279
+
280
+ return dt_series
264
281
 
265
282
  if dt.tzinfo is None:
266
- return dt
283
+ if strip_utc:
284
+ return dt
285
+ return dt.replace(tzinfo=timezone.utc)
267
286
 
268
- return dt.astimezone(timezone.utc).replace(tzinfo=None)
287
+ return dt.astimezone(timezone.utc)
meerschaum/utils/dtypes/sql.py CHANGED
@@ -7,15 +7,16 @@ Utility functions for working with SQL data types.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- from meerschaum.utils.typing import Dict, Union, Tuple
10
+ from meerschaum.utils.typing import Dict, Union, Tuple, List
11
11
 
12
12
  NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
13
13
  'mariadb': (38, 20),
14
14
  'mysql': (38, 20),
15
15
  'mssql': (28, 10),
16
- 'duckdb': (15, 4),
16
+ 'duckdb': (15, 3),
17
17
  'sqlite': (15, 4),
18
18
  }
19
+ TIMEZONE_NAIVE_FLAVORS = {'oracle', 'mysql', 'mariadb'}
19
20
 
20
21
  ### MySQL doesn't allow for casting as BIGINT, so this is a workaround.
21
22
  DB_FLAVORS_CAST_DTYPES = {
@@ -49,6 +50,7 @@ DB_FLAVORS_CAST_DTYPES = {
49
50
  'NVARCHAR(2000)': 'NVARCHAR2(2000)',
50
51
  'NVARCHAR': 'NVARCHAR2(2000)',
51
52
  'NVARCHAR2': 'NVARCHAR2(2000)',
53
+ 'CHAR': 'CHAR(36)', # UUID columns
52
54
  },
53
55
  'mssql': {
54
56
  'NVARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)',
@@ -78,7 +80,9 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
78
80
  'NUMBER': 'numeric',
79
81
  'NUMERIC': 'numeric',
80
82
  'TIMESTAMP': 'datetime64[ns]',
83
+ 'TIMESTAMP WITHOUT TIMEZONE': 'datetime64[ns]',
81
84
  'TIMESTAMP WITH TIMEZONE': 'datetime64[ns, UTC]',
85
+ 'TIMESTAMP WITH TIME ZONE': 'datetime64[ns, UTC]',
82
86
  'TIMESTAMPTZ': 'datetime64[ns, UTC]',
83
87
  'DATE': 'datetime64[ns]',
84
88
  'DATETIME': 'datetime64[ns]',
@@ -160,7 +164,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
160
164
  'mariadb': 'DATETIME',
161
165
  'mysql': 'DATETIME',
162
166
  'mssql': 'DATETIME2',
163
- 'oracle': 'DATE',
167
+ 'oracle': 'TIMESTAMP',
164
168
  'sqlite': 'DATETIME',
165
169
  'duckdb': 'TIMESTAMP',
166
170
  'citus': 'TIMESTAMP',
@@ -168,24 +172,37 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
168
172
  'default': 'DATETIME',
169
173
  },
170
174
  'datetime64[ns, UTC]': {
171
- 'timescaledb': 'TIMESTAMP',
172
- 'postgresql': 'TIMESTAMP',
173
- 'mariadb': 'TIMESTAMP',
174
- 'mysql': 'TIMESTAMP',
175
+ 'timescaledb': 'TIMESTAMPTZ',
176
+ 'postgresql': 'TIMESTAMPTZ',
177
+ 'mariadb': 'DATETIME',
178
+ 'mysql': 'DATETIME',
175
179
  'mssql': 'DATETIMEOFFSET',
176
180
  'oracle': 'TIMESTAMP',
177
181
  'sqlite': 'TIMESTAMP',
178
- 'duckdb': 'TIMESTAMP',
179
- 'citus': 'TIMESTAMP',
180
- 'cockroachdb': 'TIMESTAMP',
181
- 'default': 'TIMESTAMP',
182
+ 'duckdb': 'TIMESTAMPTZ',
183
+ 'citus': 'TIMESTAMPTZ',
184
+ 'cockroachdb': 'TIMESTAMPTZ',
185
+ 'default': 'TIMESTAMPTZ',
186
+ },
187
+ 'datetime': {
188
+ 'timescaledb': 'TIMESTAMPTZ',
189
+ 'postgresql': 'TIMESTAMPTZ',
190
+ 'mariadb': 'DATETIME',
191
+ 'mysql': 'DATETIME',
192
+ 'mssql': 'DATETIMEOFFSET',
193
+ 'oracle': 'TIMESTAMP',
194
+ 'sqlite': 'TIMESTAMP',
195
+ 'duckdb': 'TIMESTAMPTZ',
196
+ 'citus': 'TIMESTAMPTZ',
197
+ 'cockroachdb': 'TIMESTAMPTZ',
198
+ 'default': 'TIMESTAMPTZ',
182
199
  },
183
200
  'bool': {
184
201
  'timescaledb': 'BOOLEAN',
185
202
  'postgresql': 'BOOLEAN',
186
203
  'mariadb': 'BOOLEAN',
187
204
  'mysql': 'BOOLEAN',
188
- 'mssql': 'INTEGER',
205
+ 'mssql': 'BIT',
189
206
  'oracle': 'INTEGER',
190
207
  'sqlite': 'FLOAT',
191
208
  'duckdb': 'BOOLEAN',
@@ -252,7 +269,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
252
269
  'mysql': 'CHAR(36)',
253
270
  'mssql': 'UNIQUEIDENTIFIER',
254
271
  ### I know this is too much space, but erring on the side of caution.
255
- 'oracle': 'NVARCHAR(2000)',
272
+ 'oracle': 'CHAR(36)',
256
273
  'sqlite': 'TEXT',
257
274
  'duckdb': 'VARCHAR',
258
275
  'citus': 'UUID',
@@ -301,24 +318,24 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
301
318
  'default': 'DateTime',
302
319
  },
303
320
  'datetime64[ns, UTC]': {
304
- 'timescaledb': 'DateTime',
305
- 'postgresql': 'DateTime',
306
- 'mariadb': 'DateTime',
307
- 'mysql': 'DateTime',
321
+ 'timescaledb': 'DateTime(timezone=True)',
322
+ 'postgresql': 'DateTime(timezone=True)',
323
+ 'mariadb': 'DateTime(timezone=True)',
324
+ 'mysql': 'DateTime(timezone=True)',
308
325
  'mssql': 'sqlalchemy.dialects.mssql.DATETIMEOFFSET',
309
- 'oracle': 'DateTime',
310
- 'sqlite': 'DateTime',
311
- 'duckdb': 'DateTime',
312
- 'citus': 'DateTime',
313
- 'cockroachdb': 'DateTime',
314
- 'default': 'DateTime',
326
+ 'oracle': 'sqlalchemy.dialects.oracle.TIMESTAMP(timezone=True)',
327
+ 'sqlite': 'DateTime(timezone=True)',
328
+ 'duckdb': 'DateTime(timezone=True)',
329
+ 'citus': 'DateTime(timezone=True)',
330
+ 'cockroachdb': 'DateTime(timezone=True)',
331
+ 'default': 'DateTime(timezone=True)',
315
332
  },
316
333
  'bool': {
317
334
  'timescaledb': 'Boolean',
318
335
  'postgresql': 'Boolean',
319
336
  'mariadb': 'Integer',
320
337
  'mysql': 'Integer',
321
- 'mssql': 'Integer',
338
+ 'mssql': 'sqlalchemy.dialects.mssql.BIT',
322
339
  'oracle': 'Integer',
323
340
  'sqlite': 'Float',
324
341
  'duckdb': 'Boolean',
@@ -384,7 +401,7 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
384
401
  'mariadb': 'sqlalchemy.dialects.mysql.CHAR(36)',
385
402
  'mysql': 'sqlalchemy.dialects.mysql.CHAR(36)',
386
403
  'mssql': 'Uuid',
387
- 'oracle': 'UnicodeText',
404
+ 'oracle': 'sqlalchemy.dialects.oracle.CHAR(36)',
388
405
  'sqlite': 'UnicodeText',
389
406
  'duckdb': 'UnicodeText',
390
407
  'citus': 'Uuid',
@@ -393,6 +410,20 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
393
410
  },
394
411
  }
395
412
 
413
+ AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
414
+ 'timescaledb': 'GENERATED BY DEFAULT AS IDENTITY',
415
+ 'postgresql': 'GENERATED BY DEFAULT AS IDENTITY',
416
+ 'mariadb': 'AUTO_INCREMENT',
417
+ 'mysql': 'AUTO_INCREMENT',
418
+ 'mssql': 'IDENTITY(1,1)',
419
+ 'oracle': 'GENERATED BY DEFAULT ON NULL AS IDENTITY',
420
+ 'sqlite': 'AUTOINCREMENT',
421
+ 'duckdb': 'GENERATED BY DEFAULT',
422
+ 'citus': 'GENERATED BY DEFAULT',
423
+ 'cockroachdb': 'GENERATED BY DEFAULT AS IDENTITY',
424
+ 'default': 'GENERATED BY DEFAULT AS IDENTITY',
425
+ }
426
+
396
427
 
397
428
  def get_pd_type_from_db_type(db_type: str, allow_custom_dtypes: bool = False) -> str:
398
429
  """
@@ -456,10 +487,10 @@ def get_db_type_from_pd_type(
456
487
  The database data type for the incoming Pandas data type.
457
488
  If nothing can be found, a warning will be thrown and 'TEXT' will be returned.
458
489
  """
459
- import ast
460
490
  from meerschaum.utils.warnings import warn
461
491
  from meerschaum.utils.packages import attempt_import
462
- from meerschaum.utils.dtypes import are_dtypes_equal
492
+ from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
493
+ from meerschaum.utils.misc import parse_arguments_str
463
494
  sqlalchemy_types = attempt_import('sqlalchemy.types')
464
495
 
465
496
  types_registry = (
@@ -512,15 +543,16 @@ def get_db_type_from_pd_type(
512
543
 
513
544
  if db_type.startswith('sqlalchemy.dialects'):
514
545
  dialect, typ_class_name = db_type.replace('sqlalchemy.dialects.', '').split('.', maxsplit=2)
515
- arg = None
546
+ cls_args, cls_kwargs = None, None
516
547
  if '(' in typ_class_name:
517
- typ_class_name, arg_str = typ_class_name.split('(', maxsplit=1)
518
- arg = ast.literal_eval(arg_str.rstrip(')'))
548
+ typ_class_name, args_str = typ_class_name.split('(', maxsplit=1)
549
+ args_str = args_str.rstrip(')')
550
+ cls_args, cls_kwargs = parse_arguments_str(args_str)
519
551
  sqlalchemy_dialects_flavor_module = attempt_import(f'sqlalchemy.dialects.{dialect}')
520
552
  cls = getattr(sqlalchemy_dialects_flavor_module, typ_class_name)
521
- if arg is None:
553
+ if cls_args is None:
522
554
  return cls
523
- return cls(arg)
555
+ return cls(*cls_args, **cls_kwargs)
524
556
 
525
557
  if 'numeric' in db_type.lower():
526
558
  numeric_type_str = PD_TO_DB_DTYPES_FLAVORS['numeric'].get(flavor, 'NUMERIC')
@@ -528,4 +560,15 @@ def get_db_type_from_pd_type(
528
560
  return sqlalchemy_types.Numeric
529
561
  precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
530
562
  return sqlalchemy_types.Numeric(precision, scale)
531
- return getattr(sqlalchemy_types, db_type)
563
+
564
+ cls_args, cls_kwargs = None, None
565
+ typ_class_name = db_type
566
+ if '(' in db_type:
567
+ typ_class_name, args_str = db_type.split('(', maxsplit=1)
568
+ args_str = args_str.rstrip(')')
569
+ cls_args, cls_kwargs = parse_arguments_str(args_str)
570
+
571
+ cls = getattr(sqlalchemy_types, typ_class_name)
572
+ if cls_args is None:
573
+ return cls
574
+ return cls(*cls_args, **cls_kwargs)
meerschaum/utils/misc.py CHANGED
@@ -214,20 +214,20 @@ def parse_config_substitution(
214
214
 
215
215
 
216
216
  def edit_file(
217
- path: Union[pathlib.Path, str],
218
- default_editor: str = 'pyvim',
219
- debug: bool = False
220
- ) -> bool:
217
+ path: Union['pathlib.Path', str],
218
+ default_editor: str = 'pyvim',
219
+ debug: bool = False
220
+ ) -> bool:
221
221
  """
222
222
  Open a file for editing.
223
-
223
+
224
224
  Attempt to launch the user's defined `$EDITOR`, otherwise use `pyvim`.
225
225
 
226
226
  Parameters
227
227
  ----------
228
228
  path: Union[pathlib.Path, str]
229
229
  The path to the file to be edited.
230
-
230
+
231
231
  default_editor: str, default 'pyvim'
232
232
  If `$EDITOR` is not set, use this instead.
233
233
  If `pyvim` is not installed, it will install it from PyPI.
@@ -250,7 +250,7 @@ def edit_file(
250
250
  rc = call([EDITOR, path])
251
251
  except Exception as e: ### can't open with default editors
252
252
  if debug:
253
- dprint(e)
253
+ dprint(str(e))
254
254
  dprint('Failed to open file with system editor. Falling back to pyvim...')
255
255
  pyvim = attempt_import('pyvim', lazy=False)
256
256
  rc = run_python_package('pyvim', [path], venv=package_venv(pyvim), debug=debug)
@@ -258,10 +258,10 @@ def edit_file(
258
258
 
259
259
 
260
260
  def is_pipe_registered(
261
- pipe: mrsm.Pipe,
262
- pipes: PipesDict,
263
- debug: bool = False
264
- ) -> bool:
261
+ pipe: mrsm.Pipe,
262
+ pipes: PipesDict,
263
+ debug: bool = False
264
+ ) -> bool:
265
265
  """
266
266
  Check if a Pipe is inside the pipes dictionary.
267
267
 
@@ -269,10 +269,10 @@ def is_pipe_registered(
269
269
  ----------
270
270
  pipe: meerschaum.Pipe
271
271
  The pipe to see if it's in the dictionary.
272
-
272
+
273
273
  pipes: PipesDict
274
274
  The dictionary to search inside.
275
-
275
+
276
276
  debug: bool, default False
277
277
  Verbosity toggle.
278
278
 
@@ -975,13 +975,13 @@ def json_serialize_datetime(dt: datetime) -> Union[str, None]:
975
975
 
976
976
 
977
977
  def wget(
978
- url: str,
979
- dest: Optional[Union[str, 'pathlib.Path']] = None,
980
- headers: Optional[Dict[str, Any]] = None,
981
- color: bool = True,
982
- debug: bool = False,
983
- **kw: Any
984
- ) -> 'pathlib.Path':
978
+ url: str,
979
+ dest: Optional[Union[str, 'pathlib.Path']] = None,
980
+ headers: Optional[Dict[str, Any]] = None,
981
+ color: bool = True,
982
+ debug: bool = False,
983
+ **kw: Any
984
+ ) -> 'pathlib.Path':
985
985
  """
986
986
  Mimic `wget` with `requests`.
987
987
 
@@ -989,7 +989,7 @@ def wget(
989
989
  ----------
990
990
  url: str
991
991
  The URL to the resource to be downloaded.
992
-
992
+
993
993
  dest: Optional[Union[str, pathlib.Path]], default None
994
994
  The destination path of the downloaded file.
995
995
  If `None`, save to the current directory.
@@ -1426,7 +1426,40 @@ def flatten_list(list_: List[Any]) -> List[Any]:
1426
1426
  yield item
1427
1427
 
1428
1428
 
1429
- def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTuple:
1429
+ def parse_arguments_str(args_str: str) -> Tuple[Tuple[Any], Dict[str, Any]]:
1430
+ """
1431
+ Parse a string containing the text to be passed into a function
1432
+ and return a tuple of args, kwargs.
1433
+
1434
+ Parameters
1435
+ ----------
1436
+ args_str: str
1437
+ The contents of the function parameter (as a string).
1438
+
1439
+ Returns
1440
+ -------
1441
+ A tuple of args (tuple) and kwargs (dict[str, Any]).
1442
+
1443
+ Examples
1444
+ --------
1445
+ >>> parse_arguments_str('123, 456, foo=789, bar="baz"')
1446
+ (123, 456), {'foo': 789, 'bar': 'baz'}
1447
+ """
1448
+ import ast
1449
+ args = []
1450
+ kwargs = {}
1451
+
1452
+ for part in args_str.split(','):
1453
+ if '=' in part:
1454
+ key, val = part.split('=', 1)
1455
+ kwargs[key.strip()] = ast.literal_eval(val)
1456
+ else:
1457
+ args.append(ast.literal_eval(part.strip()))
1458
+
1459
+ return tuple(args), kwargs
1460
+
1461
+
1462
+ def make_symlink(src_path: 'pathlib.Path', dest_path: 'pathlib.Path') -> SuccessTuple:
1430
1463
  """
1431
1464
  Wrap around `pathlib.Path.symlink_to`, but add support for Windows.
1432
1465
 
@@ -1452,7 +1485,7 @@ def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTupl
1452
1485
  msg = str(e)
1453
1486
  if success:
1454
1487
  return success, "Success"
1455
-
1488
+
1456
1489
  ### Failed to create a symlink.
1457
1490
  ### If we're not on Windows, return an error.
1458
1491
  import platform
@@ -1477,7 +1510,7 @@ def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTupl
1477
1510
  shutil.copy(src_path, dest_path)
1478
1511
  except Exception as e:
1479
1512
  return False, str(e)
1480
-
1513
+
1481
1514
  return True, "Success"
1482
1515
 
1483
1516
 
meerschaum/utils/packages/_packages.py CHANGED
@@ -163,7 +163,8 @@ packages['api'] = {
163
163
  'fastapi' : 'fastapi>=0.111.0',
164
164
  'fastapi_login' : 'fastapi-login>=1.7.2',
165
165
  'multipart' : 'python-multipart>=0.0.9',
166
- 'httpx' : 'httpx>=0.24.1',
166
+ 'httpx' : 'httpx>=0.27.2',
167
+ 'httpcore' : 'httpcore>=1.0.6',
167
168
  'valkey' : 'valkey>=6.0.0',
168
169
  }
169
170
  packages['api'].update(packages['sql'])