meerschaum 2.6.16__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. meerschaum/_internal/arguments/_parse_arguments.py +1 -1
  2. meerschaum/actions/delete.py +65 -69
  3. meerschaum/actions/edit.py +22 -2
  4. meerschaum/actions/install.py +1 -2
  5. meerschaum/actions/sync.py +2 -3
  6. meerschaum/config/_default.py +1 -1
  7. meerschaum/config/_paths.py +2 -1
  8. meerschaum/config/_version.py +1 -1
  9. meerschaum/connectors/api/_pipes.py +4 -3
  10. meerschaum/connectors/sql/_create_engine.py +3 -3
  11. meerschaum/connectors/sql/_pipes.py +84 -38
  12. meerschaum/connectors/sql/_sql.py +6 -1
  13. meerschaum/connectors/valkey/_pipes.py +12 -1
  14. meerschaum/core/Pipe/__init__.py +23 -13
  15. meerschaum/core/Pipe/_attributes.py +19 -0
  16. meerschaum/core/Pipe/_dtypes.py +1 -1
  17. meerschaum/core/Pipe/_sync.py +61 -21
  18. meerschaum/core/Pipe/_verify.py +8 -7
  19. meerschaum/jobs/_Job.py +2 -1
  20. meerschaum/plugins/_Plugin.py +11 -14
  21. meerschaum/utils/daemon/Daemon.py +20 -13
  22. meerschaum/utils/dataframe.py +175 -13
  23. meerschaum/utils/dtypes/__init__.py +103 -14
  24. meerschaum/utils/dtypes/sql.py +26 -0
  25. meerschaum/utils/misc.py +8 -8
  26. meerschaum/utils/packages/_packages.py +1 -1
  27. meerschaum/utils/schedule.py +8 -3
  28. meerschaum/utils/sql.py +70 -47
  29. meerschaum/utils/venv/_Venv.py +4 -4
  30. meerschaum/utils/venv/__init__.py +33 -13
  31. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +2 -2
  32. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +38 -38
  33. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
  34. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
  35. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
  36. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  37. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
  38. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/utils/dtypes/__init__.py CHANGED
@@ -15,7 +15,19 @@ import meerschaum as mrsm
 from meerschaum.utils.typing import Dict, Union, Any
 from meerschaum.utils.warnings import warn
 
-MRSM_PD_DTYPES: Dict[str, str] = {
+MRSM_ALIAS_DTYPES: Dict[str, str] = {
+    'decimal': 'numeric',
+    'number': 'numeric',
+    'jsonl': 'json',
+    'JSON': 'json',
+    'binary': 'bytes',
+    'blob': 'bytes',
+    'varbinary': 'bytes',
+    'bytea': 'bytes',
+    'guid': 'uuid',
+    'UUID': 'uuid',
+}
+MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
     'json': 'object',
     'numeric': 'object',
     'uuid': 'object',
@@ -27,6 +39,8 @@ MRSM_PD_DTYPES: Dict[str, str] = {
     'int32': 'Int32',
     'int64': 'Int64',
     'str': 'string[python]',
+    'bytes': 'object',
+    None: 'object',
 }
 
 
@@ -38,6 +52,10 @@ def to_pandas_dtype(dtype: str) -> str:
     if known_dtype is not None:
         return known_dtype
 
+    alias_dtype = MRSM_ALIAS_DTYPES.get(dtype, None)
+    if alias_dtype is not None:
+        return MRSM_PD_DTYPES[alias_dtype]
+
     ### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
     ### treat it as a SQL db type.
     if dtype.split(' ')[0].isupper():
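
For illustration, the new alias table lets common database type names resolve to canonical Meerschaum dtypes before the pandas lookup (outputs follow directly from the dictionaries above):

    from meerschaum.utils.dtypes import to_pandas_dtype

    print(to_pandas_dtype('decimal'))   # 'object'  (alias of 'numeric')
    print(to_pandas_dtype('guid'))      # 'object'  (alias of 'uuid')
    print(to_pandas_dtype('binary'))    # 'object'  (alias of 'bytes')
    print(to_pandas_dtype('int64'))     # 'Int64'   (direct hit, unchanged behavior)
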
@@ -95,7 +113,7 @@ def are_dtypes_equal(
     try:
         if ldtype == rdtype:
             return True
-    except Exception as e:
+    except Exception:
         warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
         return False
 
@@ -115,6 +133,10 @@ def are_dtypes_equal(
     if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
         return True
 
+    bytes_dtypes = ('bytes', 'object')
+    if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
+        return True
+
     ldtype_clean = ldtype.split('[', maxsplit=1)[0]
     rdtype_clean = rdtype.split('[', maxsplit=1)[0]
 
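
Like the existing `uuid` handling, `bytes` and `object` now compare as equal; a quick sketch, assuming the two-string `ldtype`/`rdtype` call form shown above:

    from meerschaum.utils.dtypes import are_dtypes_equal

    assert are_dtypes_equal('bytes', 'object')    # bytes columns are stored as `object`
    assert not are_dtypes_equal('bytes', 'int64')
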
@@ -185,7 +207,7 @@ def attempt_cast_to_numeric(value: Any) -> Any:
             if not value_is_null(value)
             else Decimal('NaN')
         )
-    except Exception as e:
+    except Exception:
         return value
 
 
@@ -201,7 +223,23 @@ def attempt_cast_to_uuid(value: Any) -> Any:
             if not value_is_null(value)
             else None
         )
-    except Exception as e:
+    except Exception:
+        return value
+
+
+def attempt_cast_to_bytes(value: Any) -> Any:
+    """
+    Given a value, attempt to coerce it into a bytestring.
+    """
+    if isinstance(value, uuid.UUID):
+        return value
+    try:
+        return (
+            deserialize_base64(str(value))
+            if not value_is_null(value)
+            else None
+        )
+    except Exception:
         return value
 
 
@@ -251,7 +289,7 @@ def coerce_timezone(
 ) -> Any:
     """
     Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
-    return a naive datetime in terms of UTC.
+    return a UTC timestamp (strip timezone if `strip_utc` is `True`.
     """
     if dt is None:
         return None
@@ -266,9 +304,7 @@ def coerce_timezone(
     dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
 
     if dt_is_series:
-        is_dask = 'dask' in dt.__module__
         pandas = mrsm.attempt_import('pandas', lazy=False)
-        dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
 
         if (
             pandas.api.types.is_datetime64_any_dtype(dt) and (
@@ -279,14 +315,13 @@ def coerce_timezone(
         ):
             return dt
 
-        dt_series = (
-            pandas.to_datetime(dt, utc=True, format='ISO8601')
-            if dd is None
-            else dd.to_datetime(dt, utc=True, format='ISO8601')
-        )
+        dt_series = to_datetime(dt, coerce_utc=False)
         if strip_utc:
-            if dt_series.dt.tz is not None:
-                dt_series = dt_series.dt.tz_localize(None)
+            try:
+                if dt_series.dt.tz is not None:
+                    dt_series = dt_series.dt.tz_localize(None)
+            except Exception:
+                pass
 
         return dt_series
 
@@ -299,3 +334,57 @@ def coerce_timezone(
     if strip_utc:
         return utc_dt.replace(tzinfo=None)
     return utc_dt
+
+
+def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
+    """
+    Wrap `pd.to_datetime()` and add support for out-of-bounds values.
+    """
+    pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
+    is_dask = 'dask' in getattr(dt_val, '__module__', '')
+    dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
+    dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
+    pd = pandas if dd is None else dd
+
+    try:
+        new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
+        if as_pydatetime:
+            return new_dt_val.to_pydatetime()
+        return new_dt_val
+    except (pd.errors.OutOfBoundsDatetime, ValueError):
+        pass
+
+    def parse(x: Any) -> Any:
+        try:
+            return dateutil_parser.parse(x)
+        except Exception:
+            return x
+
+    if dt_is_series:
+        new_series = dt_val.apply(parse)
+        if coerce_utc:
+            return coerce_timezone(new_series)
+        return new_series
+
+    new_dt_val = parse(dt_val)
+    if not coerce_utc:
+        return new_dt_val
+    return coerce_timezone(new_dt_val)
+
+
+def serialize_bytes(data: bytes) -> str:
+    """
+    Return the given bytes as a base64-encoded string.
+    """
+    import base64
+    if not isinstance(data, bytes) and value_is_null(data):
+        return data
+    return base64.b64encode(data).decode('utf-8')
+
+
+def deserialize_base64(data: str) -> bytes:
+    """
+    Return the original bytestring from the given base64-encoded string.
+    """
+    import base64
+    return base64.b64decode(data)
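
A minimal round trip through the new helpers (values follow from the definitions above; the year-3000 timestamp is an arbitrary example that overflows pandas' default nanosecond bounds):

    from meerschaum.utils.dtypes import (
        attempt_cast_to_bytes,
        deserialize_base64,
        serialize_bytes,
        to_datetime,
    )

    ### Bytes survive a base64 round trip.
    encoded = serialize_bytes(b'\x00\x01\x02')            # 'AAEC'
    assert deserialize_base64(encoded) == b'\x00\x01\x02'
    assert attempt_cast_to_bytes(encoded) == b'\x00\x01\x02'

    ### Out-of-bounds values fall back to `dateutil` instead of raising.
    dt = to_datetime('3000-01-01 00:00:00')               # tz-aware (UTC) datetime
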
meerschaum/utils/dtypes/sql.py CHANGED
@@ -276,6 +276,19 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'cockroachdb': 'UUID',
         'default': 'TEXT',
     },
+    'bytes': {
+        'timescaledb': 'BYTEA',
+        'postgresql': 'BYTEA',
+        'mariadb': 'BLOB',
+        'mysql': 'BLOB',
+        'mssql': 'VARBINARY(MAX)',
+        'oracle': 'BLOB',
+        'sqlite': 'BLOB',
+        'duckdb': 'BLOB',
+        'citus': 'BYTEA',
+        'cockroachdb': 'BYTEA',
+        'default': 'BLOB',
+    },
 }
 PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
     'int': {
@@ -421,6 +434,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'cockroachdb': 'Uuid',
         'default': 'Uuid',
     },
+    'bytes': {
+        'timescaledb': 'LargeBinary',
+        'postgresql': 'LargeBinary',
+        'mariadb': 'LargeBinary',
+        'mysql': 'LargeBinary',
+        'mssql': 'LargeBinary',
+        'oracle': 'LargeBinary',
+        'sqlite': 'LargeBinary',
+        'duckdb': 'LargeBinary',
+        'citus': 'LargeBinary',
+        'cockroachdb': 'LargeBinary',
+        'default': 'LargeBinary',
+    },
 }
 
 AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
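
For illustration, the database type emitted for a `bytes` column now varies by flavor (values straight from the mapping above):

    from meerschaum.utils.dtypes.sql import PD_TO_DB_DTYPES_FLAVORS

    bytes_db_types = PD_TO_DB_DTYPES_FLAVORS['bytes']
    print(bytes_db_types['postgresql'])   # BYTEA
    print(bytes_db_types['mssql'])        # VARBINARY(MAX)
    print(bytes_db_types['oracle'])       # BLOB
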
meerschaum/utils/misc.py CHANGED
@@ -177,14 +177,14 @@ def string_to_dict(
         keys = _keys[:-1]
         try:
             val = ast.literal_eval(_keys[-1])
-        except Exception as e:
+        except Exception:
             val = str(_keys[-1])
 
         c = params_dict
         for _k in keys[:-1]:
             try:
                 k = ast.literal_eval(_k)
-            except Exception as e:
+            except Exception:
                 k = str(_k)
             if k not in c:
                 c[k] = {}
@@ -196,12 +196,12 @@
 
 
 def parse_config_substitution(
-        value: str,
-        leading_key: str = 'MRSM',
-        begin_key: str = '{',
-        end_key: str = '}',
-        delimeter: str = ':'
-    ) -> List[Any]:
+    value: str,
+    leading_key: str = 'MRSM',
+    begin_key: str = '{',
+    end_key: str = '}',
+    delimeter: str = ':'
+) -> List[Any]:
     """
     Parse Meerschaum substitution syntax
     E.g. MRSM{value1:value2} => ['value1', 'value2']
meerschaum/utils/packages/_packages.py CHANGED
@@ -79,7 +79,7 @@ packages: Dict[str, Dict[str, str]] = {
     },
     'drivers-extras': {
         'pyodbc' : 'pyodbc>=4.0.30',
-        'cx_Oracle' : 'cx_Oracle>=8.3.0',
+        'oracledb' : 'oracledb>=2.5.0',
     },
     'cli': {
         'pgcli' : 'pgcli>=3.1.0',
meerschaum/utils/schedule.py CHANGED
@@ -132,7 +132,7 @@ def schedule_function(
 
     try:
         loop.run_until_complete(run_scheduler())
-    except (KeyboardInterrupt, SystemExit) as e:
+    except (KeyboardInterrupt, SystemExit):
        loop.run_until_complete(_stop_scheduler())
 
     return True, "Success"
@@ -159,13 +159,13 @@ def parse_schedule(schedule: str, now: Optional[datetime] = None):
     )
 
     starting_ts = parse_start_time(schedule, now=now)
-    schedule = schedule.split(STARTING_KEYWORD)[0].strip()
+    schedule = schedule.split(STARTING_KEYWORD, maxsplit=1)[0].strip()
     for alias_keyword, true_keyword in SCHEDULE_ALIASES.items():
         schedule = schedule.replace(alias_keyword, true_keyword)
 
     ### TODO Allow for combining `and` + `or` logic.
     if '&' in schedule and '|' in schedule:
-        raise ValueError(f"Cannot accept both 'and' + 'or' logic in the schedule frequency.")
+        raise ValueError("Cannot accept both 'and' + 'or' logic in the schedule frequency.")
 
     join_str = '|' if '|' in schedule else '&'
     join_trigger = (
@@ -300,6 +300,11 @@ def parse_start_time(schedule: str, now: Optional[datetime] = None) -> datetime:
     try:
         if starting_str == 'now':
             starting_ts = now
+        elif starting_str.startswith('in '):
+            delta_vals = starting_str.replace('in ', '').split(' ', maxsplit=1)
+            delta_unit = delta_vals[-1].rstrip('s') + 's'
+            delta_num = float(delta_vals[0])
+            starting_ts = now + timedelta(**{delta_unit: delta_num})
         elif 'tomorrow' in starting_str or 'today' in starting_str:
             today = round_time(now, timedelta(days=1))
             tomorrow = today + timedelta(days=1)
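
The new `in` branch enables relative start times; a sketch of the arithmetic, assuming the `starting` keyword and a fixed `now` (behavior past the shown branch is not visible in this diff):

    from datetime import datetime, timezone
    from meerschaum.utils.schedule import parse_start_time

    now = datetime(2024, 12, 1, 12, 0, tzinfo=timezone.utc)
    ### 'in 30 minutes' -> now + timedelta(minutes=30.0)
    print(parse_start_time('daily starting in 30 minutes', now=now))
    # 2024-12-01 12:30:00+00:00
    ### Singular units normalize via 'hour'.rstrip('s') + 's' -> 'hours'.
    print(parse_start_time('daily starting in 1 hour', now=now))
    # 2024-12-01 13:00:00+00:00
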
meerschaum/utils/sql.py CHANGED
@@ -108,24 +108,30 @@ update_queries = {
         {cols_equal_values}
     """,
     'mssql': """
+        {with_temp_date_bounds}
         MERGE {target_table_name} f
-        USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
+        USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
             ON {and_subquery_f}
             AND {date_bounds_subquery}
         WHEN MATCHED THEN
             UPDATE
             {sets_subquery_none};
     """,
-    'mssql-upsert': """
+    'mssql-upsert': [
+        "{identity_insert_on}",
+        """
+        {with_temp_date_bounds}
         MERGE {target_table_name} f
-        USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
+        USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
             ON {and_subquery_f}
             AND {date_bounds_subquery}
         {when_matched_update_sets_subquery_none}
         WHEN NOT MATCHED THEN
             INSERT ({patch_cols_str})
             VALUES ({patch_cols_prefixed_str});
-    """,
+        """,
+        "{identity_insert_off}",
+    ],
     'oracle': """
         MERGE INTO {target_table_name} f
         USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
@@ -425,20 +431,10 @@ reset_autoincrement_queries: Dict[str, Union[str, List[str]]] = {
         SET seq = {val}
         WHERE name = '{table}'
     """,
-    'oracle': [
-        """
-        DECLARE
-            max_id NUMBER := {val};
-            current_val NUMBER;
-        BEGIN
-            SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
-
-            WHILE current_val < max_id LOOP
-                SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
-            END LOOP;
-        END;
-        """,
-    ],
+    'oracle': (
+        "ALTER TABLE {table_name} MODIFY {column_name} "
+        "GENERATED BY DEFAULT ON NULL AS IDENTITY (START WITH {val_plus_1})"
+    ),
 }
 table_wrappers = {
     'default' : ('"', '"'),
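
The rewritten Oracle entry formats into a single DDL statement; for example, with a hypothetical `max_id` of 100 (so `val_plus_1` is 101, as passed by `get_reset_autoincrement_queries()`):

    oracle_template = (
        "ALTER TABLE {table_name} MODIFY {column_name} "
        "GENERATED BY DEFAULT ON NULL AS IDENTITY (START WITH {val_plus_1})"
    )
    print(oracle_template.format(table_name='"my_table"', column_name='"id"', val_plus_1=101))
    # ALTER TABLE "my_table" MODIFY "id" GENERATED BY DEFAULT ON NULL AS IDENTITY (START WITH 101)
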
@@ -1066,7 +1062,7 @@ def get_sqlalchemy_table(
             connector.metadata,
             **table_kwargs
         )
-    except sqlalchemy.exc.NoSuchTableError as e:
+    except sqlalchemy.exc.NoSuchTableError:
         warn(f"Table '{truncated_table_name}' does not exist in '{connector}'.")
         return None
     return tables[truncated_table_name]
@@ -1393,6 +1389,7 @@ def get_update_queries(
     datetime_col: Optional[str] = None,
     schema: Optional[str] = None,
     patch_schema: Optional[str] = None,
+    identity_insert: bool = False,
     debug: bool = False,
 ) -> List[str]:
     """
@@ -1430,6 +1427,10 @@
         If provided, use this schema when quoting the patch table.
         Defaults to `schema`.
 
+    identity_insert: bool, default False
+        If `True`, include `SET IDENTITY_INSERT` queries before and after the update queries.
+        Only applies for MSSQL upserts.
+
     debug: bool, default False
         Verbosity toggle.
 
@@ -1564,17 +1565,36 @@
         ) for c_name, c_type in join_cols_types
     ])
 
+    skip_query_val = ""
     target_table_name = sql_item_name(target, flavor, schema)
     patch_table_name = sql_item_name(patch, flavor, patch_schema)
     dt_col_name = sql_item_name(datetime_col, flavor, None) if datetime_col else None
+    date_bounds_table = patch_table_name if flavor != 'mssql' else '[date_bounds]'
+    min_dt_col_name = f"MIN({dt_col_name})" if flavor != 'mssql' else '[Min_dt]'
+    max_dt_col_name = f"MAX({dt_col_name})" if flavor != 'mssql' else '[Max_dt]'
     date_bounds_subquery = (
         f"""
-        f.{dt_col_name} >= (SELECT MIN({dt_col_name}) FROM {patch_table_name})
-        AND f.{dt_col_name} <= (SELECT MAX({dt_col_name}) FROM {patch_table_name})
+        f.{dt_col_name} >= (SELECT {min_dt_col_name} FROM {date_bounds_table})
+        AND f.{dt_col_name} <= (SELECT {max_dt_col_name} FROM {date_bounds_table})
         """
         if datetime_col
         else "1 = 1"
     )
+    with_temp_date_bounds = f"""
+    WITH [date_bounds] AS (
+        SELECT MIN({dt_col_name}) AS {min_dt_col_name}, MAX({dt_col_name}) AS {max_dt_col_name}
+        FROM {patch_table_name}
+    )""" if datetime_col else ""
+    identity_insert_on = (
+        f"SET IDENTITY_INSERT {target_table_name} ON"
+        if identity_insert
+        else skip_query_val
+    )
+    identity_insert_off = (
+        f"SET IDENTITY_INSERT {target_table_name} OFF"
+        if identity_insert
+        else skip_query_val
+    )
 
     ### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
     when_matched_update_sets_subquery_none = "" if not value_cols else (
@@ -1595,7 +1615,7 @@
     )
     ignore = "IGNORE " if not value_cols else ""
 
-    return [
+    formatted_queries = [
         base_query.format(
             sets_subquery_none=sets_subquery('', 'p.'),
             sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
@@ -1614,10 +1634,16 @@
             cols_equal_values=cols_equal_values,
             on_duplicate_key_update=on_duplicate_key_update,
             ignore=ignore,
+            with_temp_date_bounds=with_temp_date_bounds,
+            identity_insert_on=identity_insert_on,
+            identity_insert_off=identity_insert_off,
         )
         for base_query in base_queries
     ]
 
+    ### NOTE: Allow for skipping some queries.
+    return [query for query in formatted_queries if query]
+
 
 def get_null_replacement(typ: str, flavor: str) -> str:
     """
@@ -1867,7 +1893,17 @@ def _get_create_table_query_from_dtypes(
 
     table_name = sql_item_name(new_table, schema=schema, flavor=flavor)
     primary_key_name = sql_item_name(primary_key, flavor) if primary_key else None
+    primary_key_constraint_name = (
+        sql_item_name(f'PK_{new_table}', flavor, None)
+        if primary_key
+        else None
+    )
     datetime_column_name = sql_item_name(datetime_column, flavor) if datetime_column else None
+    primary_key_clustered = (
+        "CLUSTERED"
+        if not datetime_column or datetime_column == primary_key
+        else "NONCLUSTERED"
+    )
     query = f"CREATE TABLE {table_name} ("
     if primary_key:
         col_db_type = cols_types[0][1]
@@ -1887,6 +1923,8 @@
             query += f"\n    {col_name} {col_db_type} {auto_increment_str} PRIMARY KEY,"
         elif flavor == 'timescaledb' and datetime_column and datetime_column != primary_key:
             query += f"\n    {col_name} {col_db_type}{auto_increment_str} NOT NULL,"
+        elif flavor == 'mssql':
+            query += f"\n    {col_name} {col_db_type}{auto_increment_str} NOT NULL,"
         else:
             query += f"\n    {col_name} {col_db_type} PRIMARY KEY{auto_increment_str} NOT NULL,"
 
@@ -1902,6 +1940,10 @@
         and datetime_column != primary_key
     ):
         query += f"\n    PRIMARY KEY({datetime_column_name}, {primary_key_name}),"
+
+    if flavor == 'mssql' and primary_key:
+        query += f"\n    CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name}),"
+
     query = query[:-1]
     query += "\n)"
 
@@ -1927,7 +1969,7 @@ def _get_create_table_query_from_cte(
     create_cte_name = sql_item_name(create_cte, flavor, None)
     new_table_name = sql_item_name(new_table, flavor, schema)
     primary_key_constraint_name = (
-        sql_item_name(f'pk_{new_table}', flavor, None)
+        sql_item_name(f'PK_{new_table}', flavor, None)
         if primary_key
         else None
     )
@@ -1936,6 +1978,7 @@
         if primary_key
         else None
     )
+    primary_key_clustered = "CLUSTERED" if not datetime_column else "NONCLUSTERED"
     datetime_column_name = (
         sql_item_name(datetime_column, flavor)
         if datetime_column
@@ -1943,7 +1986,7 @@
     )
     if flavor in ('mssql',):
         query = query.lstrip()
-        if 'with ' in query.lower():
+        if query.lower().startswith('with '):
             final_select_ix = query.lower().rfind('select')
             create_table_query = (
                 query[:final_select_ix].rstrip() + ',\n'
@@ -1961,7 +2004,7 @@
 
         alter_type_query = f"""
             ALTER TABLE {new_table_name}
-            ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})
+            ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})
         """
     elif flavor in (None,):
         create_table_query = f"""
@@ -2225,29 +2268,8 @@ def get_reset_autoincrement_queries(
     schema = schema or connector.schema
     max_id_name = sql_item_name('max_id', connector.flavor)
     table_name = sql_item_name(table, connector.flavor, schema)
-    table_trunc = truncate_item_name(table, connector.flavor)
     table_seq_name = sql_item_name(table + '_' + column + '_seq', connector.flavor, schema)
     column_name = sql_item_name(column, connector.flavor)
-    if connector.flavor == 'oracle':
-        potential_table_names = set([
-            f"'{table_trunc.upper()}'",
-            f"'{table_trunc}'",
-            f"'{table_name}'",
-            f"'{table_name.upper()}'",
-        ])
-        df = connector.read(
-            """
-            SELECT SEQUENCE_NAME
-            FROM ALL_TAB_IDENTITY_COLS
-            WHERE TABLE_NAME IN ("""
-            + ", ".join([name for name in potential_table_names])
-            + """)
-            """,
-            debug=debug
-        )
-        if len(df) > 0:
-            table_seq_name = df['sequence_name'][0]
-
     max_id = connector.value(
         f"""
         SELECT COALESCE(MAX({column_name}), 0) AS {max_id_name}
@@ -2272,7 +2294,8 @@
             table=table,
             table_name=table_name,
             table_seq_name=table_seq_name,
-            val=(max_id),
+            val=max_id,
+            val_plus_1=(max_id + 1),
         )
         for query in reset_queries
     ]
meerschaum/utils/venv/_Venv.py CHANGED
@@ -34,10 +34,10 @@ class Venv:
     """
 
     def __init__(
-            self,
-            venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm',
-            debug: bool = False,
-        ) -> None:
+        self,
+        venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm',
+        debug: bool = False,
+    ) -> None:
         from meerschaum.utils.venv import activate_venv, deactivate_venv, active_venvs
         ### For some weird threading issue,
         ### we can't use `isinstance` here.