meerschaum 2.6.16__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. meerschaum/_internal/arguments/_parse_arguments.py +1 -1
  2. meerschaum/actions/delete.py +65 -69
  3. meerschaum/actions/edit.py +22 -2
  4. meerschaum/actions/install.py +1 -2
  5. meerschaum/actions/sync.py +2 -3
  6. meerschaum/config/_default.py +1 -1
  7. meerschaum/config/_paths.py +2 -1
  8. meerschaum/config/_version.py +1 -1
  9. meerschaum/connectors/api/_pipes.py +4 -3
  10. meerschaum/connectors/sql/_create_engine.py +3 -3
  11. meerschaum/connectors/sql/_pipes.py +84 -38
  12. meerschaum/connectors/sql/_sql.py +6 -1
  13. meerschaum/connectors/valkey/_pipes.py +12 -1
  14. meerschaum/core/Pipe/__init__.py +23 -13
  15. meerschaum/core/Pipe/_attributes.py +19 -0
  16. meerschaum/core/Pipe/_dtypes.py +1 -1
  17. meerschaum/core/Pipe/_sync.py +61 -21
  18. meerschaum/core/Pipe/_verify.py +8 -7
  19. meerschaum/jobs/_Job.py +2 -1
  20. meerschaum/plugins/_Plugin.py +11 -14
  21. meerschaum/utils/daemon/Daemon.py +20 -13
  22. meerschaum/utils/dataframe.py +175 -13
  23. meerschaum/utils/dtypes/__init__.py +103 -14
  24. meerschaum/utils/dtypes/sql.py +26 -0
  25. meerschaum/utils/misc.py +8 -8
  26. meerschaum/utils/packages/_packages.py +1 -1
  27. meerschaum/utils/schedule.py +8 -3
  28. meerschaum/utils/sql.py +70 -47
  29. meerschaum/utils/venv/_Venv.py +4 -4
  30. meerschaum/utils/venv/__init__.py +33 -13
  31. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +2 -2
  32. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +38 -38
  33. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
  34. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
  35. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
  36. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  37. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
  38. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/utils/dtypes/__init__.py CHANGED
@@ -15,7 +15,19 @@ import meerschaum as mrsm
15
15
  from meerschaum.utils.typing import Dict, Union, Any
16
16
  from meerschaum.utils.warnings import warn
17
17
 
18
- MRSM_PD_DTYPES: Dict[str, str] = {
18
+ MRSM_ALIAS_DTYPES: Dict[str, str] = {
19
+ 'decimal': 'numeric',
20
+ 'number': 'numeric',
21
+ 'jsonl': 'json',
22
+ 'JSON': 'json',
23
+ 'binary': 'bytes',
24
+ 'blob': 'bytes',
25
+ 'varbinary': 'bytes',
26
+ 'bytea': 'bytes',
27
+ 'guid': 'uuid',
28
+ 'UUID': 'uuid',
29
+ }
30
+ MRSM_PD_DTYPES: Dict[Union[str, None], str] = {
19
31
  'json': 'object',
20
32
  'numeric': 'object',
21
33
  'uuid': 'object',
@@ -27,6 +39,8 @@ MRSM_PD_DTYPES: Dict[str, str] = {
27
39
  'int32': 'Int32',
28
40
  'int64': 'Int64',
29
41
  'str': 'string[python]',
42
+ 'bytes': 'object',
43
+ None: 'object',
30
44
  }
31
45
 
32
46
 
@@ -38,6 +52,10 @@ def to_pandas_dtype(dtype: str) -> str:
38
52
  if known_dtype is not None:
39
53
  return known_dtype
40
54
 
55
+ alias_dtype = MRSM_ALIAS_DTYPES.get(dtype, None)
56
+ if alias_dtype is not None:
57
+ return MRSM_PD_DTYPES[alias_dtype]
58
+
41
59
  ### NOTE: Kind of a hack, but if the first word of the given dtype is in all caps,
42
60
  ### treat it as a SQL db type.
43
61
  if dtype.split(' ')[0].isupper():
@@ -95,7 +113,7 @@ def are_dtypes_equal(
95
113
  try:
96
114
  if ldtype == rdtype:
97
115
  return True
98
- except Exception as e:
116
+ except Exception:
99
117
  warn(f"Exception when comparing dtypes, returning False:\n{traceback.format_exc()}")
100
118
  return False
101
119
 
@@ -115,6 +133,10 @@ def are_dtypes_equal(
115
133
  if ldtype in uuid_dtypes and rdtype in uuid_dtypes:
116
134
  return True
117
135
 
136
+ bytes_dtypes = ('bytes', 'object')
137
+ if ldtype in bytes_dtypes and rdtype in bytes_dtypes:
138
+ return True
139
+
118
140
  ldtype_clean = ldtype.split('[', maxsplit=1)[0]
119
141
  rdtype_clean = rdtype.split('[', maxsplit=1)[0]
120
142
 
@@ -185,7 +207,7 @@ def attempt_cast_to_numeric(value: Any) -> Any:
185
207
  if not value_is_null(value)
186
208
  else Decimal('NaN')
187
209
  )
188
- except Exception as e:
210
+ except Exception:
189
211
  return value
190
212
 
191
213
 
@@ -201,7 +223,23 @@ def attempt_cast_to_uuid(value: Any) -> Any:
201
223
  if not value_is_null(value)
202
224
  else None
203
225
  )
204
- except Exception as e:
226
+ except Exception:
227
+ return value
228
+
229
+
230
+ def attempt_cast_to_bytes(value: Any) -> Any:
231
+ """
232
+ Given a value, attempt to coerce it into a bytestring.
233
+ """
234
+ if isinstance(value, uuid.UUID):
235
+ return value
236
+ try:
237
+ return (
238
+ deserialize_base64(str(value))
239
+ if not value_is_null(value)
240
+ else None
241
+ )
242
+ except Exception:
205
243
  return value
206
244
 
207
245
 
@@ -251,7 +289,7 @@ def coerce_timezone(
251
289
  ) -> Any:
252
290
  """
253
291
  Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
254
- return a naive datetime in terms of UTC.
292
+ return a UTC timestamp (strip timezone if `strip_utc` is `True`).
255
293
  """
256
294
  if dt is None:
257
295
  return None
@@ -266,9 +304,7 @@ def coerce_timezone(
266
304
  dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
267
305
 
268
306
  if dt_is_series:
269
- is_dask = 'dask' in dt.__module__
270
307
  pandas = mrsm.attempt_import('pandas', lazy=False)
271
- dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
272
308
 
273
309
  if (
274
310
  pandas.api.types.is_datetime64_any_dtype(dt) and (
@@ -279,14 +315,13 @@ def coerce_timezone(
279
315
  ):
280
316
  return dt
281
317
 
282
- dt_series = (
283
- pandas.to_datetime(dt, utc=True, format='ISO8601')
284
- if dd is None
285
- else dd.to_datetime(dt, utc=True, format='ISO8601')
286
- )
318
+ dt_series = to_datetime(dt, coerce_utc=False)
287
319
  if strip_utc:
288
- if dt_series.dt.tz is not None:
289
- dt_series = dt_series.dt.tz_localize(None)
320
+ try:
321
+ if dt_series.dt.tz is not None:
322
+ dt_series = dt_series.dt.tz_localize(None)
323
+ except Exception:
324
+ pass
290
325
 
291
326
  return dt_series
292
327
 
@@ -299,3 +334,57 @@ def coerce_timezone(
299
334
  if strip_utc:
300
335
  return utc_dt.replace(tzinfo=None)
301
336
  return utc_dt
337
+
338
+
339
+ def to_datetime(dt_val: Any, as_pydatetime: bool = False, coerce_utc: bool = True) -> Any:
340
+ """
341
+ Wrap `pd.to_datetime()` and add support for out-of-bounds values.
342
+ """
343
+ pandas, dateutil_parser = mrsm.attempt_import('pandas', 'dateutil.parser', lazy=False)
344
+ is_dask = 'dask' in getattr(dt_val, '__module__', '')
345
+ dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
346
+ dt_is_series = hasattr(dt_val, 'dtype') and hasattr(dt_val, '__module__')
347
+ pd = pandas if dd is None else dd
348
+
349
+ try:
350
+ new_dt_val = pd.to_datetime(dt_val, utc=True, format='ISO8601')
351
+ if as_pydatetime:
352
+ return new_dt_val.to_pydatetime()
353
+ return new_dt_val
354
+ except (pd.errors.OutOfBoundsDatetime, ValueError):
355
+ pass
356
+
357
+ def parse(x: Any) -> Any:
358
+ try:
359
+ return dateutil_parser.parse(x)
360
+ except Exception:
361
+ return x
362
+
363
+ if dt_is_series:
364
+ new_series = dt_val.apply(parse)
365
+ if coerce_utc:
366
+ return coerce_timezone(new_series)
367
+ return new_series
368
+
369
+ new_dt_val = parse(dt_val)
370
+ if not coerce_utc:
371
+ return new_dt_val
372
+ return coerce_timezone(new_dt_val)
373
+
374
+
375
+ def serialize_bytes(data: bytes) -> str:
376
+ """
377
+ Return the given bytes as a base64-encoded string.
378
+ """
379
+ import base64
380
+ if not isinstance(data, bytes) and value_is_null(data):
381
+ return data
382
+ return base64.b64encode(data).decode('utf-8')
383
+
384
+
385
+ def deserialize_base64(data: str) -> bytes:
386
+ """
387
+ Return the original bytestring from the given base64-encoded string.
388
+ """
389
+ import base64
390
+ return base64.b64decode(data)
meerschaum/utils/dtypes/sql.py CHANGED
@@ -276,6 +276,19 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
276
276
  'cockroachdb': 'UUID',
277
277
  'default': 'TEXT',
278
278
  },
279
+ 'bytes': {
280
+ 'timescaledb': 'BYTEA',
281
+ 'postgresql': 'BYTEA',
282
+ 'mariadb': 'BLOB',
283
+ 'mysql': 'BLOB',
284
+ 'mssql': 'VARBINARY(MAX)',
285
+ 'oracle': 'BLOB',
286
+ 'sqlite': 'BLOB',
287
+ 'duckdb': 'BLOB',
288
+ 'citus': 'BYTEA',
289
+ 'cockroachdb': 'BYTEA',
290
+ 'default': 'BLOB',
291
+ },
279
292
  }
280
293
  PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
281
294
  'int': {
@@ -421,6 +434,19 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
421
434
  'cockroachdb': 'Uuid',
422
435
  'default': 'Uuid',
423
436
  },
437
+ 'bytes': {
438
+ 'timescaledb': 'LargeBinary',
439
+ 'postgresql': 'LargeBinary',
440
+ 'mariadb': 'LargeBinary',
441
+ 'mysql': 'LargeBinary',
442
+ 'mssql': 'LargeBinary',
443
+ 'oracle': 'LargeBinary',
444
+ 'sqlite': 'LargeBinary',
445
+ 'duckdb': 'LargeBinary',
446
+ 'citus': 'LargeBinary',
447
+ 'cockroachdb': 'LargeBinary',
448
+ 'default': 'LargeBinary',
449
+ },
424
450
  }
425
451
 
426
452
  AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
meerschaum/utils/misc.py CHANGED
@@ -177,14 +177,14 @@ def string_to_dict(
177
177
  keys = _keys[:-1]
178
178
  try:
179
179
  val = ast.literal_eval(_keys[-1])
180
- except Exception as e:
180
+ except Exception:
181
181
  val = str(_keys[-1])
182
182
 
183
183
  c = params_dict
184
184
  for _k in keys[:-1]:
185
185
  try:
186
186
  k = ast.literal_eval(_k)
187
- except Exception as e:
187
+ except Exception:
188
188
  k = str(_k)
189
189
  if k not in c:
190
190
  c[k] = {}
@@ -196,12 +196,12 @@ def string_to_dict(
196
196
 
197
197
 
198
198
  def parse_config_substitution(
199
- value: str,
200
- leading_key: str = 'MRSM',
201
- begin_key: str = '{',
202
- end_key: str = '}',
203
- delimeter: str = ':'
204
- ) -> List[Any]:
199
+ value: str,
200
+ leading_key: str = 'MRSM',
201
+ begin_key: str = '{',
202
+ end_key: str = '}',
203
+ delimeter: str = ':'
204
+ ) -> List[Any]:
205
205
  """
206
206
  Parse Meerschaum substitution syntax
207
207
  E.g. MRSM{value1:value2} => ['value1', 'value2']
meerschaum/utils/packages/_packages.py CHANGED
@@ -79,7 +79,7 @@ packages: Dict[str, Dict[str, str]] = {
79
79
  },
80
80
  'drivers-extras': {
81
81
  'pyodbc' : 'pyodbc>=4.0.30',
82
- 'cx_Oracle' : 'cx_Oracle>=8.3.0',
82
+ 'oracledb' : 'oracledb>=2.5.0',
83
83
  },
84
84
  'cli': {
85
85
  'pgcli' : 'pgcli>=3.1.0',
meerschaum/utils/schedule.py CHANGED
@@ -132,7 +132,7 @@ def schedule_function(
132
132
 
133
133
  try:
134
134
  loop.run_until_complete(run_scheduler())
135
- except (KeyboardInterrupt, SystemExit) as e:
135
+ except (KeyboardInterrupt, SystemExit):
136
136
  loop.run_until_complete(_stop_scheduler())
137
137
 
138
138
  return True, "Success"
@@ -159,13 +159,13 @@ def parse_schedule(schedule: str, now: Optional[datetime] = None):
159
159
  )
160
160
 
161
161
  starting_ts = parse_start_time(schedule, now=now)
162
- schedule = schedule.split(STARTING_KEYWORD)[0].strip()
162
+ schedule = schedule.split(STARTING_KEYWORD, maxsplit=1)[0].strip()
163
163
  for alias_keyword, true_keyword in SCHEDULE_ALIASES.items():
164
164
  schedule = schedule.replace(alias_keyword, true_keyword)
165
165
 
166
166
  ### TODO Allow for combining `and` + `or` logic.
167
167
  if '&' in schedule and '|' in schedule:
168
- raise ValueError(f"Cannot accept both 'and' + 'or' logic in the schedule frequency.")
168
+ raise ValueError("Cannot accept both 'and' + 'or' logic in the schedule frequency.")
169
169
 
170
170
  join_str = '|' if '|' in schedule else '&'
171
171
  join_trigger = (
@@ -300,6 +300,11 @@ def parse_start_time(schedule: str, now: Optional[datetime] = None) -> datetime:
300
300
  try:
301
301
  if starting_str == 'now':
302
302
  starting_ts = now
303
+ elif starting_str.startswith('in '):
304
+ delta_vals = starting_str.replace('in ', '').split(' ', maxsplit=1)
305
+ delta_unit = delta_vals[-1].rstrip('s') + 's'
306
+ delta_num = float(delta_vals[0])
307
+ starting_ts = now + timedelta(**{delta_unit: delta_num})
303
308
  elif 'tomorrow' in starting_str or 'today' in starting_str:
304
309
  today = round_time(now, timedelta(days=1))
305
310
  tomorrow = today + timedelta(days=1)
meerschaum/utils/sql.py CHANGED
@@ -108,24 +108,30 @@ update_queries = {
108
108
  {cols_equal_values}
109
109
  """,
110
110
  'mssql': """
111
+ {with_temp_date_bounds}
111
112
  MERGE {target_table_name} f
112
- USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
113
+ USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
113
114
  ON {and_subquery_f}
114
115
  AND {date_bounds_subquery}
115
116
  WHEN MATCHED THEN
116
117
  UPDATE
117
118
  {sets_subquery_none};
118
119
  """,
119
- 'mssql-upsert': """
120
+ 'mssql-upsert': [
121
+ "{identity_insert_on}",
122
+ """
123
+ {with_temp_date_bounds}
120
124
  MERGE {target_table_name} f
121
- USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
125
+ USING (SELECT {patch_cols_str} FROM {patch_table_name}) p
122
126
  ON {and_subquery_f}
123
127
  AND {date_bounds_subquery}
124
128
  {when_matched_update_sets_subquery_none}
125
129
  WHEN NOT MATCHED THEN
126
130
  INSERT ({patch_cols_str})
127
131
  VALUES ({patch_cols_prefixed_str});
128
- """,
132
+ """,
133
+ "{identity_insert_off}",
134
+ ],
129
135
  'oracle': """
130
136
  MERGE INTO {target_table_name} f
131
137
  USING (SELECT DISTINCT {patch_cols_str} FROM {patch_table_name}) p
@@ -425,20 +431,10 @@ reset_autoincrement_queries: Dict[str, Union[str, List[str]]] = {
425
431
  SET seq = {val}
426
432
  WHERE name = '{table}'
427
433
  """,
428
- 'oracle': [
429
- """
430
- DECLARE
431
- max_id NUMBER := {val};
432
- current_val NUMBER;
433
- BEGIN
434
- SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
435
-
436
- WHILE current_val < max_id LOOP
437
- SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
438
- END LOOP;
439
- END;
440
- """,
441
- ],
434
+ 'oracle': (
435
+ "ALTER TABLE {table_name} MODIFY {column_name} "
436
+ "GENERATED BY DEFAULT ON NULL AS IDENTITY (START WITH {val_plus_1})"
437
+ ),
442
438
  }
443
439
  table_wrappers = {
444
440
  'default' : ('"', '"'),
@@ -1066,7 +1062,7 @@ def get_sqlalchemy_table(
1066
1062
  connector.metadata,
1067
1063
  **table_kwargs
1068
1064
  )
1069
- except sqlalchemy.exc.NoSuchTableError as e:
1065
+ except sqlalchemy.exc.NoSuchTableError:
1070
1066
  warn(f"Table '{truncated_table_name}' does not exist in '{connector}'.")
1071
1067
  return None
1072
1068
  return tables[truncated_table_name]
@@ -1393,6 +1389,7 @@ def get_update_queries(
1393
1389
  datetime_col: Optional[str] = None,
1394
1390
  schema: Optional[str] = None,
1395
1391
  patch_schema: Optional[str] = None,
1392
+ identity_insert: bool = False,
1396
1393
  debug: bool = False,
1397
1394
  ) -> List[str]:
1398
1395
  """
@@ -1430,6 +1427,10 @@ def get_update_queries(
1430
1427
  If provided, use this schema when quoting the patch table.
1431
1428
  Defaults to `schema`.
1432
1429
 
1430
+ identity_insert: bool, default False
1431
+ If `True`, include `SET IDENTITY_INSERT` queries before and after the update queries.
1432
+ Only applies for MSSQL upserts.
1433
+
1433
1434
  debug: bool, default False
1434
1435
  Verbosity toggle.
1435
1436
 
@@ -1564,17 +1565,36 @@ def get_update_queries(
1564
1565
  ) for c_name, c_type in join_cols_types
1565
1566
  ])
1566
1567
 
1568
+ skip_query_val = ""
1567
1569
  target_table_name = sql_item_name(target, flavor, schema)
1568
1570
  patch_table_name = sql_item_name(patch, flavor, patch_schema)
1569
1571
  dt_col_name = sql_item_name(datetime_col, flavor, None) if datetime_col else None
1572
+ date_bounds_table = patch_table_name if flavor != 'mssql' else '[date_bounds]'
1573
+ min_dt_col_name = f"MIN({dt_col_name})" if flavor != 'mssql' else '[Min_dt]'
1574
+ max_dt_col_name = f"MAX({dt_col_name})" if flavor != 'mssql' else '[Max_dt]'
1570
1575
  date_bounds_subquery = (
1571
1576
  f"""
1572
- f.{dt_col_name} >= (SELECT MIN({dt_col_name}) FROM {patch_table_name})
1573
- AND f.{dt_col_name} <= (SELECT MAX({dt_col_name}) FROM {patch_table_name})
1577
+ f.{dt_col_name} >= (SELECT {min_dt_col_name} FROM {date_bounds_table})
1578
+ AND f.{dt_col_name} <= (SELECT {max_dt_col_name} FROM {date_bounds_table})
1574
1579
  """
1575
1580
  if datetime_col
1576
1581
  else "1 = 1"
1577
1582
  )
1583
+ with_temp_date_bounds = f"""
1584
+ WITH [date_bounds] AS (
1585
+ SELECT MIN({dt_col_name}) AS {min_dt_col_name}, MAX({dt_col_name}) AS {max_dt_col_name}
1586
+ FROM {patch_table_name}
1587
+ )""" if datetime_col else ""
1588
+ identity_insert_on = (
1589
+ f"SET IDENTITY_INSERT {target_table_name} ON"
1590
+ if identity_insert
1591
+ else skip_query_val
1592
+ )
1593
+ identity_insert_off = (
1594
+ f"SET IDENTITY_INSERT {target_table_name} OFF"
1595
+ if identity_insert
1596
+ else skip_query_val
1597
+ )
1578
1598
 
1579
1599
  ### NOTE: MSSQL upserts must exclude the update portion if only upserting indices.
1580
1600
  when_matched_update_sets_subquery_none = "" if not value_cols else (
@@ -1595,7 +1615,7 @@ def get_update_queries(
1595
1615
  )
1596
1616
  ignore = "IGNORE " if not value_cols else ""
1597
1617
 
1598
- return [
1618
+ formatted_queries = [
1599
1619
  base_query.format(
1600
1620
  sets_subquery_none=sets_subquery('', 'p.'),
1601
1621
  sets_subquery_none_excluded=sets_subquery('', 'EXCLUDED.'),
@@ -1614,10 +1634,16 @@ def get_update_queries(
1614
1634
  cols_equal_values=cols_equal_values,
1615
1635
  on_duplicate_key_update=on_duplicate_key_update,
1616
1636
  ignore=ignore,
1637
+ with_temp_date_bounds=with_temp_date_bounds,
1638
+ identity_insert_on=identity_insert_on,
1639
+ identity_insert_off=identity_insert_off,
1617
1640
  )
1618
1641
  for base_query in base_queries
1619
1642
  ]
1620
1643
 
1644
+ ### NOTE: Allow for skipping some queries.
1645
+ return [query for query in formatted_queries if query]
1646
+
1621
1647
 
1622
1648
  def get_null_replacement(typ: str, flavor: str) -> str:
1623
1649
  """
@@ -1867,7 +1893,17 @@ def _get_create_table_query_from_dtypes(
1867
1893
 
1868
1894
  table_name = sql_item_name(new_table, schema=schema, flavor=flavor)
1869
1895
  primary_key_name = sql_item_name(primary_key, flavor) if primary_key else None
1896
+ primary_key_constraint_name = (
1897
+ sql_item_name(f'PK_{new_table}', flavor, None)
1898
+ if primary_key
1899
+ else None
1900
+ )
1870
1901
  datetime_column_name = sql_item_name(datetime_column, flavor) if datetime_column else None
1902
+ primary_key_clustered = (
1903
+ "CLUSTERED"
1904
+ if not datetime_column or datetime_column == primary_key
1905
+ else "NONCLUSTERED"
1906
+ )
1871
1907
  query = f"CREATE TABLE {table_name} ("
1872
1908
  if primary_key:
1873
1909
  col_db_type = cols_types[0][1]
@@ -1887,6 +1923,8 @@ def _get_create_table_query_from_dtypes(
1887
1923
  query += f"\n {col_name} {col_db_type} {auto_increment_str} PRIMARY KEY,"
1888
1924
  elif flavor == 'timescaledb' and datetime_column and datetime_column != primary_key:
1889
1925
  query += f"\n {col_name} {col_db_type}{auto_increment_str} NOT NULL,"
1926
+ elif flavor == 'mssql':
1927
+ query += f"\n {col_name} {col_db_type}{auto_increment_str} NOT NULL,"
1890
1928
  else:
1891
1929
  query += f"\n {col_name} {col_db_type} PRIMARY KEY{auto_increment_str} NOT NULL,"
1892
1930
 
@@ -1902,6 +1940,10 @@ def _get_create_table_query_from_dtypes(
1902
1940
  and datetime_column != primary_key
1903
1941
  ):
1904
1942
  query += f"\n PRIMARY KEY({datetime_column_name}, {primary_key_name}),"
1943
+
1944
+ if flavor == 'mssql' and primary_key:
1945
+ query += f"\n CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name}),"
1946
+
1905
1947
  query = query[:-1]
1906
1948
  query += "\n)"
1907
1949
 
@@ -1927,7 +1969,7 @@ def _get_create_table_query_from_cte(
1927
1969
  create_cte_name = sql_item_name(create_cte, flavor, None)
1928
1970
  new_table_name = sql_item_name(new_table, flavor, schema)
1929
1971
  primary_key_constraint_name = (
1930
- sql_item_name(f'pk_{new_table}', flavor, None)
1972
+ sql_item_name(f'PK_{new_table}', flavor, None)
1931
1973
  if primary_key
1932
1974
  else None
1933
1975
  )
@@ -1936,6 +1978,7 @@ def _get_create_table_query_from_cte(
1936
1978
  if primary_key
1937
1979
  else None
1938
1980
  )
1981
+ primary_key_clustered = "CLUSTERED" if not datetime_column else "NONCLUSTERED"
1939
1982
  datetime_column_name = (
1940
1983
  sql_item_name(datetime_column, flavor)
1941
1984
  if datetime_column
@@ -1943,7 +1986,7 @@ def _get_create_table_query_from_cte(
1943
1986
  )
1944
1987
  if flavor in ('mssql',):
1945
1988
  query = query.lstrip()
1946
- if 'with ' in query.lower():
1989
+ if query.lower().startswith('with '):
1947
1990
  final_select_ix = query.lower().rfind('select')
1948
1991
  create_table_query = (
1949
1992
  query[:final_select_ix].rstrip() + ',\n'
@@ -1961,7 +2004,7 @@ def _get_create_table_query_from_cte(
1961
2004
 
1962
2005
  alter_type_query = f"""
1963
2006
  ALTER TABLE {new_table_name}
1964
- ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})
2007
+ ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})
1965
2008
  """
1966
2009
  elif flavor in (None,):
1967
2010
  create_table_query = f"""
@@ -2225,29 +2268,8 @@ def get_reset_autoincrement_queries(
2225
2268
  schema = schema or connector.schema
2226
2269
  max_id_name = sql_item_name('max_id', connector.flavor)
2227
2270
  table_name = sql_item_name(table, connector.flavor, schema)
2228
- table_trunc = truncate_item_name(table, connector.flavor)
2229
2271
  table_seq_name = sql_item_name(table + '_' + column + '_seq', connector.flavor, schema)
2230
2272
  column_name = sql_item_name(column, connector.flavor)
2231
- if connector.flavor == 'oracle':
2232
- potential_table_names = set([
2233
- f"'{table_trunc.upper()}'",
2234
- f"'{table_trunc}'",
2235
- f"'{table_name}'",
2236
- f"'{table_name.upper()}'",
2237
- ])
2238
- df = connector.read(
2239
- """
2240
- SELECT SEQUENCE_NAME
2241
- FROM ALL_TAB_IDENTITY_COLS
2242
- WHERE TABLE_NAME IN ("""
2243
- + ", ".join([name for name in potential_table_names])
2244
- + """)
2245
- """,
2246
- debug=debug
2247
- )
2248
- if len(df) > 0:
2249
- table_seq_name = df['sequence_name'][0]
2250
-
2251
2273
  max_id = connector.value(
2252
2274
  f"""
2253
2275
  SELECT COALESCE(MAX({column_name}), 0) AS {max_id_name}
@@ -2272,7 +2294,8 @@ def get_reset_autoincrement_queries(
2272
2294
  table=table,
2273
2295
  table_name=table_name,
2274
2296
  table_seq_name=table_seq_name,
2275
- val=(max_id),
2297
+ val=max_id,
2298
+ val_plus_1=(max_id + 1),
2276
2299
  )
2277
2300
  for query in reset_queries
2278
2301
  ]
meerschaum/utils/venv/_Venv.py CHANGED
@@ -34,10 +34,10 @@ class Venv:
34
34
  """
35
35
 
36
36
  def __init__(
37
- self,
38
- venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm',
39
- debug: bool = False,
40
- ) -> None:
37
+ self,
38
+ venv: Union[str, 'meerschaum.plugins.Plugin', None] = 'mrsm',
39
+ debug: bool = False,
40
+ ) -> None:
41
41
  from meerschaum.utils.venv import activate_venv, deactivate_venv, active_venvs
42
42
  ### For some weird threading issue,
43
43
  ### we can't use `isinstance` here.