meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. meerschaum/_internal/arguments/_parser.py +6 -1
  2. meerschaum/_internal/entry.py +16 -5
  3. meerschaum/actions/edit.py +6 -6
  4. meerschaum/actions/sql.py +12 -11
  5. meerschaum/api/dash/pages/login.py +17 -17
  6. meerschaum/api/dash/pipes.py +104 -13
  7. meerschaum/api/routes/_pipes.py +58 -40
  8. meerschaum/api/routes/_webterm.py +1 -0
  9. meerschaum/config/_edit.py +46 -19
  10. meerschaum/config/_read_config.py +20 -9
  11. meerschaum/config/_version.py +1 -1
  12. meerschaum/config/stack/__init__.py +1 -1
  13. meerschaum/config/static/__init__.py +1 -0
  14. meerschaum/connectors/api/_APIConnector.py +1 -0
  15. meerschaum/connectors/api/_pipes.py +39 -8
  16. meerschaum/connectors/sql/_SQLConnector.py +4 -3
  17. meerschaum/connectors/sql/_pipes.py +511 -118
  18. meerschaum/connectors/sql/_sql.py +55 -15
  19. meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
  20. meerschaum/connectors/valkey/_pipes.py +11 -5
  21. meerschaum/core/Pipe/__init__.py +27 -9
  22. meerschaum/core/Pipe/_attributes.py +181 -18
  23. meerschaum/core/Pipe/_clear.py +10 -8
  24. meerschaum/core/Pipe/_copy.py +2 -0
  25. meerschaum/core/Pipe/_data.py +65 -17
  26. meerschaum/core/Pipe/_deduplicate.py +30 -28
  27. meerschaum/core/Pipe/_dtypes.py +4 -4
  28. meerschaum/core/Pipe/_fetch.py +12 -10
  29. meerschaum/core/Pipe/_sync.py +28 -11
  30. meerschaum/core/Pipe/_verify.py +52 -49
  31. meerschaum/utils/dataframe.py +64 -34
  32. meerschaum/utils/dtypes/__init__.py +25 -6
  33. meerschaum/utils/dtypes/sql.py +76 -33
  34. meerschaum/utils/misc.py +57 -24
  35. meerschaum/utils/packages/_packages.py +2 -1
  36. meerschaum/utils/schedule.py +7 -5
  37. meerschaum/utils/sql.py +697 -44
  38. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
  39. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
  40. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0
meerschaum/utils/sql.py CHANGED
@@ -16,6 +16,7 @@ from meerschaum.utils.dtypes.sql import (
16
16
  PD_TO_DB_DTYPES_FLAVORS,
17
17
  get_pd_type_from_db_type as get_pd_type,
18
18
  get_db_type_from_pd_type as get_db_type,
19
+ TIMEZONE_NAIVE_FLAVORS,
19
20
  )
20
21
  from meerschaum.utils.warnings import warn
21
22
  from meerschaum.utils.debug import dprint
@@ -41,6 +42,7 @@ SKIP_IF_EXISTS_FLAVORS = {'mssql', 'oracle'}
41
42
  DROP_IF_EXISTS_FLAVORS = {
42
43
  'timescaledb', 'postgresql', 'citus', 'mssql', 'mysql', 'mariadb', 'sqlite',
43
44
  }
45
+ SKIP_AUTO_INCREMENT_FLAVORS = {'citus', 'duckdb'}
44
46
  COALESCE_UNIQUE_INDEX_FLAVORS = {'timescaledb', 'postgresql', 'citus'}
45
47
  update_queries = {
46
48
  'default': """
@@ -173,7 +175,7 @@ columns_types_queries = {
173
175
  p.name "column",
174
176
  p.type "type"
175
177
  FROM sqlite_master m
176
- LEFT OUTER JOIN pragma_table_info((m.name)) p
178
+ LEFT OUTER JOIN pragma_table_info(m.name) p
177
179
  ON m.name <> p.name
178
180
  WHERE m.type = 'table'
179
181
  AND m.name IN ('{table}', '{table_trunc}')
@@ -186,8 +188,11 @@ columns_types_queries = {
186
188
  COLUMN_NAME AS [column],
187
189
  DATA_TYPE AS [type]
188
190
  FROM {db_prefix}INFORMATION_SCHEMA.COLUMNS
189
- WHERE TABLE_NAME LIKE '{table}%'
190
- OR TABLE_NAME LIKE '{table_trunc}%'
191
+ WHERE TABLE_NAME IN (
192
+ '{table}',
193
+ '{table_trunc}'
194
+ )
195
+
191
196
  """,
192
197
  'mysql': """
193
198
  SELECT
@@ -231,6 +236,206 @@ hypertable_queries = {
231
236
  'timescaledb': 'SELECT hypertable_size(\'{table_name}\')',
232
237
  'citus': 'SELECT citus_table_size(\'{table_name}\')',
233
238
  }
239
# Per-flavor queries which list the indexed columns of a table,
# one row per (column, index) pair.
# Every query selects, in this order:
#   database, schema, table, column, index, index_type
# The `{table}`, `{table_trunc}`, `{schema}`, etc. placeholders are `str.format()` fields.
columns_indices_queries = {
    'default': """
        SELECT
            current_database() AS "database",
            n.nspname AS "schema",
            t.relname AS "table",
            c.column_name AS "column",
            i.relname AS "index",
            CASE WHEN con.contype = 'p' THEN 'PRIMARY KEY' ELSE 'INDEX' END AS "index_type"
        FROM pg_class t
        INNER JOIN pg_index AS ix
            ON t.oid = ix.indrelid
        INNER JOIN pg_class AS i
            ON i.oid = ix.indexrelid
        INNER JOIN pg_namespace AS n
            ON n.oid = t.relnamespace
        INNER JOIN pg_attribute AS a
            ON a.attnum = ANY(ix.indkey)
            AND a.attrelid = t.oid
        INNER JOIN information_schema.columns AS c
            ON c.column_name = a.attname
            AND c.table_name = t.relname
            AND c.table_schema = n.nspname
        LEFT JOIN pg_constraint AS con
            ON con.conindid = i.oid
            AND con.contype = 'p'
        WHERE
            t.relname IN ('{table}', '{table_trunc}')
            AND n.nspname = '{schema}'
    """,
    'sqlite': """
        WITH indexed_columns AS (
            SELECT
                '{table}' AS table_name,
                pi.name AS column_name,
                i.name AS index_name,
                'INDEX' AS index_type
            FROM
                sqlite_master AS i,
                pragma_index_info(i.name) AS pi
            WHERE
                i.type = 'index'
                AND i.tbl_name = '{table}'
        ),
        primary_key_columns AS (
            SELECT
                '{table}' AS table_name,
                ti.name AS column_name,
                'PRIMARY_KEY' AS index_name,
                'PRIMARY KEY' AS index_type
            FROM
                pragma_table_info('{table}') AS ti
            WHERE
                ti.pk > 0
        )
        SELECT
            NULL AS "database",
            NULL AS "schema",
            "table_name" AS "table",
            "column_name" AS "column",
            "index_name" AS "index",
            "index_type"
        FROM indexed_columns
        UNION ALL
        SELECT
            NULL AS "database",
            NULL AS "schema",
            table_name AS "table",
            column_name AS "column",
            index_name AS "index",
            index_type
        FROM primary_key_columns
    """,
    # NOTE(review): the schema filter below is hard-coded to 'dbo' rather than
    # using the `{schema}` placeholder like the default query -- confirm this is intended.
    'mssql': """
        SELECT
            NULL AS [database],
            s.name AS [schema],
            t.name AS [table],
            c.name AS [column],
            i.name AS [index],
            CASE
                WHEN kc.type = 'PK' THEN 'PRIMARY KEY'
                ELSE 'INDEX'
            END AS [index_type]
        FROM
            sys.schemas s
        INNER JOIN sys.tables t
            ON s.schema_id = t.schema_id
        INNER JOIN sys.indexes i
            ON t.object_id = i.object_id
        INNER JOIN sys.index_columns ic
            ON i.object_id = ic.object_id
            AND i.index_id = ic.index_id
        INNER JOIN sys.columns c
            ON ic.object_id = c.object_id
            AND ic.column_id = c.column_id
        LEFT JOIN sys.key_constraints kc
            ON kc.parent_object_id = i.object_id
            AND kc.type = 'PK'
            AND kc.name = i.name
        WHERE
            t.name IN ('{table}', '{table_trunc}')
            AND s.name = 'dbo'
            AND i.type IN (1, 2) -- 1 = CLUSTERED, 2 = NONCLUSTERED
    """,
    'oracle': """
        SELECT
            NULL AS "database",
            ic.table_owner AS "schema",
            ic.table_name AS "table",
            ic.column_name AS "column",
            i.index_name AS "index",
            CASE
                WHEN c.constraint_type = 'P' THEN 'PRIMARY KEY'
                WHEN i.uniqueness = 'UNIQUE' THEN 'UNIQUE INDEX'
                ELSE 'INDEX'
            END AS index_type
        FROM
            all_ind_columns ic
        INNER JOIN all_indexes i
            ON ic.index_name = i.index_name
            AND ic.table_owner = i.owner
        LEFT JOIN all_constraints c
            ON i.index_name = c.constraint_name
            AND i.table_owner = c.owner
            AND c.constraint_type = 'P'
        WHERE ic.table_name IN (
            '{table}',
            '{table_trunc}',
            '{table_upper}',
            '{table_upper_trunc}'
        )
    """,
    # MySQL / MariaDB always name the primary key's index `PRIMARY`.
    # Testing `NON_UNIQUE = 0` would also match ordinary UNIQUE indexes
    # and mislabel them as primary keys.
    'mysql': """
        SELECT
            TABLE_SCHEMA AS `database`,
            TABLE_SCHEMA AS `schema`,
            TABLE_NAME AS `table`,
            COLUMN_NAME AS `column`,
            INDEX_NAME AS `index`,
            CASE
                WHEN INDEX_NAME = 'PRIMARY' THEN 'PRIMARY KEY'
                ELSE 'INDEX'
            END AS `index_type`
        FROM
            information_schema.STATISTICS
        WHERE
            TABLE_NAME IN ('{table}', '{table_trunc}')
    """,
    'mariadb': """
        SELECT
            TABLE_SCHEMA AS `database`,
            TABLE_SCHEMA AS `schema`,
            TABLE_NAME AS `table`,
            COLUMN_NAME AS `column`,
            INDEX_NAME AS `index`,
            CASE
                WHEN INDEX_NAME = 'PRIMARY' THEN 'PRIMARY KEY'
                ELSE 'INDEX'
            END AS `index_type`
        FROM
            information_schema.STATISTICS
        WHERE
            TABLE_NAME IN ('{table}', '{table_trunc}')
    """,
}
405
# Per-flavor queries which move a table's auto-increment counter to `{val}`.
# A value is either a single query or a list of queries.
# Available `str.format()` fields: `{table}` (unquoted), `{table_name}` (quoted, with schema),
# `{column}`, `{column_name}`, `{table_seq_name}` and `{val}`.
reset_autoincrement_queries: Dict[str, Union[str, List[str]]] = {
    # `pg_get_serial_sequence()` parses its first argument like a SQL identifier
    # (folded to lower case unless double-quoted, optionally schema-qualified),
    # so pass the quoted `{table_name}` rather than the bare `{table}`.
    # `FROM ... LIMIT 1` keeps the query a no-op on an empty table
    # while avoiding one `SETVAL()` call per row.
    'default': """
        SELECT SETVAL(pg_get_serial_sequence('{table_name}', '{column}'), {val})
        FROM {table_name}
        LIMIT 1
    """,
    'mssql': """
        DBCC CHECKIDENT ('{table}', RESEED, {val})
    """,
    'mysql': """
        ALTER TABLE {table_name} AUTO_INCREMENT = {val}
    """,
    'mariadb': """
        ALTER TABLE {table_name} AUTO_INCREMENT = {val}
    """,
    'sqlite': """
        UPDATE sqlite_sequence
        SET seq = {val}
        WHERE name = '{table}'
    """,
    # Advance the sequence by repeatedly calling NEXTVAL until it reaches `{val}`.
    'oracle': [
        """
        DECLARE
            max_id NUMBER := {val};
            current_val NUMBER;
        BEGIN
            SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;

            WHILE current_val < max_id LOOP
                SELECT {table_seq_name}.NEXTVAL INTO current_val FROM dual;
            END LOOP;
        END;
        """,
    ],
}
234
439
  table_wrappers = {
235
440
  'default' : ('"', '"'),
236
441
  'timescaledb': ('"', '"'),
@@ -349,9 +554,8 @@ def dateadd_str(
349
554
  "CAST('2022-01-01 00:00:00' AS TIMESTAMP) + INTERVAL '1 day'"
350
555
 
351
556
  """
352
- from meerschaum.utils.debug import dprint
353
557
  from meerschaum.utils.packages import attempt_import
354
- from meerschaum.utils.warnings import error
558
+ from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
355
559
  dateutil_parser = attempt_import('dateutil.parser')
356
560
  if 'int' in str(type(begin)).lower():
357
561
  return str(begin)
@@ -379,26 +583,32 @@ def dateadd_str(
379
583
  begin = begin.astimezone(timezone.utc)
380
584
  begin = (
381
585
  f"'{begin.replace(tzinfo=None)}'"
382
- if isinstance(begin, datetime)
586
+ if isinstance(begin, datetime) and flavor in TIMEZONE_NAIVE_FLAVORS
383
587
  else f"'{begin}'"
384
588
  )
385
589
 
590
+ dt_is_utc = begin_time.tzinfo is not None if begin_time is not None else '+' in str(begin)
591
+ db_type = get_db_type_from_pd_type(
592
+ ('datetime64[ns, UTC]' if dt_is_utc else 'datetime64[ns]'),
593
+ flavor=flavor,
594
+ )
595
+
386
596
  da = ""
387
597
  if flavor in ('postgresql', 'timescaledb', 'cockroachdb', 'citus'):
388
598
  begin = (
389
- f"CAST({begin} AS TIMESTAMP)" if begin != 'now'
390
- else "CAST(NOW() AT TIME ZONE 'utc' AS TIMESTAMP)"
599
+ f"CAST({begin} AS {db_type})" if begin != 'now'
600
+ else "CAST(NOW() AT TIME ZONE 'utc' AS {db_type})"
391
601
  )
392
602
  da = begin + (f" + INTERVAL '{number} {datepart}'" if number != 0 else '')
393
603
 
394
604
  elif flavor == 'duckdb':
395
- begin = f"CAST({begin} AS TIMESTAMP)" if begin != 'now' else 'NOW()'
605
+ begin = f"CAST({begin} AS {db_type})" if begin != 'now' else 'NOW()'
396
606
  da = begin + (f" + INTERVAL '{number} {datepart}'" if number != 0 else '')
397
607
 
398
608
  elif flavor in ('mssql',):
399
609
  if begin_time and begin_time.microsecond != 0:
400
610
  begin = begin[:-4] + "'"
401
- begin = f"CAST({begin} AS DATETIME)" if begin != 'now' else 'GETUTCDATE()'
611
+ begin = f"CAST({begin} AS {db_type})" if begin != 'now' else 'GETUTCDATE()'
402
612
  da = f"DATEADD({datepart}, {number}, {begin})" if number != 0 else begin
403
613
 
404
614
  elif flavor in ('mysql', 'mariadb'):
@@ -425,9 +635,9 @@ def dateadd_str(
425
635
 
426
636
 
427
637
  def test_connection(
428
- self,
429
- **kw: Any
430
- ) -> Union[bool, None]:
638
+ self,
639
+ **kw: Any
640
+ ) -> Union[bool, None]:
431
641
  """
432
642
  Test if a successful connection to the database may be made.
433
643
 
@@ -454,11 +664,11 @@ def test_connection(
454
664
 
455
665
 
456
666
  def get_distinct_col_count(
457
- col: str,
458
- query: str,
459
- connector: Optional[mrsm.connectors.sql.SQLConnector] = None,
460
- debug: bool = False
461
- ) -> Optional[int]:
667
+ col: str,
668
+ query: str,
669
+ connector: Optional[mrsm.connectors.sql.SQLConnector] = None,
670
+ debug: bool = False
671
+ ) -> Optional[int]:
462
672
  """
463
673
  Returns the number of distinct items in a column of a SQL query.
464
674
 
@@ -624,10 +834,10 @@ def truncate_item_name(item: str, flavor: str) -> str:
624
834
 
625
835
 
626
836
  def build_where(
627
- params: Dict[str, Any],
628
- connector: Optional[meerschaum.connectors.sql.SQLConnector] = None,
629
- with_where: bool = True,
630
- ) -> str:
837
+ params: Dict[str, Any],
838
+ connector: Optional[meerschaum.connectors.sql.SQLConnector] = None,
839
+ with_where: bool = True,
840
+ ) -> str:
631
841
  """
632
842
  Build the `WHERE` clause based on the input criteria.
633
843
 
@@ -769,7 +979,7 @@ def table_exists(
769
979
  ----------
770
980
  table: str:
771
981
  The name of the table in question.
772
-
982
+
773
983
  connector: mrsm.connectors.sql.SQLConnector
774
984
  The connector to the database which holds the table.
775
985
 
@@ -783,7 +993,6 @@ def table_exists(
783
993
  Returns
784
994
  -------
785
995
  A `bool` indicating whether or not the table exists on the database.
786
-
787
996
  """
788
997
  sqlalchemy = mrsm.attempt_import('sqlalchemy')
789
998
  schema = schema or connector.schema
@@ -806,7 +1015,7 @@ def get_sqlalchemy_table(
806
1015
  ----------
807
1016
  table: str
808
1017
  The name of the table on the database. Does not need to be escaped.
809
-
1018
+
810
1019
  connector: Optional[meerschaum.connectors.sql.SQLConnector], default None:
811
1020
  The connector to the database which holds the table.
812
1021
 
@@ -822,7 +1031,7 @@ def get_sqlalchemy_table(
822
1031
 
823
1032
  Returns
824
1033
  -------
825
- A `sqlalchemy.Table` object for the table.
1034
+ A `sqlalchemy.Table` object for the table.
826
1035
 
827
1036
  """
828
1037
  if connector is None:
@@ -888,6 +1097,7 @@ def get_table_cols_types(
888
1097
  connectable: Union[
889
1098
  'mrsm.connectors.sql.SQLConnector',
890
1099
  'sqlalchemy.orm.session.Session',
1100
+ 'sqlalchemy.engine.base.Engine'
891
1101
  ]
892
1102
  The connection object used to fetch the columns and types.
893
1103
 
@@ -1008,6 +1218,164 @@ def get_table_cols_types(
1008
1218
  return {}
1009
1219
 
1010
1220
 
1221
def get_table_cols_indices(
    table: str,
    connectable: Union[
        'mrsm.connectors.sql.SQLConnector',
        'sqlalchemy.orm.session.Session',
        'sqlalchemy.engine.base.Engine'
    ],
    flavor: Optional[str] = None,
    schema: Optional[str] = None,
    database: Optional[str] = None,
    debug: bool = False,
) -> Dict[str, List[Dict[str, str]]]:
    """
    Return a dictionary mapping a table's columns to lists of indices.
    This is useful for inspecting tables created during a not-yet-committed session.

    NOTE: This may return incorrect columns if the schema is not explicitly stated.
        Use this function if you are confident the table name is unique or if you have
        an explicit schema.
        To use the configured schema, get the columns from `get_sqlalchemy_table()` instead.

    Parameters
    ----------
    table: str
        The name of the table (unquoted).

    connectable: Union[
        'mrsm.connectors.sql.SQLConnector',
        'sqlalchemy.orm.session.Session',
        'sqlalchemy.engine.base.Engine'
    ]
        The connection object used to fetch the columns and indices.

    flavor: Optional[str], default None
        The database dialect flavor to use for the query.
        If omitted, default to `connectable.flavor`.

    schema: Optional[str], default None
        If provided, restrict the query to this schema.

    database: Optional[str], default None
        If provided, restrict the query to this database.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    A dictionary mapping column names to a list of index dictionaries,
    each with the keys `'name'` and `'type'`.
    An empty dictionary is returned (with a warning) if the query fails.

    Raises
    ------
    ValueError
        If no flavor can be determined, or if the flavor is `'duckdb'`
        and `connectable` is not a `SQLConnector`.
    """
    from collections import defaultdict
    from meerschaum.connectors import SQLConnector
    sqlalchemy = mrsm.attempt_import('sqlalchemy')
    flavor = flavor or getattr(connectable, 'flavor', None)
    if not flavor:
        raise ValueError("Please provide a database flavor.")
    if flavor == 'duckdb' and not isinstance(connectable, SQLConnector):
        raise ValueError("You must provide a SQLConnector when using DuckDB.")
    if flavor in NO_SCHEMA_FLAVORS:
        schema = None
    if schema is None:
        schema = DEFAULT_SCHEMA_FLAVORS.get(flavor, None)
    if flavor in ('sqlite', 'duckdb', 'oracle'):
        database = None
    table_trunc = truncate_item_name(table, flavor=flavor)
    table_lower = table.lower()
    table_upper = table.upper()
    table_lower_trunc = truncate_item_name(table_lower, flavor=flavor)
    table_upper_trunc = truncate_item_name(table_upper, flavor=flavor)
    db_prefix = (
        "tempdb."
        if flavor == 'mssql' and table.startswith('#')
        else ""
    )

    cols_indices_query = sqlalchemy.text(
        columns_indices_queries.get(
            flavor,
            columns_indices_queries['default']
        ).format(
            table=table,
            table_trunc=table_trunc,
            table_lower=table_lower,
            table_lower_trunc=table_lower_trunc,
            table_upper=table_upper,
            table_upper_trunc=table_upper_trunc,
            db_prefix=db_prefix,
            schema=schema,
        )
    )

    ### The queries return these columns positionally, in this order.
    cols = ['database', 'schema', 'table', 'column', 'index', 'index_type']
    result_cols_ix = dict(enumerate(cols))

    ### Only `SQLConnector.execute()` is passed `debug`;
    ### for other connectables, print the query here instead.
    debug_kwargs = {'debug': debug} if isinstance(connectable, SQLConnector) else {}
    if not debug_kwargs and debug:
        dprint(cols_indices_query)

    try:
        result_rows = (
            list(connectable.execute(cols_indices_query, **debug_kwargs).fetchall())
            if flavor != 'duckdb'
            else [
                tuple([doc[col] for col in cols])
                for doc in connectable.read(cols_indices_query, debug=debug).to_dict(orient='records')
            ]
        )
        cols_indices_docs = [
            {
                result_cols_ix[i]: val
                for i, val in enumerate(row)
            }
            for row in result_rows
        ]
        cols_indices_docs_filtered = [
            doc
            for doc in cols_indices_docs
            if (
                (
                    not schema
                    or doc['schema'] == schema
                )
                and
                (
                    not database
                    or doc['database'] == database
                )
            )
        ]

        ### NOTE: This may return incorrect columns if the schema is not explicitly stated.
        if cols_indices_docs and not cols_indices_docs_filtered:
            cols_indices_docs_filtered = cols_indices_docs

        cols_indices = defaultdict(list)
        for doc in cols_indices_docs_filtered:
            ### For Oracle, lower-case column names which are entirely
            ### upper-case letters (ignoring underscores).
            col = (
                doc['column']
                if flavor != 'oracle'
                else (
                    doc['column'].lower()
                    if (doc['column'].isupper() and doc['column'].replace('_', '').isalpha())
                    else doc['column']
                )
            )
            cols_indices[col].append(
                {
                    'name': doc.get('index', None),
                    'type': doc.get('index_type', None),
                }
            )

        return dict(cols_indices)
    except Exception as e:
        warn(f"Failed to fetch indices for table '{table}':\n{e}")
        return {}
1377
+
1378
+
1011
1379
  def get_update_queries(
1012
1380
  target: str,
1013
1381
  patch: str,
@@ -1248,10 +1616,11 @@ def get_null_replacement(typ: str, flavor: str) -> str:
1248
1616
  A value which may stand in place of NULL for this type.
1249
1617
  `'None'` is returned if a value cannot be determined.
1250
1618
  """
1619
+ from meerschaum.utils.dtypes import are_dtypes_equal
1251
1620
  from meerschaum.utils.dtypes.sql import DB_FLAVORS_CAST_DTYPES
1252
1621
  if 'int' in typ.lower() or typ.lower() in ('numeric', 'number'):
1253
1622
  return '-987654321'
1254
- if 'bool' in typ.lower():
1623
+ if 'bool' in typ.lower() or typ.lower() == 'bit':
1255
1624
  bool_typ = (
1256
1625
  PD_TO_DB_DTYPES_FLAVORS
1257
1626
  .get('bool', {})
@@ -1261,7 +1630,7 @@ def get_null_replacement(typ: str, flavor: str) -> str:
1261
1630
  bool_typ = DB_FLAVORS_CAST_DTYPES[flavor].get(bool_typ, bool_typ)
1262
1631
  val_to_cast = (
1263
1632
  -987654321
1264
- if flavor in ('mysql', 'mariadb', 'sqlite', 'mssql')
1633
+ if flavor in ('mysql', 'mariadb')
1265
1634
  else 0
1266
1635
  )
1267
1636
  return f'CAST({val_to_cast} AS {bool_typ})'
@@ -1269,6 +1638,8 @@ def get_null_replacement(typ: str, flavor: str) -> str:
1269
1638
  return dateadd_str(flavor=flavor, begin='1900-01-01')
1270
1639
  if 'float' in typ.lower() or 'double' in typ.lower() or typ.lower() in ('decimal',):
1271
1640
  return '-987654321.0'
1641
+ if flavor == 'oracle' and typ.lower().split('(', maxsplit=1)[0] == 'char':
1642
+ return "'-987654321'"
1272
1643
  if typ.lower() in ('uniqueidentifier', 'guid', 'uuid'):
1273
1644
  magic_val = 'DEADBEEF-ABBA-BABE-CAFE-DECAFC0FFEE5'
1274
1645
  if flavor == 'mssql':
@@ -1325,35 +1696,48 @@ def get_rename_table_queries(
1325
1696
 
1326
1697
  if_exists_str = "IF EXISTS" if flavor in DROP_IF_EXISTS_FLAVORS else ""
1327
1698
  if flavor == 'duckdb':
1328
- return [
1329
- get_create_table_query(f"SELECT * FROM {old_table_name}", tmp_table, 'duckdb', schema),
1330
- get_create_table_query(f"SELECT * FROM {tmp_table_name}", new_table, 'duckdb', schema),
1331
- f"DROP TABLE {if_exists_str} {tmp_table_name}",
1332
- f"DROP TABLE {if_exists_str} {old_table_name}",
1333
- ]
1699
+ return (
1700
+ get_create_table_queries(
1701
+ f"SELECT * FROM {old_table_name}",
1702
+ tmp_table,
1703
+ 'duckdb',
1704
+ schema,
1705
+ ) + get_create_table_queries(
1706
+ f"SELECT * FROM {tmp_table_name}",
1707
+ new_table,
1708
+ 'duckdb',
1709
+ schema,
1710
+ ) + [
1711
+ f"DROP TABLE {if_exists_str} {tmp_table_name}",
1712
+ f"DROP TABLE {if_exists_str} {old_table_name}",
1713
+ ]
1714
+ )
1334
1715
 
1335
1716
  return [f"ALTER TABLE {old_table_name} RENAME TO {new_table_name}"]
1336
1717
 
1337
1718
 
1338
1719
  def get_create_table_query(
1339
- query: str,
1720
+ query_or_dtypes: Union[str, Dict[str, str]],
1340
1721
  new_table: str,
1341
1722
  flavor: str,
1342
1723
  schema: Optional[str] = None,
1343
1724
  ) -> str:
1344
1725
  """
1726
+ NOTE: This function is deprecated. Use `get_create_table_queries()` instead.
1727
+
1345
1728
  Return a query to create a new table from a `SELECT` query.
1346
1729
 
1347
1730
  Parameters
1348
1731
  ----------
1349
- query: str
1732
+ query: Union[str, Dict[str, str]]
1350
1733
  The select query to use for the creation of the table.
1734
+ If a dictionary is provided, return a `CREATE TABLE` query from the given `dtypes` columns.
1351
1735
 
1352
1736
  new_table: str
1353
1737
  The unquoted name of the new table.
1354
1738
 
1355
1739
  flavor: str
1356
- The database flavor to use for the query (e.g. `'mssql'`, `'postgresql'`.
1740
+ The database flavor to use for the query (e.g. `'mssql'`, `'postgresql'`).
1357
1741
 
1358
1742
  schema: Optional[str], default None
1359
1743
  The schema on which the table will reside.
@@ -1362,26 +1746,202 @@ def get_create_table_query(
1362
1746
  -------
1363
1747
  A `CREATE TABLE` (or `SELECT INTO`) query for the database flavor.
1364
1748
  """
1749
+ return get_create_table_queries(
1750
+ query_or_dtypes,
1751
+ new_table,
1752
+ flavor,
1753
+ schema=schema,
1754
+ primary_key=None,
1755
+ )[0]
1756
+
1757
+
1758
def get_create_table_queries(
    query_or_dtypes: Union[str, Dict[str, str]],
    new_table: str,
    flavor: str,
    schema: Optional[str] = None,
    primary_key: Optional[str] = None,
    autoincrement: bool = False,
    datetime_column: Optional[str] = None,
) -> List[str]:
    """
    Build the queries which create a new table,
    either from a `SELECT` query or from a `dtypes` dictionary.

    Parameters
    ----------
    query_or_dtypes: Union[str, Dict[str, str]]
        A `SELECT` query whose results populate the new table,
        or a `dtypes` dictionary of columns from which to build a `CREATE TABLE` query.

    new_table: str
        The unquoted name of the new table.

    flavor: str
        The database flavor to use for the query (e.g. `'mssql'`, `'postgresql'`).

    schema: Optional[str], default None
        The schema on which the table will reside.

    primary_key: Optional[str], default None
        If provided, designate this column as the primary key in the new table.

    autoincrement: bool, default False
        If `True` and `primary_key` is provided, create the `primary_key` column
        as an auto-incrementing integer column.
        Ignored for flavors in `SKIP_AUTO_INCREMENT_FLAVORS`.

    datetime_column: Optional[str], default None
        If provided, include this column in the primary key.
        Applicable to TimescaleDB only.

    Returns
    -------
    A list of queries (`CREATE TABLE` or `SELECT INTO`, plus any follow-up queries)
    for the database flavor.

    Raises
    ------
    TypeError
        If `query_or_dtypes` is neither a string nor a dictionary.
    """
    if isinstance(query_or_dtypes, str):
        build_queries = _get_create_table_query_from_cte
    elif isinstance(query_or_dtypes, dict):
        build_queries = _get_create_table_query_from_dtypes
    else:
        raise TypeError("`query_or_dtypes` must be a query or a dtypes dictionary.")

    use_autoincrement = autoincrement and flavor not in SKIP_AUTO_INCREMENT_FLAVORS
    return build_queries(
        query_or_dtypes,
        new_table,
        flavor,
        schema=schema,
        primary_key=primary_key,
        autoincrement=use_autoincrement,
        datetime_column=datetime_column,
    )
1817
+
1818
+
1819
def _get_create_table_query_from_dtypes(
    dtypes: Dict[str, str],
    new_table: str,
    flavor: str,
    schema: Optional[str] = None,
    primary_key: Optional[str] = None,
    autoincrement: bool = False,
    datetime_column: Optional[str] = None,
) -> List[str]:
    """
    Build a `CREATE TABLE` statement from a `dtypes` dictionary.

    The primary key column (if any) is declared first, followed by the remaining
    columns in the order in which they appear in `dtypes`.
    The statement is returned as a single-item list.
    """
    from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type, AUTO_INCREMENT_COLUMN_FLAVORS
    if not dtypes and not primary_key:
        raise ValueError(f"Expecting columns for table '{new_table}'.")

    if flavor in SKIP_AUTO_INCREMENT_FLAVORS:
        autoincrement = False

    ### A primary key missing from `dtypes` is assumed to be an integer.
    pk_db_type = (
        get_db_type_from_pd_type(dtypes.get(primary_key, 'int'), flavor=flavor)
        if primary_key
        else None
    )
    other_cols_types = [
        (col, get_db_type_from_pd_type(typ, flavor=flavor))
        for col, typ in dtypes.items()
        if col != primary_key
    ]

    table_name = sql_item_name(new_table, schema=schema, flavor=flavor)
    primary_key_name = sql_item_name(primary_key, flavor) if primary_key else None
    datetime_column_name = sql_item_name(datetime_column, flavor) if datetime_column else None

    ### The TimescaleDB composite key is declared as a table-level constraint.
    composite_primary_key = (
        flavor == 'timescaledb'
        and datetime_column
        and primary_key
        and datetime_column != primary_key
    )

    ### Every clause after the opening one ends with a comma;
    ### the final character is trimmed before closing the parentheses.
    clauses = [f"CREATE TABLE {table_name} ("]
    if primary_key:
        if autoincrement or primary_key not in dtypes:
            auto_increment_str = ' ' + AUTO_INCREMENT_COLUMN_FLAVORS.get(
                flavor,
                AUTO_INCREMENT_COLUMN_FLAVORS['default']
            )
        else:
            auto_increment_str = ''
        pk_col_name = sql_item_name(primary_key, flavor=flavor, schema=None)

        if flavor == 'sqlite':
            sqlite_pk_type = f"{pk_db_type}" if not auto_increment_str else 'INTEGER'
            clauses.append(
                f"\n    {pk_col_name} "
                + sqlite_pk_type
                + f" PRIMARY KEY{auto_increment_str} NOT NULL,"
            )
        elif flavor == 'oracle':
            clauses.append(f"\n    {pk_col_name} {pk_db_type} {auto_increment_str} PRIMARY KEY,")
        elif flavor == 'timescaledb' and datetime_column and datetime_column != primary_key:
            clauses.append(f"\n    {pk_col_name} {pk_db_type}{auto_increment_str} NOT NULL,")
        else:
            clauses.append(
                f"\n    {pk_col_name} {pk_db_type} PRIMARY KEY{auto_increment_str} NOT NULL,"
            )

    for col, db_type in other_cols_types:
        col_name = sql_item_name(col, schema=None, flavor=flavor)
        clauses.append(f"\n    {col_name} {db_type},")

    if composite_primary_key:
        clauses.append(f"\n    PRIMARY KEY({datetime_column_name}, {primary_key_name}),")

    create_query = "".join(clauses)[:-1] + "\n)"
    return [create_query]
1890
+
1891
+
1892
+ def _get_create_table_query_from_cte(
1893
+ query: str,
1894
+ new_table: str,
1895
+ flavor: str,
1896
+ schema: Optional[str] = None,
1897
+ primary_key: Optional[str] = None,
1898
+ autoincrement: bool = False,
1899
+ datetime_column: Optional[str] = None,
1900
+ ) -> List[str]:
1901
+ """
1902
+ Create a new table from a CTE query.
1903
+ """
1365
1904
  import textwrap
1905
+ from meerschaum.utils.dtypes.sql import AUTO_INCREMENT_COLUMN_FLAVORS
1366
1906
  create_cte = 'create_query'
1367
1907
  create_cte_name = sql_item_name(create_cte, flavor, None)
1368
1908
  new_table_name = sql_item_name(new_table, flavor, schema)
1909
+ primary_key_constraint_name = (
1910
+ sql_item_name(f'pk_{new_table}', flavor, None)
1911
+ if primary_key
1912
+ else None
1913
+ )
1914
+ primary_key_name = (
1915
+ sql_item_name(primary_key, flavor, None)
1916
+ if primary_key
1917
+ else None
1918
+ )
1919
+ datetime_column_name = (
1920
+ sql_item_name(datetime_column, flavor)
1921
+ if datetime_column
1922
+ else None
1923
+ )
1369
1924
  if flavor in ('mssql',):
1370
1925
  query = query.lstrip()
1371
1926
  if 'with ' in query.lower():
1372
1927
  final_select_ix = query.lower().rfind('select')
1373
- return (
1928
+ create_table_query = (
1374
1929
  query[:final_select_ix].rstrip() + ',\n'
1375
1930
  + f"{create_cte_name} AS (\n"
1376
1931
  + query[final_select_ix:]
1377
1932
  + "\n)\n"
1378
1933
  + f"SELECT *\nINTO {new_table_name}\nFROM {create_cte_name}"
1379
1934
  )
1380
-
1381
- create_table_query = f"""
1382
- SELECT *
1383
- INTO {new_table_name}
1384
- FROM ({query}) AS {create_cte_name}
1935
+ else:
1936
+ create_table_query = f"""
1937
+ SELECT *
1938
+ INTO {new_table_name}
1939
+ FROM ({query}) AS {create_cte_name}
1940
+ """
1941
+
1942
+ alter_type_query = f"""
1943
+ ALTER TABLE {new_table_name}
1944
+ ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})
1385
1945
  """
1386
1946
  elif flavor in (None,):
1387
1947
  create_table_query = f"""
@@ -1390,12 +1950,33 @@ def get_create_table_query(
1390
1950
  SELECT *
1391
1951
  FROM {create_cte_name}
1392
1952
  """
1953
+
1954
+ alter_type_query = f"""
1955
+ ALTER TABLE {new_table_name}
1956
+ ADD PRIMARY KEY ({primary_key_name})
1957
+ """
1393
1958
  elif flavor in ('sqlite', 'mysql', 'mariadb', 'duckdb', 'oracle'):
1394
1959
  create_table_query = f"""
1395
1960
  CREATE TABLE {new_table_name} AS
1396
1961
  SELECT *
1397
1962
  FROM ({query})""" + (f""" AS {create_cte_name}""" if flavor != 'oracle' else '') + """
1398
1963
  """
1964
+
1965
+ alter_type_query = f"""
1966
+ ALTER TABLE {new_table_name}
1967
+ ADD PRIMARY KEY ({primary_key_name})
1968
+ """
1969
+ elif flavor == 'timescaledb' and datetime_column and datetime_column != primary_key:
1970
+ create_table_query = f"""
1971
+ SELECT *
1972
+ INTO {new_table_name}
1973
+ FROM ({query}) AS {create_cte_name}
1974
+ """
1975
+
1976
+ alter_type_query = f"""
1977
+ ALTER TABLE {new_table_name}
1978
+ ADD PRIMARY KEY ({datetime_column_name}, {primary_key_name})
1979
+ """
1399
1980
  else:
1400
1981
  create_table_query = f"""
1401
1982
  SELECT *
@@ -1403,7 +1984,21 @@ def get_create_table_query(
1403
1984
  FROM ({query}) AS {create_cte_name}
1404
1985
  """
1405
1986
 
1406
- return textwrap.dedent(create_table_query)
1987
+ alter_type_query = f"""
1988
+ ALTER TABLE {new_table_name}
1989
+ ADD PRIMARY KEY ({primary_key_name})
1990
+ """
1991
+
1992
+ create_table_query = textwrap.dedent(create_table_query)
1993
+ if not primary_key:
1994
+ return [create_table_query]
1995
+
1996
+ alter_type_query = textwrap.dedent(alter_type_query)
1997
+
1998
+ return [
1999
+ create_table_query,
2000
+ alter_type_query,
2001
+ ]
1407
2002
 
1408
2003
 
1409
2004
  def wrap_query_with_cte(
@@ -1574,3 +2169,61 @@ def session_execute(
1574
2169
  if with_results:
1575
2170
  return (success, msg), results
1576
2171
  return success, msg
2172
+
2173
+
2174
def get_reset_autoincrement_queries(
    table: str,
    column: str,
    connector: mrsm.connectors.SQLConnector,
    schema: Optional[str] = None,
    debug: bool = False,
) -> List[str]:
    """
    Return a list of queries to reset a table's auto-increment counter
    to the current maximum value of `column`.

    Parameters
    ----------
    table: str
        The unquoted name of the table.

    column: str
        The unquoted name of the auto-incrementing column.

    connector: mrsm.connectors.SQLConnector
        The connector to the database which holds the table.

    schema: Optional[str], default None
        The table's schema. Defaults to `connector.schema`.

    debug: bool, default False
        Verbosity toggle.

    Returns
    -------
    The formatted queries for the connector's flavor,
    or an empty list if the table does not exist or the maximum value cannot be read.
    """
    if not table_exists(table, connector, schema=schema, debug=debug):
        return []

    flavor = connector.flavor
    schema = schema or connector.schema
    max_id_name = sql_item_name('max_id', flavor)
    table_name = sql_item_name(table, flavor, schema)
    table_trunc = truncate_item_name(table, flavor)
    ### Fallback sequence name: `<table>_<column>_seq`.
    table_seq_name = sql_item_name(table + '_' + column + '_seq', flavor, schema)
    column_name = sql_item_name(column, flavor)
    if flavor == 'oracle':
        ### Prefer the sequence name reported for the table's identity column.
        identity_cols_df = connector.read(
            f"""
            SELECT SEQUENCE_NAME
            FROM ALL_TAB_IDENTITY_COLS
            WHERE TABLE_NAME IN ('{table_trunc.upper()}')
            """,
            debug=debug,
        )
        ### NOTE(review): `read()` presumably returns `None` on failure -- guard before `len()`.
        if identity_cols_df is not None and len(identity_cols_df) > 0:
            table_seq_name = identity_cols_df['sequence_name'][0]

    max_id = connector.value(
        f"""
        SELECT COALESCE(MAX({column_name}), 0) AS {max_id_name}
        FROM {table_name}
        """,
        debug=debug,
    )
    if max_id is None:
        return []

    reset_queries = reset_autoincrement_queries.get(
        flavor,
        reset_autoincrement_queries['default']
    )
    if not isinstance(reset_queries, list):
        reset_queries = [reset_queries]

    return [
        query.format(
            column=column,
            column_name=column_name,
            table=table,
            table_name=table_name,
            table_seq_name=table_seq_name,
            val=max_id,
        )
        for query in reset_queries
    ]
+ ]