mtsql 1.7.202312151026__py3-none-any.whl → 1.9.202401091637__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mt/sql/base.py CHANGED
@@ -1,5 +1,6 @@
 """Base functions dealing with an SQL database."""
 
+import uuid
 import sqlalchemy as sa
 import sqlalchemy.exc as se
 import psycopg2 as ps
@@ -11,6 +12,7 @@ from mt.base import deprecated_func
 
 __all__ = [
     "frame_sql",
+    "indices",
     "run_func",
     "conn_ctx",
     "engine_execute",
@@ -19,6 +21,9 @@ __all__ = [
     "exec_sql",
     "list_schemas",
     "list_tables",
+    "list_views",
+    "table_exists",
+    "create_temp_id_table",
 ]
 
 
@@ -26,6 +31,12 @@ def frame_sql(frame_name, schema: tp.Optional[str] = None):
     return frame_name if schema is None else "{}.{}".format(schema, frame_name)
 
 
+def indices(df):
+    """Returns the list of named indices of the dataframe, ignoring any unnamed index."""
+    a = list(df.index.names)
+    return a if a != [None] else []
+
+
 # ----- functions dealing with sql queries to overcome OperationalError -----
 
 
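The `indices()` helper, moved here from `mt.sql.psql` (see the removal below), reads the dataframe's index names. A minimal sketch of what it returns, assuming only pandas:

    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2]})  # default unnamed RangeIndex
    df2 = df1.set_index(pd.Index([10, 20], name="id"))

    # an unnamed index yields [None], which indices() maps to []
    assert list(df1.index.names) == [None]  # hence indices(df1) == []
    assert list(df2.index.names) == ["id"]  # hence indices(df2) == ["id"]
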
@@ -34,7 +45,7 @@ def run_func(
     *args,
     nb_trials: int = 3,
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ):
     """Attempt to run a function a number of times to overcome OperationalError exceptions.
 
@@ -97,7 +108,7 @@ def read_sql(
     nb_trials: int = 3,
     exception_handling: str = "raise",
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ) -> pd.DataFrame:
     """Read an SQL query with a number of trials to overcome OperationalError.
 
@@ -165,7 +176,7 @@ def read_sql(
         chunksize=chunksize,
         nb_trials=nb_trials,
         logger=logger,
-        **kwargs
+        **kwargs,
     )
 
     if chunksize is None:
@@ -200,66 +211,12 @@ def read_sql(
         return df
 
 
-@deprecated_func(
-    "1.0",
-    suggested_func="mt.sql.base.read_sql",
-    removed_version="2.0",
-    docstring_prefix=" ",
-)
-def read_sql_query(
-    sql,
-    engine,
-    index_col=None,
-    set_index_after=False,
-    nb_trials: int = 3,
-    logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
-):
-    """Read an SQL query with a number of trials to overcome OperationalError.
-
-    Parameters
-    ----------
-    sql : str
-        SQL query to be executed
-    engine : sqlalchemy.engine.Engine
-        connection engine to the server
-    index_col: string or list of strings, optional, default: None
-        Column(s) to set as index(MultiIndex). See :func:`pandas.read_sql_query`.
-    set_index_after: bool
-        whether to set index specified by index_col via the pandas.read_sql_query() function or
-        after the function has been invoked
-    nb_trials: int
-        number of query trials
-    logger: mt.logg.IndentedLoggerAdapter, optional
-        logger for debugging
-    kwargs: dict
-        other keyword arguments to be passed directly to :func:`pandas.read_sql_query`
-
-    See Also
-    --------
-    pandas.read_sql_query
-    """
-
-    df = read_sql(
-        sql,
-        engine,
-        index_col=index_col,
-        nb_trials=nb_trials,
-        exception_handling="raise",
-        logger=logger,
-        **kwargs
-    )
-    if index_col is None or not set_index_after:
-        return df
-    return df.set_index(index_col, drop=True)
-
-
 def read_sql_table(
     table_name,
     engine,
     nb_trials: int = 3,
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ):
     """Read an SQL table with a number of trials to overcome OperationalError.
 
@@ -285,7 +242,7 @@ def read_sql_table(
         engine,
         nb_trials=nb_trials,
         logger=logger,
-        **kwargs
+        **kwargs,
     )
 
 
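With the deprecated `read_sql_query()` wrapper removed ahead of the advertised 2.0 cut-off, callers migrate to `read_sql()` directly. A hedged sketch of the equivalent call, derived from the removed wrapper's body above (`sql` and `engine` assumed already defined):

    from mt.sql.base import read_sql

    # before (removed wrapper):
    # df = read_sql_query(sql, engine, index_col="id", set_index_after=True)

    # after: call read_sql() and set the index explicitly when needed
    df = read_sql(sql, engine, nb_trials=3, exception_handling="raise")
    df = df.set_index("id", drop=True)
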
@@ -295,7 +252,7 @@ def exec_sql(
     *args,
     nb_trials: int = 3,
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ):
     """Execute an SQL query with a number of trials to overcome OperationalError.
 
@@ -358,3 +315,74 @@ def list_tables(engine, schema: tp.Optional[str] = None):
         list of all table names
     """
     return sa.inspect(engine).get_table_names(schema=schema)
+
+
+def list_views(engine, schema: tp.Optional[str] = None):
+    """Lists all views of a given schema.
+
+    Parameters
+    ----------
+    engine : sqlalchemy.engine.Engine
+        connection engine to the server
+    schema: str, optional
+        a valid schema name returned from :func:`list_schemas`. Defaults to SQLAlchemy's default schema.
+
+    Returns
+    -------
+    list
+        list of all view names
+    """
+    return sa.inspect(engine).get_view_names(schema=schema)
+
+
+def table_exists(
+    table_name,
+    engine,
+    schema: tp.Optional[str] = None,
+):
+    """Checks if a table exists.
+
+    Parameters
+    ----------
+    table_name: str
+        name of table
+    engine: sqlalchemy.engine.Engine
+        an sqlalchemy connection engine created by function `create_engine()`
+    schema: str or None
+        a valid schema name returned from `list_schemas()`
+
+    Returns
+    -------
+    retval: bool
+        whether a table or a view exists with the given name
+    """
+
+    return sa.inspect(engine).has_table(table_name, schema=schema)
+
+
+def create_temp_id_table(l_ids: list, conn: sa.engine.Connection) -> str:
+    """Creates a temporary table containing a list of ids.
+
+    Parameters
+    ----------
+    l_ids : list
+        list of ids to be inserted into the table
+    conn : sqlalchemy.engine.Connection
+        a connection that has been opened
+
+    Returns
+    -------
+    table_name : str
+        name of the temporary table. The table will be deleted at the end of the connection
+    """
+
+    table_name = f"tab_{uuid.uuid4().hex}"
+
+    query_str = f"CREATE TEMP TABLE {table_name}(id int);"
+    conn.execute(sa.text(query_str))
+
+    values = ",".join((f"({id})" for id in l_ids))
+    query_str = f"INSERT INTO {table_name}(id) VALUES {values};"
+    conn.execute(sa.text(query_str))
+
+    return table_name
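Together, the new helpers support id-filtered reads against a temporary table. A usage sketch under assumed names (`my_table` and the DSN are hypothetical); note that `CREATE TEMP TABLE` is connection-scoped, so the follow-up query must run on the same connection:

    import sqlalchemy as sa
    from mt.sql.base import create_temp_id_table, read_sql

    engine = sa.create_engine("postgresql://user:pass@host/db")  # hypothetical DSN
    with engine.connect() as conn:
        # temp table of ids, dropped when the connection closes
        tmp = create_temp_id_table([1, 2, 3], conn)
        df = read_sql(f"SELECT t.* FROM my_table t JOIN {tmp} i ON t.id = i.id", conn)
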
mt/sql/psql.py CHANGED
@@ -16,12 +16,10 @@ __all__ = [
     "pg_get_locked_transactions",
     "pg_cancel_backend",
     "pg_cancel_all_backends",
-    "indices",
     "compliance_check",
     "as_column_name",
     "to_sql",
     "rename_schema",
-    "list_views",
     "list_matviews",
     "list_foreign_tables",
     "list_frames",
@@ -137,12 +135,6 @@ def pg_cancel_all_backends(
 # ----- functions dealing with sql queries to overcome OperationalError -----
 
 
-def indices(df):
-    """Returns the list of named indices of the dataframe, ignoring any unnamed index."""
-    a = list(df.index.names)
-    return a if a != [None] else []
-
-
 def compliance_check(df: pd.DataFrame):
     """Checks if a dataframe is compliant to PSQL.
 
@@ -207,7 +199,7 @@ def to_sql(
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
     **kwargs,
 ):
-    """Writes records stored in a DataFrame to an SQL database.
+    """Writes records stored in a DataFrame to a PostgreSQL database.
 
     With a number of trials to overcome OperationalError.
 
@@ -391,42 +383,6 @@ def rename_schema(
     )
 
 
-def list_views(
-    engine,
-    schema: tp.Optional[str] = None,
-    nb_trials: int = 3,
-    logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-):
-    """Lists all views of a given schema.
-
-    Parameters
-    ----------
-    engine: sqlalchemy.engine.Engine
-        an sqlalchemy connection engine created by function `create_engine()`
-    schema: str or None
-        a valid schema name returned from `list_schemas()`
-    nb_trials: int
-        number of query trials
-    logger: mt.logg.IndentedLoggerAdapter, optional
-        logger for debugging
-
-    Returns
-    -------
-    out: list
-        list of all view names
-    """
-    if schema is None:
-        query_str = "select distinct viewname from pg_views;"
-    else:
-        query_str = (
-            "select distinct viewname from pg_views where schemaname='{}';".format(
-                schema
-            )
-        )
-    df = read_sql(query_str, engine, nb_trials=nb_trials, logger=logger)
-    return df["viewname"].tolist()
-
-
 def list_matviews(
     engine,
     schema: tp.Optional[str] = None,
@@ -521,7 +477,7 @@ def list_frames(
     data = []
     for item in list_tables(engine, schema=schema):
         data.append((item, "table"))
-    for item in list_views(engine, schema=schema, nb_trials=nb_trials, logger=logger):
+    for item in list_views(engine, schema=schema):
         data.append((item, "view"))
     for item in list_matviews(
         engine, schema=schema, nb_trials=nb_trials, logger=logger
@@ -990,11 +946,9 @@ def frame_exists(
     retval: bool
         whether a table or a view exists with the given name
     """
-    if frame_name in list_tables(engine, schema=schema):
+    if table_exists(frame_name, engine, schema=schema):
         return True
-    if frame_name in list_views(
-        engine, schema=schema, nb_trials=nb_trials, logger=logger
-    ):
+    if frame_name in list_views(engine, schema=schema):
         return True
     return frame_name in list_matviews(
         engine, schema=schema, nb_trials=nb_trials, logger=logger
@@ -1041,9 +995,7 @@ def drop_frame(
         nb_trials=nb_trials,
         logger=logger,
     )
-    if frame_name in list_views(
-        engine, schema=schema, nb_trials=nb_trials, logger=logger
-    ):
+    if frame_name in list_views(engine, schema=schema):
         return drop_view(
             frame_name,
             engine,
@@ -2052,11 +2004,9 @@ def readsync_table(
 
     if len(new_md5_df) != len(new_df):
         if logger:
-            logger.debug("New dataframe:\n{}".format(str(new_df)))
-            logger.debug("Hash dataframe:\n{}".format(str(new_md5_df)))
-        msg = "Something must have gone wrong. Number of hashes {} != number of records {}.".format(
-            len(new_md5_df), len(new_df)
-        )
+            logger.debug(f"New dataframe:\n{str(new_df)}")
+            logger.debug(f"Hash dataframe:\n{str(new_md5_df)}")
+        msg = f"Something must have gone wrong. Number of hashes {len(new_md5_df)} != number of records {len(new_df)}."
         if raise_exception_upon_mismatch:
             raise RuntimeError(msg)
         elif logger:
@@ -2081,7 +2031,7 @@ def readsync_table(
 
     # write back
     if logger:
-        logger.debug("Saving all {} records to file...".format(len(df)))
+        logger.debug(f"Saving all {len(df)} records to file...")
     if bg_write_csv is True:
         bg = BgInvoke(pd.dfsave, df, df_filepath, index=True)
     return df, bg
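Because `list_views()` now lives in `mt.sql.base` and delegates to SQLAlchemy's inspector instead of retrying a raw `pg_views` query, it no longer accepts `nb_trials` or `logger`; call sites simply drop those arguments:

    from mt.sql.base import list_views

    # before (PSQL-specific, retried raw SQL against pg_views):
    # views = list_views(engine, schema="public", nb_trials=3, logger=logger)

    # after (dialect-agnostic, via sqlalchemy.inspect):
    views = list_views(engine, schema="public")
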
mt/sql/redshift.py CHANGED
@@ -3,6 +3,7 @@
 from mt import tp, logg
 
 from .base import *
+from .psql import compliance_check
 
 
 __api__ = [
@@ -418,3 +419,109 @@ def drop_column(
         schema, table_name, column_name
     )
     exec_sql(query_str, engine, nb_trials=nb_trials, logger=logger)
+
+
+# ----- functions dealing with sql queries to overcome OperationalError -----
+
+
+def to_sql(
+    df,
+    name,
+    engine,
+    schema: tp.Optional[str] = None,
+    if_exists="fail",
+    nb_trials: int = 3,
+    logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
+    **kwargs,
+):
+    """Writes records stored in a DataFrame to a Redshift database.
+
+    With a number of trials to overcome OperationalError.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        dataframe to be sent to the server
+    name : str
+        name of the table to be written to
+    engine : sqlalchemy.engine.Engine
+        connection engine to the server
+    schema: string, optional
+        Specify the schema. If None, use default schema.
+    if_exists: str
+        what to do when the table exists. Passed as-is to :func:`pandas.DataFrame.to_sql`.
+    nb_trials: int
+        number of query trials
+    logger: mt.logg.IndentedLoggerAdapter, optional
+        logger for debugging
+    kwargs : dict
+        keyword arguments passed as-is to :func:`pandas.DataFrame.to_sql`
+
+    Raises
+    ------
+    sqlalchemy.exc.ProgrammingError if the local and remote frames do not have the same structure
+
+    Notes
+    -----
+    The function takes as input a PSQL-compliant dataframe (see `compliance_check()`). It rejects
+    any input `index` or `index_label` keyword. Instead, it considers 2 cases. If the dataframe has
+    an index or indices, then the tuple of all indices is turned into the primary key. If not,
+    there is no primary key and no index is uploaded.
+
+    See Also
+    --------
+    pandas.DataFrame.to_sql()
+
+    """
+
+    if kwargs:
+        if "index" in kwargs:
+            raise ValueError(
+                "The `mt.sql.redshift.to_sql()` function does not accept `index` as a keyword."
+            )
+        if "index_label" in kwargs:
+            raise ValueError(
+                "The `mt.sql.redshift.to_sql()` function does not accept `index_label` as a keyword."
+            )
+
+    compliance_check(df)
+    frame_sql_str = frame_sql(name, schema=schema)
+
+    # if the remote frame does not exist, force `if_exists` to 'replace'
+    if not table_exists(name, engine, schema=schema):
+        if_exists = "replace"
+    local_indices = indices(df)
+
+    if local_indices:
+        df = df.reset_index(drop=False)
+        retval = run_func(
+            df.to_sql,
+            name,
+            engine,
+            schema=schema,
+            if_exists=if_exists,
+            index=False,
+            index_label=None,
+            nb_trials=nb_trials,
+            logger=logger,
+            **kwargs,
+        )
+
+        if if_exists == "replace":
+            query_str = f"ALTER TABLE {frame_sql_str} ADD PRIMARY KEY ({','.join(local_indices)});"
+            exec_sql(query_str, engine, nb_trials=nb_trials, logger=logger)
+    else:
+        retval = run_func(
+            df.to_sql,
+            name,
+            engine,
+            schema=schema,
+            if_exists=if_exists,
+            index=False,
+            index_label=None,
+            nb_trials=nb_trials,
+            logger=logger,
+            **kwargs,
+        )
+
+    return retval
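The new Redshift `to_sql()` mirrors the PSQL variant: named indices become the primary key on a replaced table, and `index`/`index_label` are rejected. A usage sketch with hypothetical connection details and table name:

    import pandas as pd
    import sqlalchemy as sa
    from mt.sql.redshift import to_sql

    engine = sa.create_engine("redshift+psycopg2://user:pass@host:5439/db")  # hypothetical DSN
    df = pd.DataFrame({"id": [1, 2], "value": ["a", "b"]}).set_index("id")

    # the named index 'id' becomes the table's primary key
    to_sql(df, "my_table", engine, schema="public", if_exists="replace")
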
mt/sql/version.py CHANGED
@@ -1,11 +1,11 @@
-VERSION_YEAR = 2023
-VERSION_MONTH = int('12')
-VERSION_DAY = int('15')
-VERSION_HOUR = int('10')
-VERSION_MINUTE = int('26')
+VERSION_YEAR = 2024
+VERSION_MONTH = int('01')
+VERSION_DAY = int('09')
+VERSION_HOUR = int('16')
+VERSION_MINUTE = int('37')
 MAJOR_VERSION = 1
-MINOR_VERSION = 7
-PATCH_VERSION = 202312151026
-version_date = '2023/12/15 10:26'
+MINOR_VERSION = 9
+PATCH_VERSION = 202401091637
+version_date = '2024/01/09 16:37'
 version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
 __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version_date', 'version']
mtsql-1.7.202312151026.dist-info/METADATA → mtsql-1.9.202401091637.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mtsql
-Version: 1.7.202312151026
+Version: 1.9.202401091637
 Summary: Extra Python modules to deal with the interaction between pandas dataframes and remote SQL servers, for Minh-Tri Pham
 Home-page: https://github.com/inteplus/mtsql
 Author: ['Minh-Tri Pham']
mtsql-1.9.202401091637.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+mt/sql/__init__.py,sha256=b7zO50apZxt9Hg2eOkJhRLrXgACR8eS5b-Rphdn5qNQ,44
+mt/sql/base.py,sha256=GJLSQfz0GNXgFBzK6dSCVqQ4rjyTvFEBPmsM37d8eXc,10608
+mt/sql/mysql.py,sha256=n2ENDctdUqZuSaDAcrqZYtPtawq3Wx4dOPCRsCB5Q4w,4894
+mt/sql/psql.py,sha256=m41LsBQ57OVVtakUZ01o_YY-vBwY5Z3TVPvSUMylNaU,65964
+mt/sql/redshift.py,sha256=EliV4C9E3VuNjqFXWnTrU8Dm_utQrVwht5DF4oHl7qY,14808
+mt/sql/sqlite.py,sha256=T2ak_hhNi_zRfpg_gp8JhNHn7D2kl4i-Ey6-9ANMtz0,8678
+mt/sql/version.py,sha256=nb0i2eAMsoLqFeLpvANq9ovAtp3TRIZsz2c02XZ4xBs,396
+mtsql-1.9.202401091637.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
+mtsql-1.9.202401091637.dist-info/METADATA,sha256=0OS_X0KCiNKKzDFiiXTnttkBWJSeS6OoEsdmykC4JAc,589
+mtsql-1.9.202401091637.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+mtsql-1.9.202401091637.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
+mtsql-1.9.202401091637.dist-info/RECORD,,
mtsql-1.7.202312151026.dist-info/RECORD REMOVED
@@ -1,12 +0,0 @@
-mt/sql/__init__.py,sha256=b7zO50apZxt9Hg2eOkJhRLrXgACR8eS5b-Rphdn5qNQ,44
-mt/sql/base.py,sha256=sFr7O_Odfsf2AHr9kq3DXGCAFInCKgHSgLJaen507_I,9994
-mt/sql/mysql.py,sha256=n2ENDctdUqZuSaDAcrqZYtPtawq3Wx4dOPCRsCB5Q4w,4894
-mt/sql/psql.py,sha256=dRN4wH1uQ-deGb2M-3PbdUfjHQ_1fbPXnR_94X1KMIU,67364
-mt/sql/redshift.py,sha256=ADi1I_p8S5ZmzbLCclhxiUou5gXZrLY9Hd9yTMoprB4,11630
-mt/sql/sqlite.py,sha256=T2ak_hhNi_zRfpg_gp8JhNHn7D2kl4i-Ey6-9ANMtz0,8678
-mt/sql/version.py,sha256=PeQLGKevhlxMrPdW08UhN8-u9JaxBvwOZY0yCmqMGmc,396
-mtsql-1.7.202312151026.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
-mtsql-1.7.202312151026.dist-info/METADATA,sha256=zHt5Uh3O0YP5pkRj-xT1UCmVC2HfZqmGfQqcs5GrO3c,589
-mtsql-1.7.202312151026.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-mtsql-1.7.202312151026.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
-mtsql-1.7.202312151026.dist-info/RECORD,,