berryworld 1.0.0.196834__py3-none-any.whl → 1.0.0.197207__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@ import os
2
2
  import re
3
3
  import ast
4
4
  import math
5
+ import time
5
6
  import pyodbc
6
7
  import traceback
7
8
  import numpy as np
@@ -9,6 +10,8 @@ import pandas as pd
9
10
  import sqlalchemy as sa
10
11
  from urllib import parse
11
12
  from numbers import Number
13
+ from threading import Thread
14
+ from sqlalchemy.pool import QueuePool
12
15
 
13
16
 
14
17
  class SQLConnection:
@@ -137,9 +140,9 @@ class SQLConnection:
137
140
  if self.multi_db & (self.server.lower() == 'prod'):
138
141
  database = str(self.db_name) + 'Primary'
139
142
 
140
- constring = ('mssql+pyodbc://' + self.user_name + ':%s@' + self.server_name + '/' + database +
141
- '?driver=' + self.driver + self.trusted_certificate + self.encrypt)
142
- self.engine = sa.create_engine(constring % parse.quote_plus(self.password))
143
+ self.con_string = ('mssql+pyodbc://' + self.user_name + ':%s@' + self.server_name + '/' + database +
144
+ '?driver=' + self.driver + self.trusted_certificate + self.encrypt)
145
+ self.engine = sa.create_engine(self.con_string % parse.quote_plus(self.password))
143
146
  if not commit_as_transaction:
144
147
  self.engine = self.engine.execution_options(isolation_level="AUTOCOMMIT")
145
148
 
@@ -170,7 +173,7 @@ class SQLConnection:
170
173
  -----------------------------
171
174
  :param sql_query: Query to be sent to SQL
172
175
  :param coerce_float: Attempt to convert values of non-string, non-numeric objects (like decimal.Decimal)
173
- to floating point.
176
+ to floating point.
174
177
  :return: DataFrame gathering the requested data
175
178
  """
176
179
  self.open_read_connection()
@@ -184,21 +187,6 @@ class SQLConnection:
184
187
  self.close_connection()
185
188
  return data
186
189
 
187
- @staticmethod
188
- def _parse_df(parse_, data, col_names):
189
- """ Auxiliar function to convert list to DataFrame
190
- :param parse_: Parameter to indicate whether the data has to be transformed into a DataFrame or not
191
- :param data: List gathering the data retrieved from SQL
192
- :param col_names: List of columns to create the DataFrame
193
- :return: Formatted data
194
- """
195
- if parse_ is True:
196
- col_names = list(zip(*list(col_names)))[0]
197
- res = pd.DataFrame(list(zip(*data)), index=col_names).T
198
- else:
199
- res = [col_names, data]
200
- return res
201
-
202
190
  def sp_results(self, sql_query, resp_number=None, parse_=True, commit_as_transaction=True, no_count=True):
203
191
  """ Execute a stored procedure and retrieves all its output data
204
192
  -----------------------------
@@ -288,7 +276,7 @@ class SQLConnection:
288
276
  :param table: Table in which the data will be uploaded
289
277
  :param truncate: Indicate whether the table has to be truncated before the data is sent or not
290
278
  :param delete: Delete the rows from a table (Suitable for tables that cannot be truncated because of
291
- external constraints)
279
+ external constraints)
292
280
  :param identity: Indicate whether the identity columns will be inserted or not
293
281
  :param chunk: Indicate how many rows will be uploaded at once
294
282
  :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
@@ -296,11 +284,11 @@ class SQLConnection:
296
284
  false, it commits data by chunks.
297
285
  :param output: Outputs the columns indicated in this list
298
286
  :param bools2bits: Indicate whether the Boolean columns should be converted to BIT to be inserted into SQL
299
- :return: A DataFrame with the output columns requested if output is not None, else None
300
287
  :param nullable: Used within bools2bits function to indicate which boolean column values to convert
301
288
  :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
302
289
  :param infer_datetime_format: Indicate whether the datetime columns should be converted to string and if so,
303
- then the format to be used
290
+ then the format to be used
291
+ :return: A DataFrame with the output columns requested if output is not None, else None
304
292
  """
305
293
  if output is None:
306
294
  output = []
@@ -391,7 +379,7 @@ class SQLConnection:
391
379
  :param table: Table in which the data will be uploaded
392
380
  :param truncate: Indicate whether the table has to be truncated before the data is sent or not
393
381
  :param delete: Delete the rows from a table (Suitable for tables that cannot be truncated because of
394
- external constraints)
382
+ external constraints)
395
383
  :param identity: Indicate whether the identity columns will be inserted or not
396
384
  :param chunk: Indicate how many rows will be uploaded at once
397
385
  :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
@@ -495,8 +483,8 @@ class SQLConnection:
495
483
  :param bool_cols: columns to include as booleans
496
484
  :param batch_size: Number of records to update in each iteration
497
485
  :param output: Outputs the columns indicated in this list as a DataFrame. It should indicate if the column to
498
- retrieve is the inserted one or the deleted one (If nothing is indicated, then the Deleted one will be
499
- retrieved)
486
+ retrieve is the inserted one or the deleted one (If nothing is indicated, then the Deleted one
487
+ will be retrieved)
500
488
  :param nullable: Indicate whether to update the table column with null or exclude the reference from the update
501
489
  :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
502
490
  :return: None
@@ -631,8 +619,8 @@ class SQLConnection:
631
619
  :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
632
620
  :param bool_cols: columns to include as booleans
633
621
  :param output: Outputs the columns indicated in this list as a DataFrame. It should indicate if the column to
634
- retrieve is the inserted one or the deleted one (If nothing is indicated, then the Deleted one will be
635
- retrieved)
622
+ retrieve is the inserted one or the deleted one (If nothing is indicated, then the Deleted one
623
+ will be retrieved)
636
624
  :param chunk: Indicate how many rows will be uploaded at once
637
625
  :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
638
626
  :return: None
@@ -759,7 +747,7 @@ class SQLConnection:
759
747
  con_.merge(df, staging_schema, staging_table, sp_schema, sp_name, truncate=True)
760
748
  -----------------------------
761
749
  :param data: DataFrame to insert in the staging table
762
- :param staging_schema: Staging table schema
750
+ :param staging_schema: Schema of the staging table
763
751
  :param staging_table: Staging table name
764
752
  :param sp_schema: Stored Procedure schema
765
753
  :param sp_name: Stored Procedure name
@@ -821,11 +809,6 @@ class SQLConnection:
821
809
  [True, False], instead of [0,1]. The method need data from type boolean to be inserted as [0, 1].
822
810
  3.- When dealing with datetime columns a similar problem arises. time_format is a dict that contains as keys
823
811
  the name of a date column and as values the format that the columns has to have.
824
- Versions comments...
825
- + Difference between version 1.0 and 1.01 is that the last one is a bit simpler, it waits for names of columns
826
- which types are booleans or datetime (and format for this one) instead of trying to figure out this columns
827
- as in version 1.0 what is sometimes problematic. So, version 1.01 is more reliable but requires more time
828
- to write the call to the method.
829
812
  -------------------------
830
813
  MERGE INTO [SCHEMA].[TABLE] AS TARGET
831
814
  USING (
@@ -864,10 +847,10 @@ class SQLConnection:
864
847
  :param update_set: list of columns to update
865
848
  :param bool_cols: list of columns gathering boolean types
866
849
  :param identity: Indicate whether the identity columns will be inserted or not, only make sense when the table
867
- in its definition has it. Its a boolean.
850
+ in its definition has it. It's a boolean.
868
851
  :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
869
- :return: None
870
852
  :param nullable: Used for the boolean_mapping_data_types to indicate which boolean column values to convert
853
+ :return: None
871
854
  """
872
855
  if data is None:
873
856
  # no data to upload
@@ -940,6 +923,21 @@ class SQLConnection:
940
923
  except Exception:
941
924
  raise Exception(traceback.format_exc())
942
925
 
926
+ @staticmethod
927
+ def _parse_df(parse_, data, col_names):
928
+ """ Auxiliar function to convert list to DataFrame
929
+ :param parse_: Parameter to indicate whether the data has to be transformed into a DataFrame or not
930
+ :param data: List gathering the data retrieved from SQL
931
+ :param col_names: List of columns to create the DataFrame
932
+ :return: Formatted data
933
+ """
934
+ if parse_ is True:
935
+ col_names = list(zip(*list(col_names)))[0]
936
+ res = pd.DataFrame(list(zip(*data)), index=col_names).T
937
+ else:
938
+ res = [col_names, data]
939
+ return res
940
+
943
941
  @staticmethod
944
942
  def date_mapping_data_types(data):
945
943
  """
@@ -967,8 +965,1184 @@ class SQLConnection:
967
965
  """
968
966
  Map datetime and boolean variables so they can be inserted in SQL
969
967
  :param data: DataFrame containing the variables to map
970
- :return: The mapped DataFrame
971
968
  :param nullable: Determine if you want to convert null values within boolean columns to boolean format or not
969
+ :return: The mapped DataFrame
970
+ """
971
+ first_index = data.index[0]
972
+ bool_col = data.columns[
973
+ [('bool' in str(type(data.loc[first_index, col]))) | ('object' in str(type(data.loc[first_index, col]))) for
974
+ col in data.columns]]
975
+ if len(bool_col) > 0:
976
+ for col in bool_col:
977
+ if nullable:
978
+ bool_not_null = data[data[col].notna()]
979
+ if bool_not_null.shape[0] > 0:
980
+ for iindex in bool_not_null.index:
981
+ data.at[iindex, col] = int(data.loc[iindex, col])
982
+ else:
983
+ data[col] = data[col].apply(lambda x: 1 if x is True else 0)
984
+
985
+ return data
986
+
987
+ @staticmethod
988
+ def id_next(con_db, table, schema, id_col, print_sql=False):
989
+ """
990
+ This static method returns the next id to be inserted into a table for sql_server
991
+ :param con_db: class to connect to a sql server database
992
+ :param table: name of the table
993
+ :param schema: name of the schema
994
+ :param id_col: name of the id column
995
+ :param print_sql: bool to indicate if you want the sql statement to be printed on the Python console
996
+ :return: Max ID + 1 for id_col
997
+ """
998
+ sql_statement = ("SELECT CASE WHEN MAX(%s) IS NULL THEN 1 ELSE MAX(%s) + 1 END AS [Id] FROM [%s].[%s]" % (
999
+ id_col, id_col, schema, table))
1000
+ if print_sql:
1001
+ print(sql_statement)
1002
+ df = con_db.query(sql_statement)
1003
+ id_ = df.loc[0, 'Id']
1004
+ return id_
1005
+
1006
+ @staticmethod
1007
+ def convert_decimal_str(string):
1008
+ """ Method to parse the Decimal type in python
1009
+ :param string: String variable to parse
1010
+ :return: Parsed string
1011
+ """
1012
+ string = re.sub("'\)(?!(,[ ]+\())(?=([^$]))", "", string)
1013
+ return re.sub("Decimal\('", "", string)
1014
+
1015
+ @staticmethod
1016
+ def infer_datetime(data, infer_datetime_format):
1017
+ """ Method to infer datetime columns and format them as string
1018
+ :param data: DataFrame to parse
1019
+ :param infer_datetime_format: format to be used for the datetime columns
1020
+ :return: Parsed DataFrame
1021
+ """
1022
+ for col in data.select_dtypes(include=['datetime64']).columns:
1023
+ data[col] = pd.to_datetime(data[col]).dt.strftime(infer_datetime_format)
1024
+
1025
+ return data
1026
+
1027
+
1028
+ class SQLPoolEngine:
1029
+ """ Connect a Pool Engine to a Microsoft SQL """
1030
+
1031
+ def __init__(self, db_reference, server, master=False, trusted_certificate=True, encrypt=True, multi_db=False,
1032
+ commit_as_transaction=True, pool_size=10, max_overflow=10, pool_timeout=30, timeout=300):
1033
+ """ Initialize the class
1034
+ It requires the environment variable
1035
+ SQL-DBREFERENCE = "{'prod': {'server_name': ..., 'db_name': ..., 'user_name': ..., 'pwd': ...}}"
1036
+ -----------------------------
1037
+ db_reference = 'FruitFlow'
1038
+ server = 'prod'
1039
+
1040
+ pool_ = SQLPoolEngine(db_reference, server)
1041
+ -----------------------------
1042
+ :param db_reference: Database reference to connect to
1043
+ :param server: Server to connect to
1044
+ :param master: Indicate whether the connection will be done to master or to a specific database
1045
+ :param trusted_certificate: Indicate whether the connection will be done using the TrustServerCertificate
1046
+ :param encrypt: Indicate whether the connection will use SSL/TLS encryption
1047
+ :param multi_db: Indicate whether the connection will be done to a specific database or to multiple databases
1048
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1049
+ :param pool_size: Number of connections to keep in the pool
1050
+ :param max_overflow: Extra connections beyond pool_size
1051
+ :param pool_timeout: Timeout for getting a connection
1052
+ :param timeout: Seconds after which pooled connections are recycled and the engines are disposed
1053
+ """
1054
+ self.con_string_read = None
1055
+ self.con_string_write = None
1056
+ self.engine_read = None
1057
+ self.engine_write = None
1058
+ self.con = None
1059
+ self.commit_as_transaction = commit_as_transaction
1060
+ self.pool_size = pool_size
1061
+ self.max_overflow = max_overflow
1062
+ self.pool_timeout = pool_timeout
1063
+ self.timeout = timeout
1064
+
1065
+ self.db_reference = db_reference.replace("_", "") if "_" in db_reference else db_reference
1066
+ self.server = server
1067
+ if self.server is None:
1068
+ raise ValueError("Please provide a value for server type")
1069
+
1070
+ self.multi_db = multi_db
1071
+ self.master = master
1072
+ if trusted_certificate:
1073
+ self.trusted_certificate = '&TrustServerCertificate=yes'
1074
+ else:
1075
+ self.trusted_certificate = ''
1076
+
1077
+ if encrypt:
1078
+ self.encrypt = '&Encrypt=yes'
1079
+ else:
1080
+ self.encrypt = ''
1081
+
1082
+ drivers = [driver for driver in pyodbc.drivers() if (bool(re.search(r'\d', driver)))]
1083
+ try:
1084
+ self.server_name, self.db_name, self.user_name, self.password = self.credentials()
1085
+ except Exception as e:
1086
+ raise ValueError(
1087
+ f"Cannot find a reference to {self.db_reference} and {self.server.upper()} server: {str(e)}")
1088
+
1089
+ driver_attempt = ''
1090
+ for driver in drivers:
1091
+ try:
1092
+ self.driver = driver
1093
+ self.open_read_connection(commit_as_transaction=self.commit_as_transaction)
1094
+ self.query('''SELECT TOP 1 * FROM information_schema.tables;''')
1095
+ driver_attempt = ''
+ break
1096
+ except Exception as e:
1097
+ print(e)
1098
+ driver_attempt = str(e)
1099
+
1100
+ if driver_attempt != '':
1101
+ raise ValueError(
1102
+ f"Cannot connect to db: {self.db_name} - Error: {str(driver_attempt)}")
1103
+
1104
+ self.create_write_engine(commit_as_transaction=self.commit_as_transaction)
1105
+
1106
+ # Dispose the engine after a certain timeout
1107
+ Thread(target=self.close_connection, args=(self.timeout,)).start()
1108
+
1109
+ def credentials(self):
1110
+ """ Return the credentials used to connect to the SQL Server
1111
+ :return: Tuple of (server_name, db_name, user_name, password) used to connect to the SQL Server
1112
+ """
1113
+ try:
1114
+ server_creds = os.environ.get(f"SQL-{self.db_reference.upper()}")
1115
+ server_creds = ast.literal_eval(server_creds)
1116
+ except Exception as e:
1117
+ raise ValueError(f'DB reference: {self.db_reference} not found. ERROR: {e}')
1118
+
1119
+ try:
1120
+ server_creds = server_creds[self.server.lower()]
1121
+ except Exception as e:
1122
+ raise ValueError(f'Server: {self.server} not found for DB reference: {self.db_reference}. ERROR: {e}')
1123
+
1124
+ if 'server_name' not in server_creds.keys():
1125
+ raise ValueError(f"Server name not provided for {self.db_reference} on {self.server.upper()} server")
1126
+ else:
1127
+ server_name = server_creds['server_name']
1128
+
1129
+ if 'db_name' not in server_creds.keys():
1130
+ raise ValueError(f"Database name not provided for {self.db_reference} on {self.server.upper()} server")
1131
+ else:
1132
+ db_name = server_creds['db_name']
1133
+
1134
+ if 'user_name' not in server_creds.keys():
1135
+ raise ValueError(f"User name not provided for {self.db_reference} on {self.server.upper()} server")
1136
+ else:
1137
+ user_name = server_creds['user_name']
1138
+
1139
+ if 'pwd' not in server_creds.keys():
1140
+ raise ValueError(f"Password not provided for {self.db_reference} on {self.server.upper()} server")
1141
+ else:
1142
+ password = server_creds['pwd']
1143
+
1144
+ return re.sub(r'(\\)\1*', r'\1', server_name), db_name, user_name, password
1145
+
1146
+ def create_read_engine(self, commit_as_transaction=True):
1147
+ """ Create a reading engine
1148
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1149
+ :return: None
1150
+ """
1151
+ if self.master:
1152
+ self.con_string_read = ('mssql+pyodbc://' + self.user_name + ':%s@' + self.server_name + '/master' +
1153
+ '?driver=' + self.driver + '&trusted_connection=yes' + self.trusted_certificate +
1154
+ self.encrypt)
1155
+ else:
1156
+ self.con_string_read = ('mssql+pyodbc://' + self.user_name + ':%s@' + self.server_name + '/' +
1157
+ self.db_name + '?driver=' + self.driver + self.trusted_certificate + self.encrypt)
1158
+
1159
+ self.engine_read = sa.create_engine(self.con_string_read % parse.quote_plus(self.password),
1160
+ poolclass=QueuePool,
1161
+ pool_size=self.pool_size, # Number of connections to keep in the pool
1162
+ max_overflow=self.max_overflow, # Extra connections beyond pool_size
1163
+ pool_timeout=self.pool_timeout, # Timeout for getting a connection
1164
+ pool_recycle=self.timeout # Recycle connections after X seconds
1165
+ )
1166
+
1167
+ if not commit_as_transaction:
1168
+ self.engine_read = self.engine_read.execution_options(isolation_level="AUTOCOMMIT")
1169
+
1170
+ def create_write_engine(self, commit_as_transaction=True):
1171
+ """ Create a writing engine
1172
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1173
+ :return: None
1174
+ """
1175
+ database = self.db_name
1176
+ if self.multi_db & (self.server.lower() == 'prod'):
1177
+ database = str(self.db_name) + 'Primary'
1178
+
1179
+ self.con_string_write = ('mssql+pyodbc://' + self.user_name + ':%s@' + self.server_name + '/' + database +
1180
+ '?driver=' + self.driver + self.trusted_certificate + self.encrypt)
1181
+ self.engine_write = sa.create_engine(self.con_string_write % parse.quote_plus(self.password),
1182
+ poolclass=QueuePool,
1183
+ pool_size=self.pool_size, # Number of connections to keep in the pool
1184
+ max_overflow=self.max_overflow, # Extra connections beyond pool_size
1185
+ pool_timeout=self.pool_timeout, # Timeout for getting a connection
1186
+ pool_recycle=self.timeout # Recycle connections after X seconds
1187
+ )
1188
+
1189
+ if not commit_as_transaction:
1190
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1191
+
1192
+ def open_read_connection(self, commit_as_transaction=True):
1193
+ """ Open a reading connection with the Server
1194
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1195
+ :return: The opened connection
1196
+ """
1197
+ self.create_read_engine(commit_as_transaction=commit_as_transaction)
1198
+ self.con = self.engine_read.connect().connection
1199
+
1200
+ def close_connection(self, timeout=0):
1201
+ """ Dispose any opened engines with the Server
1202
+ :param timeout: Seconds to wait before disposing the engines (0 disposes immediately)
+ :return: None
1203
+ """
1204
+ if timeout > 0:
1205
+ time.sleep(timeout)
1206
+
1207
+ if self.engine_read:
1208
+ self.engine_read.dispose()
1209
+
1210
+ if self.engine_write:
1211
+ self.engine_write.dispose()
1212
+
1213
+ def query(self, sql_query, coerce_float=False):
1214
+ """ Read data from SQL according to the sql_query
1215
+ -----------------------------
1216
+ query_str = "SELECT * FROM %s" & table
1217
+ con_.query(query_str)
1218
+ -----------------------------
1219
+ :param sql_query: Query to be sent to SQL
1220
+ :param coerce_float: Attempt to convert values of non-string, non-numeric objects (like decimal.Decimal)
1221
+ to floating point.
1222
+ :return: DataFrame gathering the requested data
1223
+ """
1224
+ if self.con is None:
1225
+ self.con = self.engine_read.connect().connection
1226
+
1227
+ data = None
1228
+ try:
1229
+ with self.engine_read.begin() as conn:
1230
+ data = pd.read_sql_query(sa.text(sql_query), conn, coerce_float=coerce_float)
1231
+ except ValueError:
1232
+ print(traceback.format_exc())
1233
+ finally:
1234
+ self.con.close()
1235
+ return data
1236
+
1237
+
1238
+ class SQLConnectionPool:
1239
+ """ Connect to a Microsoft SQL Server using connection pooling """
1240
+
1241
+ def __init__(self, pool_class):
1242
+ """ Initialize the class
1243
+ It requires an instance of the SQLPoolEngine to work properly
1244
+ -----------------------------
1245
+ con_ = SQLConnectionPool(SQLPoolEngine(db_reference, server))
1246
+ -----------------------------
1247
+ :param pool_class: SQLPoolEngine instance providing the pooled engines for the connections
1248
+ """
1249
+ self.con_read = None
1250
+ self.con_write = None
1251
+ self.engine_read = pool_class.engine_read
1252
+ self.engine_write = pool_class.engine_write
1253
+ self.con_string_read = pool_class.con_string_read
1254
+ self.con_string_write = pool_class.con_string_write
1255
+ self.commit_as_transaction = pool_class.commit_as_transaction
1256
+ self.db_name = pool_class.db_name
1257
+ self.server = pool_class.server
1258
+ self.timeout = pool_class.timeout
1259
+
1260
+ Thread(target=self.close_connection, args=(self.timeout,)).start()
1261
+
1262
+ def close_connection(self, timeout=0):
1263
+ """ Close any opened connections with the Server
1264
+ :param timeout: Seconds to wait before closing the connections and disposing the engines (0 closes immediately)
+ :return: None
1265
+ """
1266
+ if timeout > 0:
1267
+ time.sleep(timeout)
1268
+
1269
+ if self.con_read is not None:
1270
+ self.con_read.close()
1271
+ if self.engine_read:
1272
+ self.engine_read.dispose()
1273
+
1274
+ if self.con_write is not None:
1275
+ self.con_write.close()
1276
+ if self.engine_write:
1277
+ self.engine_write.dispose()
1278
+
1279
+ def query(self, sql_query, coerce_float=False):
1280
+ """ Read data from SQL according to the sql_query
1281
+ -----------------------------
1282
+ query_str = "SELECT * FROM %s" & table
1283
+ con_.query(query_str)
1284
+ -----------------------------
1285
+ :param sql_query: Query to be sent to SQL
1286
+ :param coerce_float: Attempt to convert values of non-string, non-numeric objects (like decimal.Decimal)
1287
+ to floating point.
1288
+ :return: DataFrame gathering the requested data
1289
+ """
1290
+ if self.con_read is None:
1291
+ self.con_read = self.engine_read.connect().connection
1292
+
1293
+ data = None
1294
+ try:
1295
+ with self.engine_read.begin() as conn:
1296
+ data = pd.read_sql_query(sa.text(sql_query), conn, coerce_float=coerce_float)
1297
+ except ValueError:
1298
+ print(traceback.format_exc())
1299
+ return data
1300
+
1301
+ def sp_results(self, sql_query, resp_number=None, parse_=True, no_count=True, commit_as_transaction=True):
1302
+ """ Execute a stored procedure and retrieves all its output data
1303
+ -----------------------------
1304
+ query_str = "EXECUTE %s" & stored_procedure
1305
+ con_.sp_results(query_str, resp_number=1)
1306
+ -----------------------------
1307
+ :param sql_query: Query to be sent to SQL
1308
+ :param resp_number: Indicate which of the stored procedure's responses will be retrieved
1309
+ :param parse_: Indicate whether the output needs to be converted to a DataFrame or not
1310
+ :param no_count: Indicate whether SET NOCOUNT option is ON (True) or OFF (False)
1311
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1312
+ :return: DataFrame list gathering the requested data
1313
+ """
1314
+ if self.commit_as_transaction != commit_as_transaction:
1315
+ self.commit_as_transaction = commit_as_transaction
1316
+ if not commit_as_transaction:
1317
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1318
+ else:
1319
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1320
+
1321
+ if self.con_write is None:
1322
+ self.con_write = self.engine_write.connect().connection
1323
+
1324
+ data_list = list()
1325
+ cursor = None
1326
+ try:
1327
+ cursor = self.con_write.cursor()
1328
+ if no_count:
1329
+ cursor.execute("SET NOCOUNT ON;" + sql_query)
1330
+ else:
1331
+ cursor.execute(sql_query)
1332
+ if resp_number is not None:
1333
+ for cursor_number in range(resp_number - 1):
1334
+ cursor.nextset()
1335
+ try:
1336
+ data_list.append(self._parse_df(parse_, cursor.fetchall(), cursor.description))
1337
+ except ValueError:
1338
+ raise ValueError('Please indicate a valid resp_number')
1339
+ else:
1340
+ aux_cursor = True
1341
+ count = 0
1342
+ while aux_cursor is not False and count < 100:
1343
+ try:
1344
+ data_list.append(self._parse_df(parse_, cursor.fetchall(), cursor.description))
1345
+ aux_cursor = cursor.nextset()
1346
+ except Exception as e:
1347
+ print(e)
1348
+ cursor.nextset()
1349
+ finally:
1350
+ count += 1
1351
+ if count >= 100:
1352
+ raise RuntimeError("Method sp_results has loop over 100 times for database '%s' on server '%s'"
1353
+ % (self.db_name, self.server))
1354
+ self.con_write.commit()
1355
+ except ValueError:
1356
+ print(traceback.format_exc())
1357
+ finally:
1358
+ if cursor:
1359
+ cursor.close()
1360
+
1361
+ return data_list
1362
+
1363
+ def run_statement(self, sql_statement, commit_as_transaction=True):
1364
+ """ Execute SQL statement
1365
+ -----------------------------
1366
+ query_str = "DELETE FROM %s WHERE Id > 100" & table
1367
+ con_.run_statement(query_str)
1368
+ -----------------------------
1369
+ :param sql_statement: Statement as string to be run in SQL
1370
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1371
+ :return: Statement result
1372
+ """
1373
+ if self.commit_as_transaction != commit_as_transaction:
1374
+ self.commit_as_transaction = commit_as_transaction
1375
+ if not commit_as_transaction:
1376
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1377
+ else:
1378
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1379
+
1380
+ if self.con_write is None:
1381
+ self.con_write = self.engine_write.connect().connection
1382
+
1383
+ cursor = self.con_write.cursor()
1384
+ # Execute SQL statement
1385
+ try:
1386
+ cursor.execute(sql_statement)
1387
+ self.con_write.commit()
1388
+ except Exception:
1389
+ raise Exception(traceback.format_exc())
1390
+ finally:
1391
+ if cursor:
1392
+ cursor.close()
1393
+
1394
+ def insert(self, data, schema, table, truncate=False, delete=False, identity=False, chunk=1000, print_sql=False,
1395
+ commit_all_together=False, output=None, bools2bits=True, nullable=False, infer_datetime_format=None,
1396
+ commit_as_transaction=True):
1397
+ """ Insert data in a table in SQL truncating the table if needed
1398
+ -----------------------------
1399
+ df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1, 2]})
1400
+ con_.insert(df, table_schema, table_name)
1401
+ -----------------------------
1402
+ :param data: DataFrame containing the data to upload
1403
+ :param schema: Schema of the table in which the data will be uploaded
1404
+ :param table: Table in which the data will be uploaded
1405
+ :param truncate: Indicate whether the table has to be truncated before the data is sent or not
1406
+ :param delete: Delete the rows from a table (Suitable for tables that cannot be truncated because of
1407
+ external constraints)
1408
+ :param identity: Indicate whether the identity columns will be inserted or not
1409
+ :param chunk: Indicate how many rows will be uploaded at once
1410
+ :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
1411
+ :param commit_all_together: when it is true, it only commits data if all data has been inserted. When it is
1412
+ false, it commits data by chunks.
1413
+ :param output: Outputs the columns indicated in this list
1414
+ :param bools2bits: Indicate whether the Boolean columns should be converted to BIT to be inserted into SQL
1415
+ :param nullable: Used within bools2bits function to indicate which boolean column values to convert
1416
+ :param infer_datetime_format: Indicate whether the datetime columns should be converted to string and if so,
1417
+ then the format to be used
1418
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1419
+ :return: A DataFrame with the output columns requested if output is not None, else None
1420
+ """
1421
+ if output is None:
1422
+ output = []
1423
+ if data is None:
1424
+ # no data to upload
1425
+ return ValueError("The data provided is invalid!")
1426
+ cursor = None
1427
+ results = pd.DataFrame(columns=output)
1428
+
1429
+ if self.commit_as_transaction != commit_as_transaction:
1430
+ self.commit_as_transaction = commit_as_transaction
1431
+ if not commit_as_transaction:
1432
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1433
+ else:
1434
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1435
+
1436
+ if self.con_write is None:
1437
+ self.con_write = self.engine_write.connect().connection
1438
+
1439
+ # Mapping the date datatype columns for SQL
1440
+ data = self.date_mapping_data_types(data)
1441
+
1442
+ # Infer datetime format if provided
1443
+ if infer_datetime_format is not None:
1444
+ data = self.infer_datetime(data, infer_datetime_format)
1445
+
1446
+ # Mapping the boolean columns to bit
1447
+ if bools2bits:
1448
+ data = self.boolean_mapping_data_types(data, nullable)
1449
+
1450
+ try:
1451
+ cursor = self.con_write.cursor()
1452
+ # Truncate table if needed
1453
+ if truncate:
1454
+ cursor.execute("TRUNCATE TABLE [%s].[%s]" % (schema, table))
1455
+ # Delete all records from the table if needed
1456
+ if delete:
1457
+ cursor.execute("DELETE FROM [%s].[%s]" % (schema, table))
1458
+ # Allow to insert to an Identity column
1459
+ if identity:
1460
+ cursor.execute("SET IDENTITY_INSERT [%s].[%s] ON" % (schema, table))
1461
+ # Convert category columns to string
1462
+ cat_cols = data.columns[(data.dtypes == 'category').values].to_list()
1463
+ data[cat_cols] = data[cat_cols].astype(str)
1464
+ # Deal with null values and apostrophes (')
1465
+ data = data.replace("'NULL'", "NULL")
1466
+ data = data.replace("'", "~~", regex=True).infer_objects(copy=False)
1467
+ data = data.fillna("null")
1468
+ # Insert data into the table destination
1469
+ records = [tuple(x) for x in data.values]
1470
+ insert_ = """INSERT INTO [%s].[%s] """ % (schema, table)
1471
+ insert_ += str(tuple(data.columns.values)).replace(
1472
+ "(\'", "([").replace('\', \'', '], [').replace('\')', '])')
1473
+ if len(output) > 0:
1474
+ insert_ += " OUTPUT Inserted.[" + "], Inserted.[".join(output) + "] "
1475
+ insert_ += """ VALUES """
1476
+
1477
+ for batch in self._chunker(records, chunk):
1478
+ rows = str(batch).strip('[]').replace("~~", "''")
1479
+ rows = rows.replace("'NULL'", "NULL").replace("'null'", 'null')
1480
+ string = insert_ + rows
1481
+ string = self.convert_decimal_str(string)
1482
+ if print_sql:
1483
+ print(string)
1484
+ cursor.execute(string)
1485
+ if len(output) > 0:
1486
+ results = pd.concat([results, pd.DataFrame.from_records(cursor.fetchall(), columns=output)])
1487
+ if not commit_all_together:
1488
+ self.con_write.commit()
1489
+ if commit_all_together:
1490
+ self.con_write.commit()
1491
+
1492
+ # Restrict to insert to an Identity column
1493
+ if identity:
1494
+ cursor.execute("SET IDENTITY_INSERT [%s].[%s] OFF" % (schema, table))
1495
+
1496
+ if len(output) > 0:
1497
+ return results.reset_index(drop=True)
1498
+
1499
+ except Exception:
1500
+ raise Exception(traceback.format_exc())
1501
+
1502
+ finally:
1503
+ if cursor:
1504
+ cursor.close()
1505
+
1506
+ def insert_at_once(self, data, schema, table, truncate=False, delete=False, identity=False, chunk=1,
1507
+ print_sql=False, output=None, bools2bits=True, nullable=False, commit_as_transaction=True):
1508
+ """ Build all the insert statements and commit them all at once
1509
+ -----------------------------
1510
+ df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1, 2]})
1511
+ con_.insert_at_once(df, table_schema, table_name)
1512
+ -----------------------------
1513
+ :param data: DataFrame containing the data to upload
1514
+ :param schema: Schema of the table in which the data will be uploaded
1515
+ :param table: Table in which the data will be uploaded
1516
+ :param truncate: Indicate whether the table has to be truncated before the data is sent or not
1517
+ :param delete: Delete the rows from a table (Suitable for tables that cannot be truncated because of
1518
+ external constraints)
1519
+ :param identity: Indicate whether the identity columns will be inserted or not
1520
+ :param chunk: Indicate how many rows will be uploaded at once
1521
+ :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
1522
+ :param output: Outputs the columns indicated in this list
1523
+ :param bools2bits: Indicate whether the Boolean columns should be converted to BIT to be inserted into SQL
1524
+ :param nullable: Used within bools2bits function to indicate which boolean column values to convert
1525
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1526
+ :return: A DataFrame with the output columns requested if output is not None, else None
1527
+ """
1528
+ if output is None:
1529
+ output = []
1530
+ if data is None:
1531
+ # no data to upload
1532
+ return ValueError("The data provided is invalid!")
1533
+ cursor = None
1534
+ results = pd.DataFrame(columns=output)
1535
+
1536
+ if self.commit_as_transaction != commit_as_transaction:
1537
+ self.commit_as_transaction = commit_as_transaction
1538
+ if not commit_as_transaction:
1539
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1540
+ else:
1541
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1542
+
1543
+ if self.con_write is None:
1544
+ self.con_write = self.engine_write.connect().connection
1545
+
1546
+ # Mapping the date datatype columns for SQL
1547
+ data = self.date_mapping_data_types(data)
1548
+
1549
+ # Mapping the boolean columns to bit
1550
+ if bools2bits:
1551
+ data = self.boolean_mapping_data_types(data, nullable)
1552
+
1553
+ try:
1554
+ cursor = self.con_write.cursor()
1555
+ # Truncate table if needed
1556
+ if truncate:
1557
+ cursor.execute("TRUNCATE TABLE [%s].[%s]" % (schema, table))
1558
+ # Delete all records from the table if needed
1559
+ if delete:
1560
+ cursor.execute("DELETE FROM [%s].[%s]" % (schema, table))
1561
+ # Allow to insert to an Identity column
1562
+ if identity:
1563
+ cursor.execute("SET IDENTITY_INSERT [%s].[%s] ON" % (schema, table))
1564
+ # Convert category columns to string
1565
+ cat_cols = data.columns[(data.dtypes == 'category').values].to_list()
1566
+ data[cat_cols] = data[cat_cols].astype(str)
1567
+ # Deal with null values and apostrophes (')
1568
+ data = data.replace("'NULL'", "NULL")
1569
+ data = data.replace("'", "~~", regex=True).infer_objects(copy=False)
1570
+ data = data.fillna("null")
1571
+ # Insert data into the table destination
1572
+ records = [tuple(x) for x in data.values]
1573
+ insert_ = """INSERT INTO [%s].[%s] """ % (schema, table)
1574
+ insert_ += str(tuple(data.columns.values)).replace(
1575
+ "(\'", "([").replace('\', \'', '], [').replace('\')', '])')
1576
+ if len(output) > 0:
1577
+ insert_ += " OUTPUT Inserted.[" + "], Inserted.[".join(output) + "] "
1578
+ insert_ += """ VALUES """
1579
+
1580
+ insert_statements = list()
1581
+ for batch in self._chunker(records, chunk):
1582
+ rows = str(batch).strip('[]').replace("~~", "''")
1583
+ rows = rows.replace("'NULL'", "NULL").replace("'null'", 'null')
1584
+ string = insert_ + rows
1585
+ string = self.convert_decimal_str(string)
1586
+ insert_statements.append(string)
1587
+
1588
+ if print_sql:
1589
+ print(';'.join(insert_statements))
1590
+ cursor.execute(';'.join(insert_statements))
1591
+ if len(output) > 0:
1592
+ results = pd.concat([results, pd.DataFrame.from_records(cursor.fetchall(), columns=output)])
1593
+ self.con_write.commit()
1594
+
1595
+ # Restrict to insert to an Identity column
1596
+ if identity:
1597
+ cursor.execute("SET IDENTITY_INSERT [%s].[%s] OFF" % (schema, table))
1598
+
1599
+ if len(output) > 0:
1600
+ return results.reset_index(drop=True)
1601
+
1602
+ except Exception:
1603
+ raise Exception(traceback.format_exc())
1604
+
1605
+ finally:
1606
+ if cursor:
1607
+ cursor.close()
1608
+
1609
+ def update(self, data, update_list, on_list, schema, table, bool_cols=None, print_sql=False, batch_size=100,
1610
+ output=None, nullable=True, commit_as_transaction=True):
1611
+ """ This method updates a table in batches in sql server.
1612
+ -----------------------------
1613
+ UPDATE [SCHEMA].[TABLE]
1614
+ SET update_list[0] = data[index, update_list[0]],
1615
+ update_list[1] = data[index, update_list[1]]
1616
+ OUTPUT output[0], output[1]
1617
+ WHERE on_list[0] = data[index, on_list[0]]
1618
+ AND on_list[1] = data[index, on_list[1]]
1619
+ -----------------------------
1620
+ :param data: DataFrame containing the data to update
1621
+ :param update_list: list of columns to update
1622
+ :param on_list: list of columns to apply the on clause
1623
+ :param schema: Schema of the table in which the data will be uploaded
1624
+ :param table: Table in which the data will be uploaded
1625
+ :param bool_cols: list of columns gathering boolean types
1626
+ :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
1627
+ :param bool_cols: columns to include as booleans
1628
+ :param batch_size: Number of records to update in each iteration
1629
+ :param output: Outputs the columns indicated in this list as a DataFrame. It should indicate if the column to
1630
+ retrieve is the inserted one or the deleted one (If nothing is indicated, then the Deleted one
1631
+ will be retrieved)
1632
+ :param nullable: Indicate whether to update the table column with null or exclude the reference from the update
1633
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1634
+ :return: None
1635
+ """
1636
+ cursor = None
1637
+ if data is None:
1638
+ # no data to update
1639
+ return ValueError("The data provided is invalid!")
1640
+
1641
+ if output is None:
1642
+ output = []
1643
+ else:
1644
+ output = [out if 'inserted' in out.lower() or 'deleted' in out.lower() else 'Deleted.[' + out + ']' for out
1645
+ in output]
1646
+ results = pd.DataFrame(columns=output)
1647
+
1648
+ # re-starting indexes
1649
+ data.reset_index(drop=True, inplace=True)
1650
+
1651
+ # Mapping boolean columns
1652
+ if bool_cols is not None:
1653
+ for col in bool_cols:
1654
+ data[col] = data[col].astype(bool)
1655
+
1656
+ # Mapping date type for SQL
1657
+ data = self.date_mapping_data_types(data)
1658
+
1659
+ # create connection
1660
+ if self.commit_as_transaction != commit_as_transaction:
1661
+ self.commit_as_transaction = commit_as_transaction
1662
+ if not commit_as_transaction:
1663
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1664
+ else:
1665
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1666
+
1667
+ if self.con_write is None:
1668
+ self.con_write = self.engine_write.connect().connection
1669
+
1670
+ try:
1671
+ # initialise cursor
1672
+ cursor = self.con_write.cursor()
1673
+
1674
+ # extraction of the useful columns
1675
+ data_update = data[list(set(update_list + on_list))]
1676
+
1677
+ # initialisation of the sql statement
1678
+ sql_start = ''' UPDATE [%s].[%s] SET ''' % (schema, table)
1679
+ iter_batch = math.ceil(data_update.shape[0] / batch_size)
1680
+ for batch in range(iter_batch):
1681
+ batch_update = data_update.iloc[batch * batch_size: (batch + 1) * batch_size]
1682
+
1683
+ sql_statement = ''
1684
+ for iindex in batch_update.index:
1685
+ # UPDATE [SCHEMA].[TABLE]
1686
+ sql_statement += sql_start
1687
+
1688
+ # VALUES
1689
+ for col in update_list:
1690
+ if nullable:
1691
+ if pd.isna(batch_update.loc[iindex, col]):
1692
+ sql_statement += " [%s] = NULL ," % col
1693
+ elif isinstance(batch_update.loc[iindex, col], bool):
1694
+ sql_statement += " [%s] = %s ," % (col, int(batch_update.loc[iindex, col]))
1695
+ elif isinstance(batch_update.loc[iindex, col], Number):
1696
+ sql_statement += " [%s] = %s ," % (col, batch_update.loc[iindex, col])
1697
+ else:
1698
+ sql_statement += " [%s] = '%s' ," % (col, batch_update.loc[iindex, col])
1699
+ else:
1700
+ if pd.notna(batch_update.loc[iindex, col]):
1701
+ if str(batch_update.loc[iindex, col]).upper() == 'NULL':
1702
+ continue
1703
+ elif isinstance(batch_update.loc[iindex, col], bool):
1704
+ sql_statement += " [%s] = %s ," % (col, int(batch_update.loc[iindex, col]))
1705
+ elif isinstance(batch_update.loc[iindex, col], Number):
1706
+ sql_statement += " [%s] = %s ," % (col, batch_update.loc[iindex, col])
1707
+ else:
1708
+ sql_statement += " [%s] = '%s' ," % (col, batch_update.loc[iindex, col])
1709
+
1710
+ # OUTPUT
1711
+ if len(output) > 0:
1712
+ sql_statement = sql_statement[:-1] + " OUTPUT " + ",".join(output) + ' '
1713
+
1714
+ # WHERE
1715
+ sql_statement = sql_statement[:-1] + ' WHERE '
1716
+ for col in on_list:
1717
+ if pd.isna(batch_update.loc[iindex, col]):
1718
+ sql_statement += " [%s] = NULL AND" % col
1719
+ elif isinstance(batch_update.loc[iindex, col], bool):
1720
+ sql_statement += " [%s] = %s ," % (col, int(batch_update.loc[iindex, col]))
1721
+ elif isinstance(batch_update.loc[iindex, col], Number):
1722
+ sql_statement += " [%s] = %s AND" % (col, batch_update.loc[iindex, col])
1723
+ else:
1724
+ sql_statement += " [%s] = '%s' AND" % (col, batch_update.loc[iindex, col])
1725
+
1726
+ # Addition of semicolon
1727
+ sql_statement = sql_statement[:-3] + ';'
1728
+
1729
+ if print_sql:
1730
+ print(sql_statement)
1731
+
1732
+ # executing statement
1733
+ if len(sql_statement) > 0:
1734
+ if len(output) > 0:
1735
+ cursor.execute(sql_statement)
1736
+ for cursor_number in range(len(sql_statement.split(';')) - 1):
1737
+ results = pd.concat([results, pd.DataFrame.from_records(cursor.fetchall(), columns=output)])
1738
+ cursor.nextset()
1739
+ else:
1740
+ cursor.execute(sql_statement)
1741
+ self.con_write.commit()
1742
+
1743
+ if len(output) > 0:
1744
+ return results.reset_index(drop=True)
1745
+
1746
+ except Exception:
1747
+ raise Exception(traceback.format_exc())
1748
+
1749
+ finally:
1750
+ if cursor:
1751
+ cursor.close()
1752
+
1753
+ def bulk_update(self, data, update_list, on_list, schema, table, bool_cols=None, print_sql=False, output=None,
1754
+ chunk=1000, commit_as_transaction=True):
1755
+ """ This method updates a table in batches in sql server.
1756
+ -----------------------------
1757
+ UPDATE [SCHEMA].[TABLE]
1758
+ SET update_list[0] = data[index, update_list[0]],
1759
+ update_list[1] = data[index, update_list[1]]
1760
+ OUTPUT output[0], output[1]
1761
+ WHERE on_list[0] = data[index, on_list[0]]
1762
+ AND on_list[1] = data[index, on_list[1]]
1763
+ -----------------------------
1764
+ :param data: DataFrame containing the data to update
1765
+ :param update_list: list of columns to update
1766
+ :param on_list: list of columns to apply the on clause
1767
+ :param schema: Schema of the table in which the data will be uploaded
1768
+ :param table: Table in which the data will be uploaded
1769
+ :param bool_cols: list of columns gathering boolean types
1770
+ :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
1771
+ :param bool_cols: columns to include as booleans
1772
+ :param output: Outputs the columns indicated in this list as a DataFrame. It should indicate if the column to
1773
+ retrieve is the inserted one or the deleted one (If nothing is indicated, then the Deleted one
1774
+ will be retrieved)
1775
+ :param chunk: Indicate how many rows will be uploaded at once
1776
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1777
+ :return: None
1778
+ """
1779
+ cursor = None
1780
+ if data is None:
1781
+ # no data to update
1782
+ return ValueError("The data provided is invalid!")
1783
+
1784
+ if output is None:
1785
+ output = []
1786
+ sql_output = []
1787
+ else:
1788
+ sql_output = [out if 'inserted' in out.lower() or 'deleted' in out.lower() else 'Deleted.[' + out + ']' for
1789
+ out
1790
+ in output]
1791
+ results = pd.DataFrame(columns=output)
1792
+
1793
+ # re-starting indexes
1794
+ data.reset_index(drop=True, inplace=True)
1795
+
1796
+ # Mapping boolean columns
1797
+ if bool_cols is not None:
1798
+ for col in bool_cols:
1799
+ data[col] = data[col].astype(bool)
1800
+
1801
+ # Mapping date type for SQL
1802
+ data = data[on_list + update_list]
1803
+ data = self.date_mapping_data_types(data)
1804
+
1805
+ # create connection
1806
+ if self.commit_as_transaction != commit_as_transaction:
1807
+ self.commit_as_transaction = commit_as_transaction
1808
+ if not commit_as_transaction:
1809
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1810
+ else:
1811
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1812
+
1813
+ if self.con_write is None:
1814
+ self.con_write = self.engine_write.connect().connection
1815
+
1816
+ try:
1817
+ # initialise cursor
1818
+ cursor = self.con_write.cursor()
1819
+
1820
+ # Convert category columns to string
1821
+ cat_cols = data.columns[(data.dtypes == 'category').values].to_list()
1822
+ data[cat_cols] = data[cat_cols].astype(str)
1823
+ # Deal with null values and apostrophes (')
1824
+ data = data.replace("'NULL'", "NULL")
1825
+ data = data.replace("'", "~~", regex=True).infer_objects(copy=False)
1826
+ data = data.fillna("null")
1827
+
1828
+ records = [tuple(x) for x in data.values]
1829
+ temp_table = f'#Temp{schema}{table}'
1830
+
1831
+ for batch in self._chunker(records, chunk):
1832
+ batch_records = [tuple(x) for x in batch]
1833
+ # initialisation of the sql statement
1834
+ insert_ = f'DROP TABLE IF EXISTS {temp_table} '
1835
+ insert_ += f"SELECT * INTO {temp_table} FROM ( VALUES "
1836
+ temp_columns = str(tuple(data.columns.values)).replace("(\'", "([").replace(
1837
+ '\', \'', '], [').replace('\')', '])')
1838
+ rows = str(batch_records).strip('[]').replace("~~", "''")
1839
+ rows = rows.replace("'NULL'", "NULL").replace("'null'", 'null')
1840
+ sql_statement = insert_ + rows
1841
+ sql_statement = self.convert_decimal_str(sql_statement)
1842
+ sql_statement += f') AS TempTable {temp_columns}'
1843
+
1844
+ col_update_set = ''
1845
+ for col in update_list:
1846
+ col_update_set += f' target.{col} = source.{col},'
1847
+ col_update_set = col_update_set[:-1]
1848
+
1849
+ col_difference_check = ''
1850
+ for col in update_list:
1851
+ col_difference_check += f' target.{col} <> source.{col} OR'
1852
+ col_difference_check = col_difference_check[:-2]
1853
+
1854
+ col_join_on = ''
1855
+ for col in on_list:
1856
+ col_join_on += f' source.{col} = target.{col} AND'
1857
+ col_join_on = col_join_on[:-3]
1858
+
1859
+ sql_statement += f'UPDATE target SET {col_update_set} '
1860
+
1861
+ if len(output) > 0:
1862
+ sql_statement += f" OUTPUT {','.join(sql_output)} "
1863
+
1864
+ sql_statement += f'''FROM {schema}.{table} target
1865
+ JOIN {temp_table} as source
1866
+ ON {col_join_on}
1867
+ WHERE {col_difference_check}
1868
+ '''
1869
+
1870
+ sql_statement += f' DROP TABLE IF EXISTS {temp_table} '
1871
+
1872
+ if print_sql:
1873
+ print(sql_statement)
1874
+
1875
+ # executing statement
1876
+ if len(sql_statement) > 0:
1877
+ if len(output) > 0:
1878
+ cursor.execute(sql_statement)
1879
+ cursor.nextset()
1880
+ results = pd.concat([results, pd.DataFrame.from_records(cursor.fetchall(), columns=output)])
1881
+ else:
1882
+ cursor.execute(sql_statement)
1883
+
1884
+ self.con_write.commit()
1885
+
1886
+ if len(output) > 0:
1887
+ return results.reset_index(drop=True)
1888
+
1889
+ except Exception:
1890
+ raise Exception(traceback.format_exc())
1891
+
1892
+ finally:
1893
+ if cursor:
1894
+ cursor.close()
1895
+
1896
+ def merge(self, data, staging_schema, staging_table, sp_schema, sp_name, truncate=False, chunk=1000,
1897
+ commit_as_transaction=True):
1898
+ """ Merge data from Staging table using a Stored Procedure. It requires a table in SQL which will store the
1899
+ Staging data. The method will work as follows:
1900
+ 1.- Truncate the staging table according to the truncate parameter
1901
+ 2.- Insert the data into the staging table
1902
+ 3.- Execute a stored procedure to merge the staging table with the destination table
1903
+ -----------------------------
1904
+ df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1, 2]})
1905
+ con_.merge(df, staging_schema, staging_table, sp_schema, sp_name, truncate=True)
1906
+ -----------------------------
1907
+ :param data: DataFrame to insert in the staging table
1908
+ :param staging_schema: Staging table schema
1909
+ :param staging_table: Staging table name
1910
+ :param sp_schema: Stored Procedure schema
1911
+ :param sp_name: Stored Procedure name
1912
+ :param truncate: Indicate whether the staging table has to be truncated or not
1913
+ :param chunk: Indicate how many rows will be uploaded at once
1914
+ :param commit_as_transaction: Indicate whether the connection will be done using the autocommit option or not
1915
+ :return: None
1916
+ """
1917
+ if data is None:
1918
+ # no data to upload
1919
+ return ValueError("The data provided is invalid!")
1920
+ cursor = None
1921
+
1922
+ if self.commit_as_transaction != commit_as_transaction:
1923
+ self.commit_as_transaction = commit_as_transaction
1924
+ if not commit_as_transaction:
1925
+ self.engine_write = self.engine_write.execution_options(isolation_level="AUTOCOMMIT")
1926
+ else:
1927
+ self.engine_write = self.engine_write.execution_options(isolation_level="READ COMMITTED")
1928
+
1929
+ if self.con_write is None:
1930
+ self.con_write = self.engine_write.connect().connection
1931
+
1932
+ try:
1933
+ cursor = self.con_write.cursor()
1934
+ # Truncate Staging table if needed
1935
+ if truncate:
1936
+ trunc_insert = """TRUNCATE TABLE [%s].[%s]""" % (staging_schema, staging_table)
1937
+ cursor.execute(trunc_insert)
1938
+ self.con_write.commit()
1939
+ # Convert category columns to string
1940
+ cat_cols = data.columns[(data.dtypes == 'category').values].to_list()
1941
+ data[cat_cols] = data[cat_cols].astype(str)
1942
+ # Deal with null values and apostrophes (')
1943
+ data = data.replace("'NULL'", "NULL")
1944
+ data = data.replace("'", "~~", regex=True).infer_objects(copy=False)
1945
+ data = data.fillna("null")
1946
+ # Insert in Staging Table
1947
+ records = [tuple(x) for x in data.values]
1948
+ insert_ = """INSERT INTO [%s].[%s] """ % (staging_schema, staging_table)
1949
+ insert_ = insert_ + str(tuple(data.columns.values)).replace("\'", "") + """ VALUES """
1950
+ for batch in self._chunker(records, chunk):
1951
+ rows = str(batch).strip('[]').replace("~~", "''")
1952
+ rows = rows.replace("'NULL'", "NULL").replace("'null'", 'null')
1953
+ string = insert_ + rows
1954
+ string = self.convert_decimal_str(string)
1955
+ cursor.execute(string)
1956
+ self.con_write.commit()
1957
+ # Execute Stored Procedure
1958
+ exec_sp = """EXECUTE [%s].[%s]""" % (sp_schema, sp_name)
1959
+ cursor.execute(exec_sp)
1960
+ self.con_write.commit()
1961
+ except Exception:
1962
+ raise Exception(traceback.format_exc())
1963
+ finally:
1964
+ if cursor:
1965
+ cursor.close()
1966
+
1967
+ def merge_into(self, data, schema, table, on_list, update_check=False, update_set=None, bool_cols=None,
1968
+ identity=False, print_sql=False, nullable=False):
1969
+ """
1970
+ This method is equivalent to the 'merge into' of T-SQL. Schema and table define the Target, while data is the
1971
+ Source. Please refer to the schema below for clarification on how the arguments are used.
1972
+ Aspects to take into consideration:
1973
+ 1.- This method will not work properly if data contains duplicates. It is not relevant if the target contains
1974
+ duplicates because DISTINCT is used when reading the table.
1975
+ 2.- When having booleans in the dataset you have to pay attention because pandas gets bool from SQL Server as
1976
+ [True, False], instead of [0, 1]. The method needs data of type boolean to be inserted as [0, 1].
1977
+ 3.- When dealing with datetime columns a similar problem arises. time_format is a dict that contains as keys
1978
+ the name of a date column and as values the format that the column has to have.
1979
+ -------------------------
1980
+ MERGE INTO [SCHEMA].[TABLE] AS TARGET
1981
+ USING (
1982
+ data
1983
+ ) AS SOURCE
1984
+ ON TARGET.on_list[0] = SOURCE.on_list[0]
1985
+ AND TARGET.on_list[1] = SOURCE.on_list[1]
1986
+ ...
1987
+ AND TARGET.on_list[n] = SOURCE.on_list[n]
1988
+ WHEN MATCHED AND (
1989
+ TARGET.update_check[0] <> SOURCE.update_check[0]
1990
+ OR TARGET.update_check[1] <> SOURCE.update_check[1]
1991
+ ...
1992
+ OR TARGET.update_check[n] <> SOURCE.update_check[n]
1993
+ )
1994
+ UPDATE SET TARGET.update_check[0] = SOURCE.update_check[0],
1995
+ ...
1996
+ TARGET.update_check[n] = SOURCE.update_check[n],
1997
+ TARGET.update_set[0] = SOURCE.update_set[0],
1998
+ TARGET.update_set[1] = SOURCE.update_set[1],
1999
+ ....
2000
+ TARGET.update_set[n] = SOURCE.update_set[n]
2001
+ WHEN NOT MATCHED BY TARGET THEN
2002
+ INSERT
2003
+ (
2004
+ all columns from [SCHEMA].[TABLE]
2005
+ )
2006
+ VALUES
2007
+ (all columns from data)
2008
+ -------------------------------
2009
+ :param data: DataFrame containing the data to upload/update
2010
+ :param schema: Schema of the table in which the data will be uploaded
2011
+ :param table: Table in which the data will be uploaded
2012
+ :param on_list: list of columns to apply the on clause
2013
+ :param update_check: list of columns to do the check
2014
+ :param update_set: list of columns to update
2015
+ :param bool_cols: list of columns gathering boolean types
2016
+ :param identity: Indicate whether the identity columns will be inserted or not, only make sense when the table
2017
+ in its definition has it. It's a boolean.
2018
+ :param print_sql: boolean to indicate that you want the sql_statement to be printed on the console
2019
+ :param nullable: Used for the boolean_mapping_data_types to indicate which boolean column values to convert
2020
+ :return: None
2021
+ """
2022
+ if data is None:
2023
+ # no data to upload
2024
+ return ValueError("The data provided is invalid!")
2025
+
2026
+ if data.shape[0] != data.drop_duplicates().shape[0]:
2027
+ return TypeError("There are duplicates values in your dataframe, it will not work properly on "
2028
+ "pd.concat().drop_duplicates()")
2029
+
2030
+ # if update_set has values assigned, update check has to have values assigned
2031
+ if update_set is not None:
2032
+ if update_check is None:
2033
+ return ValueError("Please, to use update_set assigned values to update_check")
2034
+ else:
2035
+ update_set = update_check
2036
+
2037
+ # Mapping boolean columns
2038
+ if bool_cols is not None:
2039
+ for col in bool_cols:
2040
+ data[col] = data[col].astype(bool)
2041
+
2042
+ # Mapping date and boolean type for SQL
2043
+ data = self.date_mapping_data_types(data)
2044
+ data = self.boolean_mapping_data_types(data, nullable)
2045
+
2046
+ try:
2047
+ # call the table from the server
2048
+ data_table = self.query("""SELECT DISTINCT * FROM [%s].[%s]""" % (schema, table))
2049
+
2050
+ if data_table.shape[0] == 0:
2051
+ print("The destination table is empty so all the data will be inserted")
2052
+ self.insert(data, schema, table)
2053
+
2054
+ else:
2055
+ for data_col in data.columns:
2056
+ if ("int" in str(type(data_table[data_col].iloc[0]))) & (
2057
+ data_table[data_col].isnull().sum() > 0):
2058
+ data_table[data_col] = data_table[data_col].astype(float)
2059
+ else:
2060
+ data_table[data_col] = data_table[data_col].astype(type(data[data_col].iloc[0]))
2061
+
2062
+ coincidence = pd.DataFrame()
2063
+ if data_table.shape[0] > 0:
2064
+ for col in data_table.columns.values.tolist():
2065
+ if isinstance(data_table.loc[0, col], bool):
2066
+ data_table[col] = data_table[col].apply(
2067
+ lambda x: 1 if x is True else 0 if x is False else np.nan)
2068
+ if bool_cols is not None:
2069
+ for col in bool_cols:
2070
+ data_table[col] = data_table[col].astype(bool)
2071
+ # join the input table with the one in the database
2072
+ coincidence = data.merge(data_table[on_list], how='inner', on=on_list)
2073
+ # WHEN MATCHED AND ... UPDATE SET
2074
+ if update_check:
2075
+ coincidence2 = coincidence.merge(data_table[list(set(on_list + update_check))],
2076
+ how='inner',
2077
+ on=list(set(on_list + update_check)))
2078
+ data_update = pd.concat([coincidence, coincidence2], ignore_index=True)
2079
+ data_update.drop_duplicates(keep=False, inplace=True)
2080
+ if data_update.shape[0] > 0:
2081
+ self.update(data_update, list(set(update_set + update_check)), on_list, schema, table,
2082
+ print_sql=print_sql)
2083
+
2084
+ # WHEN NOT MATCHED BY TARGET THEN... INSERT
2085
+ data_insert = pd.concat([data, coincidence], ignore_index=True)
2086
+ data_insert.drop_duplicates(keep=False, inplace=True)
2087
+ if data_insert.shape[0] > 0:
2088
+ self.insert(data_insert, schema, table, identity=identity, print_sql=print_sql)
2089
+
2090
+ except Exception:
2091
+ raise Exception(traceback.format_exc())
2092
+
2093
+ @staticmethod
2094
+ def _parse_df(parse_, data, col_names):
2095
+ """ Auxiliar function to convert list to DataFrame
2096
+ :param parse_: Parameter to indicate whether the data has to be transformed into a DataFrame or not
2097
+ :param data: List gathering the data retrieved from SQL
2098
+ :param col_names: List of columns to create the DataFrame
2099
+ :return: Formatted data
2100
+ """
2101
+ if parse_ is True:
2102
+ col_names = list(zip(*list(col_names)))[0]
2103
+ res = pd.DataFrame(list(zip(*data)), index=col_names).T
2104
+ else:
2105
+ res = [col_names, data]
2106
+ return res
2107
+
2108
+ @staticmethod
2109
+ def _chunker(seq, size):
2110
+ """ Split the data set in chunks to be sent to SQL
2111
+ :param seq: Sequence of records to be split
2112
+ :param size: Size of the chunks to split the data
2113
+ :return: The DataFrame divided in chunks
2114
+ """
2115
+ return (seq[pos:pos + size] for pos in range(0, len(seq), size))
2116
+
2117
+ @staticmethod
2118
+ def date_mapping_data_types(data):
2119
+ """
2120
+ Map datetime and boolean variables so they can be inserted in SQL
2121
+ :param data: DataFrame containing the variables to map
2122
+ :return: The mapped DataFrame
2123
+ """
2124
+ first_index = data.index[0]
2125
+ date_col = data.columns[
2126
+ [('date' in str(type(data.loc[first_index, col]))) | ('timestamp' in str(type(data.loc[first_index, col])))
2127
+ for col in data.columns]]
2128
+ if len(date_col) > 0:
2129
+ for col in date_col:
2130
+ data[col] = pd.to_datetime(data[col])
2131
+ if data[col].dtypes == 'O':
2132
+ data[col] = data[col].dt.strftime('%Y-%m-%d')
2133
+ else:
2134
+ data[col] = data[col].dt.strftime('%Y-%m-%d %H:%M:%S')
2135
+ data.loc[data[col] == 'NaT', col] = np.nan
2136
+
2137
+ return data
2138
+
2139
+ @staticmethod
2140
+ def boolean_mapping_data_types(data, nullable=False):
2141
+ """
2142
+ Map datetime and boolean variables so they can be inserted in SQL
2143
+ :param data: DataFrame containing the variables to map
2144
+ :param nullable: Determine if you want to convert null values within boolean columns to boolean format or not
2145
+ :return: The mapped DataFrame
972
2146
  """
973
2147
  first_index = data.index[0]
974
2148
  bool_col = data.columns[
@@ -990,7 +2164,7 @@ class SQLConnection:
990
2164
  def id_next(con_db, table, schema, id_col, print_sql=False):
991
2165
  """
992
2166
  This static method returns the next id to be inserted into a table for sql_server
993
- :param con_db: class to connect to a sql server dabatase
2167
+ :param con_db: class to connect to a sql server database
994
2168
  :param table: name of the table
995
2169
  :param schema: name of the schema
996
2170
  :param id_col: name of the id column
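Usage note: below is a minimal sketch of how the new pooling classes added in this version fit together, based on the constructor and credentials() code above. The credential layout mirrors what credentials() parses; 'FruitFlow' is the example reference from the docstrings, and the table names in the statements are hypothetical placeholders.

    import os

    # Layout expected by SQLPoolEngine.credentials(): a dict literal keyed by
    # server type, stored in the environment variable SQL-<DB_REFERENCE>.
    os.environ["SQL-FRUITFLOW"] = (
        "{'prod': {'server_name': 'sql-host', 'db_name': 'FruitFlow',"
        " 'user_name': 'svc_user', 'pwd': 'secret'}}"
    )

    # Build the pooled engines once. pool_recycle is set to `timeout`, and a
    # background thread disposes both engines after `timeout` seconds.
    pool_ = SQLPoolEngine('FruitFlow', 'prod', pool_size=10, max_overflow=10,
                          pool_timeout=30, timeout=300)

    # Workers share the pooled engines instead of creating their own.
    con_ = SQLConnectionPool(pool_)
    df = con_.query("SELECT TOP 10 * FROM [dbo].[SomeTable]")  # hypothetical table
    con_.run_statement("DELETE FROM [dbo].[SomeStaging] WHERE Id > 100")  # hypothetical table
    con_.close_connection()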