icsDataValidation 1.0.371__py3-none-any.whl → 1.0.415__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. icsDataValidation/configuration.py +0 -0
  2. icsDataValidation/connection_setups/__init__.py +0 -0
  3. icsDataValidation/connection_setups/azure_connection_setup.py +2 -1
  4. icsDataValidation/connection_setups/databricks_connection_setup.py +0 -0
  5. icsDataValidation/connection_setups/exasol_connection_setup.py +0 -0
  6. icsDataValidation/connection_setups/oracle_connection_setup.py +0 -0
  7. icsDataValidation/connection_setups/snowflake_connection_setup.py +0 -0
  8. icsDataValidation/connection_setups/sqlserver_connection_setup.py +20 -0
  9. icsDataValidation/connection_setups/teradata_connection_setup.py +0 -0
  10. icsDataValidation/core/__init__.py +0 -0
  11. icsDataValidation/core/database_objects.py +0 -0
  12. icsDataValidation/core/object_comparison.py +0 -0
  13. icsDataValidation/input_parameters/__init__.py +0 -0
  14. icsDataValidation/input_parameters/testing_tool_params.py +4 -3
  15. icsDataValidation/main.py +15 -11
  16. icsDataValidation/output_parameters/__init__.py +0 -0
  17. icsDataValidation/output_parameters/result_params.py +0 -0
  18. icsDataValidation/services/__init__.py +0 -0
  19. icsDataValidation/services/comparison_service.py +80 -76
  20. icsDataValidation/services/database_services/__init__.py +0 -0
  21. icsDataValidation/services/database_services/azure_service.py +69 -43
  22. icsDataValidation/services/database_services/databricks_hive_metastore_service.py +20 -7
  23. icsDataValidation/services/database_services/databricks_unity_catalog_service.py +20 -12
  24. icsDataValidation/services/database_services/exasol_service.py +26 -23
  25. icsDataValidation/services/database_services/oracle_service.py +64 -55
  26. icsDataValidation/services/database_services/snowflake_service.py +85 -36
  27. icsDataValidation/services/database_services/sqlserver_service.py +868 -0
  28. icsDataValidation/services/database_services/teradata_service.py +54 -37
  29. icsDataValidation/services/initialization_service.py +0 -0
  30. icsDataValidation/services/result_service.py +0 -0
  31. icsDataValidation/services/system_service.py +4 -0
  32. icsDataValidation/services/testset_service.py +0 -0
  33. icsDataValidation/utils/__init__.py +0 -0
  34. icsDataValidation/utils/file_util.py +0 -0
  35. icsDataValidation/utils/logger_util.py +0 -0
  36. icsDataValidation/utils/pandas_util.py +0 -0
  37. icsDataValidation/utils/parallelization_util.py +0 -0
  38. icsDataValidation/utils/sql_util.py +0 -0
  39. icsdatavalidation-1.0.415.dist-info/METADATA +298 -0
  40. {icsDataValidation-1.0.371.dist-info → icsdatavalidation-1.0.415.dist-info}/RECORD +18 -16
  41. {icsDataValidation-1.0.371.dist-info → icsdatavalidation-1.0.415.dist-info}/WHEEL +1 -1
  42. {icsDataValidation-1.0.371.dist-info → icsdatavalidation-1.0.415.dist-info}/top_level.txt +0 -0
  43. icsDataValidation-1.0.371.dist-info/METADATA +0 -21
@@ -1,9 +1,9 @@
1
1
  import pyodbc
2
- import pandas.io.sql
2
+ import pandas.io.sql
3
3
  import pandas as pd
4
4
  import logging
5
5
 
6
- from typing import Union, List, Dict
6
+ from typing import Union, List, Dict
7
7
 
8
8
  from icsDataValidation.utils.logger_util import configure_dev_ops_logger
9
9
  from icsDataValidation.core.database_objects import DatabaseObject
@@ -17,7 +17,7 @@ logger = logging.getLogger('Azure_Service')
17
17
  logger.setLevel(logging.INFO)
18
18
  configure_dev_ops_logger(logger)
19
19
 
20
- class AzureService(object):
20
+ class AzureService:
21
21
  def __init__(self, connection_params: dict):
22
22
  self.connection_params =connection_params
23
23
  self.azure_connection = None
@@ -34,12 +34,17 @@ class AzureService(object):
34
34
  self.azure_connection.close()
35
35
 
36
36
  def _connect_to_azure(self):
37
- azure_connection_string = f"DRIVER={self.connection_params['Driver']};SERVER={self.connection_params['Server']};PORT=1443;DATABASE={self.connection_params['Database']};UID={self.connection_params['User']};PWD={self.connection_params['Password']}"
38
- #'DRIVER='+driver+';SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password
37
+ azure_connection_string = (
38
+ f"DRIVER={self.connection_params['Driver']};"
39
+ f"SERVER={self.connection_params['Server']};"
40
+ f"PORT={self.connection_params['Port']};"
41
+ f"DATABASE={self.connection_params['Database']};"
42
+ f"UID={self.connection_params['User']};"
43
+ f"PWD={self.connection_params['Password']}"
44
+ )
39
45
  self.azure_connection = pyodbc.connect(azure_connection_string)
40
46
  return self.azure_connection
41
47
 
42
-
43
48
  @staticmethod
44
49
  def _get_error_message(excepction: Exception, statement: str) -> None:
45
50
  """
@@ -65,21 +70,20 @@ class AzureService(object):
65
70
 
66
71
  if object_type_restriction=='include_all' or object_type_restriction=='include_only_tables':
67
72
  if schema:
68
- query_db_tables=f"select schema_name(t.schema_id) as schema_name, t.name as table_name from sys.tables t where schema_name(t.schema_id) = '{object.schema}' order by schema_name;"
69
- else:
73
+ query_db_tables=f"select schema_name(t.schema_id) as schema_name, t.name as table_name from sys.tables t where schema_name(t.schema_id) = '{schema}' order by schema_name;"
74
+ else:
70
75
  query_db_tables=f"select schema_name(t.schema_id) as schema_name, t.name as table_name from sys.tables t order by schema_name;"
71
76
 
72
77
  all_database_tables = self.execute_queries(query_db_tables)
73
78
 
74
-
75
79
  elif object_type_restriction=='include_all' or object_type_restriction=='include_only_views':
76
80
  if schema:
77
- query_db_views=f"select schema_name(t.schema_id) as schema_name, t.name as table_name from sys.views t where schema_name(t.schema_id) = '{object.schema}' order by schema_name;"
78
- else:
81
+ query_db_views=f"select schema_name(t.schema_id) as schema_name, t.name as table_name from sys.views t where schema_name(t.schema_id) = '{schema}' order by schema_name;"
82
+ else:
79
83
  query_db_views=f"select schema_name(t.schema_id) as schema_name, t.name as table_name from sys.views t order by schema_name;"
80
84
 
81
85
  all_database_views = self.execute_queries(query_db_views)
82
-
86
+
83
87
  database_objects=[]
84
88
  for row in all_database_tables:
85
89
  database_table=f'{database}.{row[0].upper()}.{row[1].upper()}'
@@ -110,13 +114,13 @@ class AzureService(object):
110
114
  def get_row_count_from_object(self, object : DatabaseObject) -> int:
111
115
  if self.azure_connection is None:
112
116
  self._connect_to_azure()
113
-
117
+
114
118
  query_get_row_count = f"select count(*) as ROW_COUNT from {object.schema}.{object.name};"
115
119
 
116
120
  row_count = self.execute_queries(query_get_row_count).fetchall()[0][0]
117
121
 
118
122
  return row_count
119
-
123
+
120
124
  def get_data_types_from_object(self, object : DatabaseObject, column_intersections: list) -> dict:
121
125
  results = []
122
126
 
@@ -124,27 +128,25 @@ class AzureService(object):
124
128
  self._connect_to_azure()
125
129
 
126
130
  column_intersections = str(column_intersections)[1:-1]
127
- query_get_data_types_from_object=f"select col.name, t.name as data_type from sys.tables as tab inner join sys.columns as col on tab.object_id = col.object_id left join sys.types as t on col.user_type_id = t.user_type_id where tab.name = '{object.name}' and schema_name(tab.schema_id) = '{object.schema}'"
131
+ query_get_data_types_from_object=f"select col.name, t.name as data_type from sys.tables as tab inner join sys.columns as col on tab.object_id = col.object_id left join sys.types as t on col.user_type_id = t.user_type_id where tab.name = '{object.name}' and schema_name(tab.schema_id) = '{object.schema}'"
128
132
  dict_colummns_datatype=self.execute_queries(query_get_data_types_from_object).fetchall()
129
133
 
130
- for row in dict_colummns_datatype:
131
- # logger.info(type(row))
132
- row_to_list = [elem for elem in row]
133
- results.append({"COLUMN_NAME":row_to_list[0],"DATA_TYPE":row_to_list[1]})
134
+ results = [{"COLUMN_NAME":row[0],"DATA_TYPE":row[1]} for row in dict_colummns_datatype]
134
135
 
135
136
  return results
136
137
 
137
- def get_count_distincts_from_object(self, object : DatabaseObject, column_intersections: list) -> dict:
138
+ def get_count_distincts_from_object(self, object : DatabaseObject, column_intersections: list,
139
+ enclose_column_by_double_quotes: bool = False) -> dict:
138
140
  if self.azure_connection is None:
139
141
  self._connect_to_azure()
140
142
 
141
143
  unions=""
142
144
  for column in column_intersections:
143
145
  unions +=f"UNION SELECT '{column}' AS COLUMN_NAME, COUNT(DISTINCT {column}) AS COUNT_DISTINCT FROM {object.schema}.{object.name}"
144
-
146
+
145
147
  query_get_count_distincts_from_object=f"{unions[5:]} ORDER BY COUNT_DISTINCT;"
146
148
  dict_count_distincts=self.execute_queries(query_get_count_distincts_from_object).fetchall()
147
-
149
+
148
150
  return dict_count_distincts
149
151
 
150
152
  def get_table_size(self, object : DatabaseObject) -> int:
@@ -154,7 +156,25 @@ class AzureService(object):
154
156
 
155
157
  return size
156
158
 
157
- def create_checksums(self, object : DatabaseObject, column_intersections: list) -> List[Dict]:
159
+ def create_checksums(
160
+ self,
161
+ object : DatabaseObject,
162
+ column_intersections: list,
163
+ enclose_column_by_double_quotes: bool = False
164
+ ) -> List[Dict]:
165
+ """creates checksums for given object in compliance with given conditions
166
+
167
+ Args:
168
+ object (DatabaseObject): table or view
169
+ column_intersections (list): columns that are used for checksums
170
+ where_clause (str, optional): Optional filter criteria given as sql-usable string. Defaults to "".
171
+ exclude_columns (list, optional): columns to exlude from calculation. Defaults to [].
172
+ numeric_scale (int, optional): number of decimal places for aggregations. Defaults to None.
173
+
174
+ Returns:
175
+ List[Dict]: checksums for columns of object
176
+ """
177
+
158
178
  if self.azure_connection is None:
159
179
  self._connect_to_azure()
160
180
 
@@ -162,7 +182,7 @@ class AzureService(object):
162
182
 
163
183
  dict_colummns_datatype=self.get_data_types_from_object(object, column_intersections)
164
184
 
165
- # dict_colummns_datatype_dict = dict(zip(dict_colummns_datatype[::2], dict_colummns_datatype[1::2]))
185
+ # dict_colummns_datatype_dict = dict(zip(dict_colummns_datatype[::2], dict_colummns_datatype[1::2]))
166
186
 
167
187
  aggregates = ""
168
188
  count_nulls = ""
@@ -174,13 +194,13 @@ class AzureService(object):
174
194
  count_nulls += f", sum(case when {column} is null then 1 else 0 end) as countnulls_{column}"
175
195
 
176
196
  if column_datatype.lower() == 'tinyint' or column_datatype.lower() == 'smallint' or column_datatype.lower() == 'int' or column_datatype.lower() == 'bigint' or column_datatype.lower() == 'decimal' or column_datatype.lower() == 'numeric' or column_datatype.lower() == 'smallmoney' or column_datatype.lower() == 'money' or column_datatype.lower() == 'float' or column_datatype.lower() == 'real':
177
-
197
+
178
198
  aggregates += f", sum({column}) as sum_{column}"
179
199
 
180
200
  elif column_datatype.lower() == 'char' or column_datatype.lower() == 'varchar' or column_datatype.lower() == 'varchar' or column_datatype.lower() == 'text' or column_datatype.lower() == 'nchar' or column_datatype.lower() == 'nvarchar' or column_datatype.lower() == 'nvarchar' or column_datatype.lower() == 'ntext' or column_datatype.lower() == 'binary' or column_datatype.lower() == 'varbinary' or column_datatype.lower() == 'varbinary' or column_datatype.lower() == 'datetime' or column_datatype.lower() == 'datetime2' or column_datatype.lower() == 'smalldatetime' or column_datatype.lower() == 'date' or column_datatype.lower() == 'time' or column_datatype.lower() == 'datetimeoffset' or column_datatype.lower() == 'timestamp':
181
-
201
+
182
202
  aggregates += f", count(distinct lower({column})) as countdistinct_{column}"
183
-
203
+
184
204
  elif column_datatype.lower() == 'bit':
185
205
 
186
206
  aggregates += f", (SELECT CONCAT ((select count(*) as val FROM {object.schema}.{object.name} WHERE {column} = 1),'_',(select count(*) as val from {object.schema}.{object.name} WHERE {column} = 0))) AS aggregateboolean_{column}"
@@ -210,26 +230,27 @@ class AzureService(object):
210
230
  agg_result = 0
211
231
  else:
212
232
  agg_result = aggregation_results[i]
213
-
233
+
214
234
  if countnulls_results[i] is None:
215
235
  cnt_result = 0
216
236
  else:
217
237
  cnt_result = countnulls_results[i]
218
-
238
+
219
239
  test_list.append([[item.split("_", 1)[0] for item in aggregation_columns][i],agg_result,cnt_result])
220
240
 
221
241
  checksums = dict(zip([item.split("_", 1)[1] for item in aggregation_columns] , test_list))
222
242
 
223
243
  return checksums
224
244
 
225
- def create_pandas_df_from_group_by(self, object : DatabaseObject, column_intersections: list, group_by_column: str) -> List[Dict]:
245
+ def create_pandas_df_from_group_by(self, object : DatabaseObject, column_intersections: list, group_by_column: str,
246
+ enclose_column_by_double_quotes: bool = False) -> List[Dict]:
226
247
 
227
248
  if self.teradata_connection is None:
228
249
  self._connect_to_teradata()
229
250
 
230
251
  aggregation_columns= [f"{column.upper()}" for column in column_intersections if column != group_by_column]
231
252
 
232
- dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
253
+ dict_colummns_datatype=self.get_data_types_from_object(object, aggregation_columns)
233
254
 
234
255
  aggregates = ""
235
256
 
@@ -238,13 +259,13 @@ class AzureService(object):
238
259
  column_datatype = column_datatype.split('(')[0]
239
260
 
240
261
  if column_datatype.lower() == 'tinyint' or column_datatype.lower() == 'smallint' or column_datatype.lower() == 'int' or column_datatype.lower() == 'bigint' or column_datatype.lower() == 'decimal' or column_datatype.lower() == 'numeric' or column_datatype.lower() == 'smallmoney' or column_datatype.lower() == 'money' or column_datatype.lower() == 'float' or column_datatype.lower() == 'real':
241
-
262
+
242
263
  aggregates += f", sum({column}) as sum_{column}"
243
264
 
244
265
  elif column_datatype.lower() == 'char' or column_datatype.lower() == 'varchar' or column_datatype.lower() == 'varchar' or column_datatype.lower() == 'text' or column_datatype.lower() == 'nchar' or column_datatype.lower() == 'nvarchar' or column_datatype.lower() == 'nvarchar' or column_datatype.lower() == 'ntext' or column_datatype.lower() == 'binary' or column_datatype.lower() == 'varbinary' or column_datatype.lower() == 'varbinary' or column_datatype.lower() == 'datetime' or column_datatype.lower() == 'datetime2' or column_datatype.lower() == 'smalldatetime' or column_datatype.lower() == 'date' or column_datatype.lower() == 'time' or column_datatype.lower() == 'datetimeoffset' or column_datatype.lower() == 'timestamp':
245
-
266
+
246
267
  aggregates += f", count(distinct lower({column})) as countdistinct_{column}"
247
-
268
+
248
269
  elif column_datatype.lower() == 'bit':
249
270
 
250
271
  aggregates += f", (SELECT CONCAT ((select count(*) as val FROM {object.schema}.{object.name} WHERE {column} = 1),'_',(select count(*) as val from {object.schema}.{object.name} WHERE {column} = 0))) AS aggregateboolean_{column}"
@@ -257,15 +278,22 @@ class AzureService(object):
257
278
 
258
279
  return group_by_aggregation_pdf
259
280
 
260
- def create_pandas_df(self, object : DatabaseObject, intersection_columns_trgt_src: list , where_clause:str="", exclude_columns:list=[]) -> pd.DataFrame:
261
-
281
+ def create_pandas_df(
282
+ self,
283
+ object : DatabaseObject,
284
+ intersection_columns_trgt_src: list,
285
+ where_clause:str="",
286
+ exclude_columns:list=[],
287
+ enclose_column_by_double_quotes: bool = False
288
+ ) -> pd.DataFrame:
289
+
262
290
  if self.azure_connection is None:
263
291
  self._connect_to_azure()
264
-
292
+
265
293
  intersection_columns_trgt_src_ = ', '.join(list(set(intersection_columns_trgt_src) - set(exclude_columns)))
266
294
 
267
295
  df_query = f"select {intersection_columns_trgt_src_} from {object.schema}.{object.name} {where_clause};"
268
-
296
+
269
297
  pdf = self.execute_queries(df_query,True)
270
298
 
271
299
  return pdf
@@ -274,7 +302,7 @@ class AzureService(object):
274
302
  def execute_queries(self, query: Union[str, List[str]],return_as_pdf:bool=False) -> Union[List[Dict], List[List[Dict]]]:
275
303
  if self.azure_connection is None:
276
304
  self._connect_to_azure()
277
-
305
+
278
306
  query_list: List[str] = query if isinstance(query, list) else [query]
279
307
 
280
308
  results = []
@@ -285,7 +313,7 @@ class AzureService(object):
285
313
  query_result = pandas.io.sql.read_sql(single_query, self.azure_connection)
286
314
  else:
287
315
  query_result=self.azure_connection.execute(single_query)
288
-
316
+
289
317
  results.append(query_result)
290
318
 
291
319
  except Exception as err:
@@ -311,10 +339,8 @@ class AzureService(object):
311
339
  for single_statement in statement_list:
312
340
  stripped_statement = (
313
341
  single_statement.strip()
314
- )
342
+ )
315
343
  _ = self.azure_connection.execute(stripped_statement)
316
344
 
317
345
  except Exception as err:
318
346
  raise Exception(self._get_error_message(err, single_statement)) from err
319
-
320
-
@@ -85,6 +85,7 @@ class DatabricksHiveMetastoreService(object):
85
85
  numeric_columns: list,
86
86
  numeric_scale: int,
87
87
  where_exists: bool = True,
88
+ enclose_column_by_double_quotes: bool = False,
88
89
  ) -> str:
89
90
  """generates in_clause from list ready to expand the where clause, numeric values are rounded
90
91
 
@@ -110,7 +111,7 @@ class DatabricksHiveMetastoreService(object):
110
111
  in_clause_cols = f" WHERE (("
111
112
  for key in key_filters.keys():
112
113
  if key in numeric_columns:
113
- in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
114
+ in_clause_cols += f"""ROUND({key.replace("'", "")}, {numeric_scale})""" + ","
114
115
  else:
115
116
  in_clause_cols += key.replace("'", "") + ","
116
117
  in_clause_cols = in_clause_cols[:-1] + ")"
@@ -118,7 +119,8 @@ class DatabricksHiveMetastoreService(object):
118
119
  return in_clause
119
120
 
120
121
  def _get_column_clause(
121
- self, column_list: list, columns_datatype: list, numeric_scale, key_columns
122
+ self, column_list: list, columns_datatype: list, numeric_scale, key_columns,
123
+ enclose_column_by_double_quotes: bool = False
122
124
  ) -> dict:
123
125
  """turns list of desired columns into a sql compatible string
124
126
 
@@ -336,6 +338,7 @@ class DatabricksHiveMetastoreService(object):
336
338
  column_intersections: list,
337
339
  where_clause: str = "",
338
340
  exclude_columns: list = [],
341
+ enclose_column_by_double_quotes: bool = False,
339
342
  ) -> dict:
340
343
  """get distinct count for every column in a database object that is in column intersections list
341
344
 
@@ -412,6 +415,7 @@ class DatabricksHiveMetastoreService(object):
412
415
  where_clause: str = "",
413
416
  exclude_columns: list = [],
414
417
  numeric_scale: int = None,
418
+ enclose_column_by_double_quotes: bool = False,
415
419
  ) -> List[Dict]:
416
420
  """creates checksums for given object in compliance with given conditions
417
421
 
@@ -514,6 +518,7 @@ class DatabricksHiveMetastoreService(object):
514
518
  where_clause: str,
515
519
  exclude_columns: list,
516
520
  numeric_scale: int = None,
521
+ enclose_column_by_double_quotes: bool = False,
517
522
  ) -> List[Dict]:
518
523
  """execution of multiple aggregations at once
519
524
 
@@ -660,7 +665,12 @@ class DatabricksHiveMetastoreService(object):
660
665
  )
661
666
 
662
667
  def create_pandas_df(
663
- self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]
668
+ self,
669
+ object: DatabaseObject,
670
+ intersection_columns_trgt_src: list,
671
+ where_clause:str="",
672
+ exclude_columns:list=[],
673
+ enclose_column_by_double_quotes: bool = False
664
674
  ) -> pd.DataFrame:
665
675
  """creates pandas dataframes with all data from given object in given columns
666
676
 
@@ -693,6 +703,7 @@ class DatabricksHiveMetastoreService(object):
693
703
  dedicated_columns: list = [],
694
704
  sample_count: int = 10,
695
705
  numeric_scale: int = None,
706
+ enclose_column_by_double_quotes: bool = False,
696
707
  ) -> List[Dict]:
697
708
  if self.databricks_connection is None:
698
709
  self._connect_to_databricks()
@@ -742,7 +753,7 @@ class DatabricksHiveMetastoreService(object):
742
753
  values = list(key_filters.values())
743
754
  if values[0] != []:
744
755
  in_clause = self._get_in_clause(
745
- key_filters, numeric_columns, numeric_scale, where_exists
756
+ key_filters, numeric_columns, numeric_scale, where_exists, enclose_column_by_double_quotes
746
757
  )
747
758
  else:
748
759
  in_clause = ""
@@ -750,13 +761,14 @@ class DatabricksHiveMetastoreService(object):
750
761
  elif key_intersection != [] and not is_dedicated:
751
762
  keys = str(key_intersection)[1:-1].replace("'", "")
752
763
  column_clause, numeric_columns, used_columns = self._get_column_clause(
753
- column_intersections, dict_colummns_datatype, numeric_scale, key_columns
764
+ column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
765
+ enclose_column_by_double_quotes
754
766
  )
755
767
  if (key_filters != {}) & (filter_intersection != []):
756
768
  values = list(key_filters.values())
757
769
  if values[0] != []:
758
770
  in_clause = self._get_in_clause(
759
- key_filters, numeric_columns, numeric_scale, where_exists
771
+ key_filters, numeric_columns, numeric_scale, where_exists, enclose_column_by_double_quotes
760
772
  )
761
773
  else:
762
774
  in_clause = ""
@@ -767,7 +779,8 @@ class DatabricksHiveMetastoreService(object):
767
779
  )
768
780
  column_intersections.sort()
769
781
  column_clause, numeric_columns, used_columns = self._get_column_clause(
770
- column_intersections, dict_colummns_datatype, numeric_scale, key_columns
782
+ column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
783
+ enclose_column_by_double_quotes
771
784
  )
772
785
  sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} TABLESAMPLE ({sample_count} ROWS) {where_clause};"
773
786
 
@@ -85,6 +85,7 @@ class DatabricksUnityCatalogService(object):
85
85
  numeric_columns: list,
86
86
  numeric_scale: int,
87
87
  where_exists: bool = True,
88
+ enclose_column_by_double_quotes: bool = False,
88
89
  ) -> str:
89
90
  """generates in_clause from list ready to expand the where clause, numeric values are rounded
90
91
 
@@ -110,7 +111,7 @@ class DatabricksUnityCatalogService(object):
110
111
  in_clause_cols = f" WHERE (("
111
112
  for key in key_filters.keys():
112
113
  if key in numeric_columns:
113
- in_clause_cols += f"""ROUND({key.replace("'", "")},2)""" + ","
114
+ in_clause_cols += f"""ROUND({key.replace("'", "")}, {numeric_scale})""" + ","
114
115
  else:
115
116
  in_clause_cols += key.replace("'", "") + ","
116
117
  in_clause_cols = in_clause_cols[:-1] + ")"
@@ -118,7 +119,8 @@ class DatabricksUnityCatalogService(object):
118
119
  return in_clause
119
120
 
120
121
  def _get_column_clause(
121
- self, column_list: list, columns_datatype: list, numeric_scale, key_columns
122
+ self, column_list: list, columns_datatype: list, numeric_scale, key_columns,
123
+ enclose_column_by_double_quotes: bool = False
122
124
  ) -> dict:
123
125
  """turns list of desired columns into a sql compatible string
124
126
 
@@ -334,6 +336,7 @@ class DatabricksUnityCatalogService(object):
334
336
  column_intersections: list,
335
337
  where_clause: str = "",
336
338
  exclude_columns: list = [],
339
+ enclose_column_by_double_quotes: bool = False,
337
340
  ) -> dict:
338
341
  """get distinct count for every column in a database object that is in column intersections list
339
342
 
@@ -410,6 +413,7 @@ class DatabricksUnityCatalogService(object):
410
413
  where_clause: str = "",
411
414
  exclude_columns: list = [],
412
415
  numeric_scale: int = None,
416
+ enclose_column_by_double_quotes: bool = False,
413
417
  ) -> List[Dict]:
414
418
  """creates checksums for given object in compliance with given conditions
415
419
 
@@ -504,6 +508,7 @@ class DatabricksUnityCatalogService(object):
504
508
  where_clause: str,
505
509
  exclude_columns: list,
506
510
  numeric_scale: int = None,
511
+ enclose_column_by_double_quotes: bool = False,
507
512
  ) -> List[Dict]:
508
513
  """execution of multiple aggregations at once
509
514
 
@@ -648,10 +653,11 @@ class DatabricksUnityCatalogService(object):
648
653
  grouping_columns_final,
649
654
  error_dict
650
655
  )
651
-
656
+
652
657
 
653
658
  def create_pandas_df(
654
- self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[]
659
+ self, object: DatabaseObject, intersection_columns_trgt_src: list, where_clause:str="", exclude_columns:list=[],
660
+ enclose_column_by_double_quotes: bool = False
655
661
  ) -> pd.DataFrame:
656
662
  """creates pandas dataframes with all data from given object in given columns
657
663
 
@@ -684,6 +690,7 @@ class DatabricksUnityCatalogService(object):
684
690
  dedicated_columns: list = [],
685
691
  sample_count: int = 10,
686
692
  numeric_scale: int = None,
693
+ enclose_column_by_double_quotes: bool = False,
687
694
  ) -> List[Dict]:
688
695
  if self.databricks_connection is None:
689
696
  self._connect_to_databricks()
@@ -728,12 +735,13 @@ class DatabricksUnityCatalogService(object):
728
735
  dict_colummns_datatype,
729
736
  numeric_scale,
730
737
  key_columns,
738
+ enclose_column_by_double_quotes
731
739
  )
732
740
  if (key_filters != {}) & (filter_intersection != []):
733
741
  values = list(key_filters.values())
734
742
  if values[0] != []:
735
743
  in_clause = self._get_in_clause(
736
- key_filters, numeric_columns, numeric_scale, where_exists
744
+ key_filters, numeric_columns, numeric_scale, where_exists, enclose_column_by_double_quotes
737
745
  )
738
746
  else:
739
747
  in_clause = ""
@@ -741,13 +749,14 @@ class DatabricksUnityCatalogService(object):
741
749
  elif key_intersection != [] and not is_dedicated:
742
750
  keys = str(key_intersection)[1:-1].replace("'", "")
743
751
  column_clause, numeric_columns, used_columns = self._get_column_clause(
744
- column_intersections, dict_colummns_datatype, numeric_scale, key_columns
752
+ column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
753
+ enclose_column_by_double_quotes
745
754
  )
746
755
  if (key_filters != {}) & (filter_intersection != []):
747
756
  values = list(key_filters.values())
748
757
  if values[0] != []:
749
758
  in_clause = self._get_in_clause(
750
- key_filters, numeric_columns, numeric_scale, where_exists
759
+ key_filters, numeric_columns, numeric_scale, where_exists, enclose_column_by_double_quotes
751
760
  )
752
761
  else:
753
762
  in_clause = ""
@@ -758,7 +767,8 @@ class DatabricksUnityCatalogService(object):
758
767
  )
759
768
  column_intersections.sort()
760
769
  column_clause, numeric_columns, used_columns = self._get_column_clause(
761
- column_intersections, dict_colummns_datatype, numeric_scale, key_columns
770
+ column_intersections, dict_colummns_datatype, numeric_scale, key_columns,
771
+ enclose_column_by_double_quotes
762
772
  )
763
773
  sample_query = f"SELECT {column_clause} FROM {object.database}.{object.schema}.{object.name} TABLESAMPLE ({sample_count} ROWS) {where_clause};"
764
774
 
@@ -1095,7 +1105,7 @@ class DatabricksUnityCatalogService(object):
1095
1105
  src_filter = object_result['SRC_FILTER']
1096
1106
  trgt_filter = object_result['TRGT_FILTER']
1097
1107
  excluded_columns = object_result['EXCLUDED_COLUMNS']
1098
- columns_equal = object_result['COLUMNS_EQUAL']
1108
+ columns_equal = object_result['COLUMNS_EQUAL']
1099
1109
  column_intersection = str(object_result['COLUMN_INTERSECTION'])
1100
1110
  src_columns_minus_trgt_columns = object_result['SRC_COLUMNS_MINUS_TRGT_COLUMNS']
1101
1111
  trgt_columns_minus_src_columns = object_result['TRGT_COLUMNS_MINUS_SRC_COLUMNS']
@@ -1262,7 +1272,7 @@ class DatabricksUnityCatalogService(object):
1262
1272
 
1263
1273
  self.execute_statement(statement)
1264
1274
 
1265
-
1275
+
1266
1276
 
1267
1277
  # extract the information needed for the table on object level
1268
1278
  for object_result in results['OBJECTS']:
@@ -1375,5 +1385,3 @@ class DatabricksUnityCatalogService(object):
1375
1385
  ;"""
1376
1386
 
1377
1387
  self.execute_statement(insert_statement)
1378
-
1379
-