dataops-testgen 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. dataops_testgen-2.2.0.dist-info/LICENSE +203 -0
  2. dataops_testgen-2.2.0.dist-info/METADATA +287 -0
  3. dataops_testgen-2.2.0.dist-info/NOTICE +5 -0
  4. dataops_testgen-2.2.0.dist-info/RECORD +270 -0
  5. dataops_testgen-2.2.0.dist-info/WHEEL +5 -0
  6. dataops_testgen-2.2.0.dist-info/entry_points.txt +2 -0
  7. dataops_testgen-2.2.0.dist-info/top_level.txt +1 -0
  8. testgen/__init__.py +0 -0
  9. testgen/__main__.py +770 -0
  10. testgen/commands/__init__.py +0 -0
  11. testgen/commands/queries/__init__.py +0 -0
  12. testgen/commands/queries/execute_cat_tests_query.py +95 -0
  13. testgen/commands/queries/execute_tests_query.py +160 -0
  14. testgen/commands/queries/generate_tests_query.py +94 -0
  15. testgen/commands/queries/profiling_query.py +366 -0
  16. testgen/commands/queries/test_parameter_validation_query.py +88 -0
  17. testgen/commands/run_execute_cat_tests.py +162 -0
  18. testgen/commands/run_execute_tests.py +168 -0
  19. testgen/commands/run_generate_tests.py +107 -0
  20. testgen/commands/run_get_entities.py +122 -0
  21. testgen/commands/run_launch_db_config.py +84 -0
  22. testgen/commands/run_observability_exporter.py +330 -0
  23. testgen/commands/run_profiling_bridge.py +495 -0
  24. testgen/commands/run_quick_start.py +168 -0
  25. testgen/commands/run_setup_profiling_tools.py +96 -0
  26. testgen/commands/run_test_definition.py +146 -0
  27. testgen/commands/run_test_parameter_validation.py +135 -0
  28. testgen/commands/run_upgrade_db_config.py +156 -0
  29. testgen/common/__init__.py +8 -0
  30. testgen/common/clean_sql.py +53 -0
  31. testgen/common/credentials.py +25 -0
  32. testgen/common/database/__init__.py +0 -0
  33. testgen/common/database/database_service.py +629 -0
  34. testgen/common/database/flavor/__init__.py +0 -0
  35. testgen/common/database/flavor/flavor_service.py +75 -0
  36. testgen/common/database/flavor/mssql_flavor_service.py +34 -0
  37. testgen/common/database/flavor/postgresql_flavor_service.py +5 -0
  38. testgen/common/database/flavor/redshift_flavor_service.py +22 -0
  39. testgen/common/database/flavor/snowflake_flavor_service.py +69 -0
  40. testgen/common/database/flavor/trino_flavor_service.py +21 -0
  41. testgen/common/date_service.py +68 -0
  42. testgen/common/display_service.py +85 -0
  43. testgen/common/docker_service.py +76 -0
  44. testgen/common/encrypt.py +55 -0
  45. testgen/common/get_pipeline_parms.py +57 -0
  46. testgen/common/logs.py +79 -0
  47. testgen/common/process_service.py +62 -0
  48. testgen/common/read_file.py +69 -0
  49. testgen/settings.py +440 -0
  50. testgen/template/dbsetup/010_create_base_schema.sql +2 -0
  51. testgen/template/dbsetup/020_create_standard_functions_sprocs.sql +179 -0
  52. testgen/template/dbsetup/030_initialize_new_schema_structure.sql +735 -0
  53. testgen/template/dbsetup/040_populate_new_schema_project.sql +59 -0
  54. testgen/template/dbsetup/050_populate_new_schema_metadata.sql +1517 -0
  55. testgen/template/dbsetup/060_create_standard_views.sql +248 -0
  56. testgen/template/dbsetup/070_create_default_users.sql +17 -0
  57. testgen/template/dbsetup/075_grant_role_rights.sql +43 -0
  58. testgen/template/dbsetup/080_set_current_revision.sql +5 -0
  59. testgen/template/dbupgrade/0100_incremental_upgrade.sql +5 -0
  60. testgen/template/dbupgrade/0101_incremental_upgrade.sql +15 -0
  61. testgen/template/dbupgrade/0102_incremental_upgrade.sql +4 -0
  62. testgen/template/dbupgrade/0103_incremental_upgrade.sql +22 -0
  63. testgen/template/dbupgrade/0104_incremental_upgrade.sql +44 -0
  64. testgen/template/dbupgrade/0105_incremental_upgrade.sql +1 -0
  65. testgen/template/dbupgrade/0106_incremental_upgrade.sql +5 -0
  66. testgen/template/dbupgrade/0107_incremental_upgrade.sql +3 -0
  67. testgen/template/dbupgrade_helpers/get_tg_revision.sql +2 -0
  68. testgen/template/exec_cat_tests/ex_cat_build_agg_table_tests.sql +116 -0
  69. testgen/template/exec_cat_tests/ex_cat_get_distinct_tables.sql +11 -0
  70. testgen/template/exec_cat_tests/ex_cat_results_parse.sql +69 -0
  71. testgen/template/exec_cat_tests/ex_cat_retrieve_agg_test_parms.sql +6 -0
  72. testgen/template/exec_cat_tests/ex_cat_test_query.sql +8 -0
  73. testgen/template/execution/ex_finalize_test_run_results.sql +37 -0
  74. testgen/template/execution/ex_get_tests_non_cat.sql +47 -0
  75. testgen/template/execution/ex_update_test_record_in_testrun_table.sql +27 -0
  76. testgen/template/execution/ex_write_test_record_to_testrun_table.sql +6 -0
  77. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_no_drops_generic.sql +48 -0
  78. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_num_incr_generic.sql +34 -0
  79. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_above_generic.sql +49 -0
  80. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_percent_within_generic.sql +49 -0
  81. testgen/template/flavors/generic/exec_query_tests/ex_aggregate_match_same_generic.sql +49 -0
  82. testgen/template/flavors/generic/exec_query_tests/ex_custom_query_generic.sql +39 -0
  83. testgen/template/flavors/generic/exec_query_tests/ex_data_match_2way_generic.sql +58 -0
  84. testgen/template/flavors/generic/exec_query_tests/ex_data_match_generic.sql +44 -0
  85. testgen/template/flavors/generic/exec_query_tests/ex_prior_match_generic.sql +37 -0
  86. testgen/template/flavors/generic/exec_query_tests/ex_relative_entropy_generic.sql +53 -0
  87. testgen/template/flavors/generic/exec_query_tests/ex_window_match_no_drops_generic.sql +46 -0
  88. testgen/template/flavors/generic/exec_query_tests/ex_window_match_same_generic.sql +59 -0
  89. testgen/template/flavors/generic/profiling/contingency_counts.sql +3 -0
  90. testgen/template/flavors/generic/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  91. testgen/template/flavors/mssql/exec_query_tests/ex_relative_entropy_mssql.sql +53 -0
  92. testgen/template/flavors/mssql/profiling/project_ddf_query_mssql.sql +35 -0
  93. testgen/template/flavors/mssql/profiling/project_profiling_query_mssql.yaml +246 -0
  94. testgen/template/flavors/mssql/profiling/project_secondary_profiling_query_mssql.sql +36 -0
  95. testgen/template/flavors/mssql/setup_profiling_tools/00_drop_existing_functions_mssql.sql +8 -0
  96. testgen/template/flavors/mssql/setup_profiling_tools/01_create_functions_mssql.sql +12 -0
  97. testgen/template/flavors/mssql/setup_profiling_tools/02_create_functions_mssql.sql +54 -0
  98. testgen/template/flavors/mssql/setup_profiling_tools/create_qc_schema_mssql.sql +4 -0
  99. testgen/template/flavors/mssql/setup_profiling_tools/grant_execute_privileges_mssql.sql +1 -0
  100. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_no_drops_postgresql.sql +46 -0
  101. testgen/template/flavors/postgresql/exec_query_tests/ex_window_match_same_postgresql.sql +59 -0
  102. testgen/template/flavors/postgresql/profiling/project_ddf_query_postgresql.sql +42 -0
  103. testgen/template/flavors/postgresql/profiling/project_profiling_query_postgresql.yaml +225 -0
  104. testgen/template/flavors/postgresql/profiling/project_secondary_profiling_query_postgresql.sql +28 -0
  105. testgen/template/flavors/postgresql/setup_profiling_tools/create_functions_postgresql.sql +157 -0
  106. testgen/template/flavors/postgresql/setup_profiling_tools/create_qc_schema_postgresql.sql +1 -0
  107. testgen/template/flavors/postgresql/setup_profiling_tools/grant_execute_privileges_postgresql.sql +2 -0
  108. testgen/template/flavors/redshift/profiling/project_ddf_query_redshift.sql +38 -0
  109. testgen/template/flavors/redshift/profiling/project_profiling_query_redshift.yaml +221 -0
  110. testgen/template/flavors/redshift/profiling/project_secondary_profiling_query_redshift.sql +29 -0
  111. testgen/template/flavors/redshift/setup_profiling_tools/create_functions_redshift.sql +115 -0
  112. testgen/template/flavors/redshift/setup_profiling_tools/create_qc_schema_redshift.sql +1 -0
  113. testgen/template/flavors/redshift/setup_profiling_tools/grant_execute_privileges_redshift.sql +2 -0
  114. testgen/template/flavors/snowflake/profiling/project_ddf_query_snowflake.sql +38 -0
  115. testgen/template/flavors/snowflake/profiling/project_profiling_query_snowflake.yaml +220 -0
  116. testgen/template/flavors/snowflake/profiling/project_secondary_profiling_query_snowflake.sql +29 -0
  117. testgen/template/flavors/snowflake/setup_profiling_tools/create_functions_snowflake.sql +69 -0
  118. testgen/template/flavors/snowflake/setup_profiling_tools/create_qc_schema_snowflake.sql +1 -0
  119. testgen/template/flavors/snowflake/setup_profiling_tools/grant_execute_privileges_snowflake.sql +6 -0
  120. testgen/template/flavors/trino/profiling/project_profiling_query_trino.yaml +219 -0
  121. testgen/template/flavors/trino/setup_profiling_tools/create_functions_trino.sql +92 -0
  122. testgen/template/flavors/trino/setup_profiling_tools/create_qc_schema_trino.sql +1 -0
  123. testgen/template/gen_funny_cat_tests/gen_test_constant.sql +104 -0
  124. testgen/template/gen_funny_cat_tests/gen_test_distinct_value_ct.sql +98 -0
  125. testgen/template/gen_funny_cat_tests/gen_test_row_ct.sql +57 -0
  126. testgen/template/gen_funny_cat_tests/gen_test_row_ct_pct.sql +59 -0
  127. testgen/template/generation/gen_delete_old_tests.sql +5 -0
  128. testgen/template/generation/gen_insert_test_suite.sql +5 -0
  129. testgen/template/generation/gen_retrieve_or_insert_test_suite.sql +58 -0
  130. testgen/template/generation/gen_standard_test_type_list.sql +13 -0
  131. testgen/template/generation/gen_standard_tests.sql +48 -0
  132. testgen/template/get_entities/get_connection.sql +21 -0
  133. testgen/template/get_entities/get_connections_list.sql +9 -0
  134. testgen/template/get_entities/get_latest.sql +4 -0
  135. testgen/template/get_entities/get_profile.sql +12 -0
  136. testgen/template/get_entities/get_profile_info.sql +17 -0
  137. testgen/template/get_entities/get_profile_list.sql +17 -0
  138. testgen/template/get_entities/get_profile_screen.sql +275 -0
  139. testgen/template/get_entities/get_project_list.sql +6 -0
  140. testgen/template/get_entities/get_table_group_list.sql +10 -0
  141. testgen/template/get_entities/get_test_generation_list.sql +18 -0
  142. testgen/template/get_entities/get_test_info.sql +41 -0
  143. testgen/template/get_entities/get_test_results_for_run_cli.sql +16 -0
  144. testgen/template/get_entities/get_test_run_list.sql +24 -0
  145. testgen/template/get_entities/get_test_suite.sql +13 -0
  146. testgen/template/get_entities/get_test_suite_list.sql +18 -0
  147. testgen/template/get_entities/list_test_types.sql +4 -0
  148. testgen/template/observability/get_event_data.sql +23 -0
  149. testgen/template/observability/get_test_results.sql +41 -0
  150. testgen/template/observability/update_test_results_exported_to_observability.sql +12 -0
  151. testgen/template/parms/parms_profiling.sql +34 -0
  152. testgen/template/parms/parms_test_execution.sql +13 -0
  153. testgen/template/parms/parms_test_gen.sql +23 -0
  154. testgen/template/profiling/contingency_columns.sql +7 -0
  155. testgen/template/profiling/datatype_suggestions.sql +56 -0
  156. testgen/template/profiling/functional_datatype.sql +523 -0
  157. testgen/template/profiling/functional_tabletype_stage.sql +48 -0
  158. testgen/template/profiling/functional_tabletype_update.sql +8 -0
  159. testgen/template/profiling/pii_flag.sql +133 -0
  160. testgen/template/profiling/profile_anomalies_screen_column.sql +22 -0
  161. testgen/template/profiling/profile_anomalies_screen_multi_column.sql +58 -0
  162. testgen/template/profiling/profile_anomalies_screen_table.sql +22 -0
  163. testgen/template/profiling/profile_anomalies_screen_table_dates.sql +30 -0
  164. testgen/template/profiling/profile_anomalies_screen_variants.sql +40 -0
  165. testgen/template/profiling/profile_anomaly_types_get.sql +3 -0
  166. testgen/template/profiling/project_get_table_sample_count.sql +22 -0
  167. testgen/template/profiling/project_profile_run_record_insert.sql +8 -0
  168. testgen/template/profiling/project_profile_run_record_update.sql +5 -0
  169. testgen/template/profiling/project_profile_run_record_update_status.sql +5 -0
  170. testgen/template/profiling/project_update_profile_results_to_estimates.sql +32 -0
  171. testgen/template/profiling/refresh_anomalies.sql +33 -0
  172. testgen/template/profiling/refresh_data_chars_from_profiling.sql +156 -0
  173. testgen/template/profiling/secondary_profiling_columns.sql +12 -0
  174. testgen/template/profiling/secondary_profiling_delete.sql +4 -0
  175. testgen/template/profiling/secondary_profiling_update.sql +18 -0
  176. testgen/template/quick_start/populate_target_data.sql +1077 -0
  177. testgen/template/quick_start/recreate_target_data_schema.sql +167 -0
  178. testgen/template/quick_start/update_target_data.sql +100 -0
  179. testgen/template/updates/create_tmp_test_definition.sql +19 -0
  180. testgen/template/updates/get_test_def_parms.sql +38 -0
  181. testgen/template/updates/populate_stg_test_definitions.sql +184 -0
  182. testgen/template/validate_tests/ex_disable_tests_test_definitions.sql +5 -0
  183. testgen/template/validate_tests/ex_flag_tests_test_definitions.sql +64 -0
  184. testgen/template/validate_tests/ex_get_project_column_list_generic.sql +3 -0
  185. testgen/template/validate_tests/ex_get_test_column_list_tg.sql +65 -0
  186. testgen/template/validate_tests/ex_write_test_val_errors.sql +22 -0
  187. testgen/ui/__init__.py +0 -0
  188. testgen/ui/app.py +98 -0
  189. testgen/ui/assets/dk_logo.svg +46 -0
  190. testgen/ui/assets/question_mark.png +0 -0
  191. testgen/ui/assets/scripts.js +68 -0
  192. testgen/ui/assets/style.css +140 -0
  193. testgen/ui/bootstrap.py +109 -0
  194. testgen/ui/components/__init__.py +0 -0
  195. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fBBc4.woff2 +0 -0
  196. testgen/ui/components/frontend/css/KFOlCnqEu92Fr1MmEU9fChc4EsA.woff2 +0 -0
  197. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu4mxK.woff2 +0 -0
  198. testgen/ui/components/frontend/css/KFOmCnqEu92Fr1Mu7GxKOzY.woff2 +0 -0
  199. testgen/ui/components/frontend/css/material-symbols-rounded.css +24 -0
  200. testgen/ui/components/frontend/css/material-symbols-rounded.woff2 +0 -0
  201. testgen/ui/components/frontend/css/roboto-font-faces.css +35 -0
  202. testgen/ui/components/frontend/css/shared.css +36 -0
  203. testgen/ui/components/frontend/img/dk_logo.svg +46 -0
  204. testgen/ui/components/frontend/index.html +17 -0
  205. testgen/ui/components/frontend/js/components/breadcrumbs.js +86 -0
  206. testgen/ui/components/frontend/js/components/button.js +66 -0
  207. testgen/ui/components/frontend/js/components/location.js +62 -0
  208. testgen/ui/components/frontend/js/components/select.js +75 -0
  209. testgen/ui/components/frontend/js/components/sidebar.js +358 -0
  210. testgen/ui/components/frontend/js/main.js +99 -0
  211. testgen/ui/components/frontend/js/streamlit.js +19 -0
  212. testgen/ui/components/frontend/js/van.min.js +1 -0
  213. testgen/ui/components/utils/__init__.py +0 -0
  214. testgen/ui/components/utils/callbacks.py +51 -0
  215. testgen/ui/components/utils/component.py +13 -0
  216. testgen/ui/components/widgets/__init__.py +6 -0
  217. testgen/ui/components/widgets/breadcrumbs.py +32 -0
  218. testgen/ui/components/widgets/location.py +65 -0
  219. testgen/ui/components/widgets/modal.py +97 -0
  220. testgen/ui/components/widgets/sidebar.py +69 -0
  221. testgen/ui/navigation/__init__.py +0 -0
  222. testgen/ui/navigation/menu.py +42 -0
  223. testgen/ui/navigation/page.py +20 -0
  224. testgen/ui/navigation/router.py +63 -0
  225. testgen/ui/queries/__init__.py +0 -0
  226. testgen/ui/queries/authentication_queries.py +47 -0
  227. testgen/ui/queries/connection_queries.py +121 -0
  228. testgen/ui/queries/profiling_queries.py +148 -0
  229. testgen/ui/queries/project_queries.py +9 -0
  230. testgen/ui/queries/table_group_queries.py +186 -0
  231. testgen/ui/queries/test_definition_queries.py +270 -0
  232. testgen/ui/queries/test_run_queries.py +32 -0
  233. testgen/ui/queries/test_suite_queries.py +145 -0
  234. testgen/ui/scripts/__init__.py +0 -0
  235. testgen/ui/scripts/patch_streamlit.py +111 -0
  236. testgen/ui/services/__init__.py +0 -0
  237. testgen/ui/services/authentication_service.py +119 -0
  238. testgen/ui/services/connection_service.py +220 -0
  239. testgen/ui/services/database_service.py +282 -0
  240. testgen/ui/services/form_service.py +1008 -0
  241. testgen/ui/services/javascript_service.py +44 -0
  242. testgen/ui/services/query_service.py +316 -0
  243. testgen/ui/services/string_service.py +12 -0
  244. testgen/ui/services/table_group_service.py +130 -0
  245. testgen/ui/services/test_definition_service.py +117 -0
  246. testgen/ui/services/test_run_service.py +13 -0
  247. testgen/ui/services/test_suite_service.py +76 -0
  248. testgen/ui/services/toolbar_service.py +77 -0
  249. testgen/ui/session.py +46 -0
  250. testgen/ui/views/__init__.py +0 -0
  251. testgen/ui/views/app_log_modal.py +92 -0
  252. testgen/ui/views/connections.py +72 -0
  253. testgen/ui/views/connections_base.py +367 -0
  254. testgen/ui/views/login.py +40 -0
  255. testgen/ui/views/not_found.py +16 -0
  256. testgen/ui/views/overview.py +34 -0
  257. testgen/ui/views/profiling_anomalies.py +501 -0
  258. testgen/ui/views/profiling_details.py +335 -0
  259. testgen/ui/views/profiling_modal.py +40 -0
  260. testgen/ui/views/profiling_results.py +206 -0
  261. testgen/ui/views/profiling_summary.py +177 -0
  262. testgen/ui/views/project_settings.py +74 -0
  263. testgen/ui/views/table_groups.py +530 -0
  264. testgen/ui/views/test_definitions.py +1020 -0
  265. testgen/ui/views/test_results.py +908 -0
  266. testgen/ui/views/test_runs.py +195 -0
  267. testgen/ui/views/test_suites.py +545 -0
  268. testgen/utils/__init__.py +0 -0
  269. testgen/utils/plugins.py +17 -0
  270. testgen/utils/singleton.py +14 -0
@@ -0,0 +1,42 @@
1
-- Column inventory query: returns one row per column of information_schema.columns for
-- schema '{DATA_SCHEMA}', further narrowed by the {TABLE_CRITERIA} fragment, ordered by
-- schema, table and ordinal position.
-- {PROJECT_CODE}, {DATA_SCHEMA} and {TABLE_CRITERIA} are placeholders; presumably the caller
-- substitutes them as raw text before execution -- TODO confirm against the calling code.
+ SELECT '{PROJECT_CODE}' as project_code,
2
+ CURRENT_TIMESTAMP AT TIME ZONE 'UTC' as refresh_timestamp,
3
+ c.table_schema,
4
+ c.table_name,
5
+ c.column_name,
-- data_type: normalized type label. text and unbounded varchar are reported as
-- 'varchar(65535)'; numeric gets '(precision,scale)' appended only when both are non-NULL
-- (a NULL operand makes the concatenation NULL, so COALESCE falls back to '').
6
+ CASE
7
+ WHEN c.data_type = 'timestamp without time zone' THEN 'timestamp'
8
+ WHEN c.data_type = 'text'
9
+ OR (c.data_type = 'character varying' and c.character_maximum_length is NULL) THEN 'varchar(65535)'
10
+ WHEN c.data_type = 'character varying'
11
+ THEN 'varchar(' || CAST(c.character_maximum_length AS VARCHAR) || ')'
12
+ WHEN c.data_type = 'character' THEN 'char(' || CAST(c.character_maximum_length AS VARCHAR) || ')'
13
+ WHEN c.data_type = 'numeric' THEN 'numeric'
14
+ || COALESCE( '(' || CAST(c.numeric_precision AS VARCHAR) || ','
15
+ || CAST(c.numeric_scale AS VARCHAR) || ')', '')
16
+ ELSE c.data_type
17
+ END AS data_type,
-- character_maximum_length: declared length, or 65535 for text / varchar without a length;
-- NULL for every other type (the inner CASE has no ELSE).
18
+ COALESCE(c.character_maximum_length, CASE WHEN c.data_type IN ('text', 'character varying') THEN 65535 END)
19
+ as character_maximum_length,
20
+ c.ordinal_position,
-- general_type: one-letter class. A = character/text, B = boolean, D = date or any
-- timestamp type, T = time without time zone, N = listed integer/float types or numeric,
-- X = everything else.
21
+ CASE
22
+ WHEN c.data_type ILIKE '%char%' or c.data_type = 'text'
23
+ THEN 'A'
24
+ WHEN c.data_type ILIKE 'boolean'
25
+ THEN 'B'
26
+ WHEN c.data_type ILIKE 'date'
27
+ OR c.data_type ILIKE 'timestamp%'
28
+ THEN 'D'
29
+ WHEN c.data_type ILIKE 'time without time zone'
30
+ THEN 'T'
31
+ WHEN LOWER(c.data_type) IN ('bigint', 'double precision', 'integer', 'smallint', 'real')
32
+ OR c.data_type ILIKE 'numeric%'
33
+ THEN 'N'
34
+ ELSE
35
+ 'X' END AS general_type,
-- is_decimal: TRUE when the column has a positive numeric_scale. A numeric column with a
-- NULL scale is treated as decimal (COALESCE(numeric_scale, 1) > 0); for other types a
-- NULL scale yields NULL rather than FALSE.
36
+ CASE
37
+ WHEN c.data_type = 'numeric' THEN COALESCE(numeric_scale, 1) > 0
38
+ ELSE numeric_scale > 0
39
+ END as is_decimal
40
+ FROM information_schema.columns c
41
+ WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA}
42
+ ORDER BY c.table_schema, c.table_name, c.ordinal_position
@@ -0,0 +1,225 @@
1
+ ---
2
# strTemplate01_*: opening keyword of the profiling statement. The sampling and non-sampling
# variants are identical here. NOTE(review): the numbered strTemplateNN fragments presumably
# get concatenated in order into one SELECT by the profiling code -- confirm against caller.
+ strTemplate01_sampling: "SELECT "
3
+ strTemplate01_else: "SELECT "
# strTemplate02_all: identification columns built from placeholders, followed by the basic
# counts: total rows, non-NULL values, distinct values and NULL values of "{COL_NAME}".
4
+ strTemplate02_all: |
5
+ {CONNECTION_ID} as connection_id,
6
+ '{PROJECT_CODE}' as project_code,
7
+ '{TABLE_GROUPS_ID}' as table_groups_id,
8
+ '{DATA_SCHEMA}' AS schema_name,
9
+ '{RUN_DATE}' AS run_date,
10
+ '{DATA_TABLE}' AS table_name,
11
+ {COL_POS} AS position,
12
+ '{COL_NAME_SANITIZED}' AS column_name,
13
+ '{COL_TYPE}' AS column_type,
14
+ '{COL_GEN_TYPE}' AS general_type,
15
+ COUNT(*) AS record_ct,
16
+ COUNT("{COL_NAME}") AS value_ct,
17
+ COUNT(DISTINCT "{COL_NAME}") AS distinct_value_ct,
18
+ SUM(CASE WHEN "{COL_NAME}" IS NULL THEN 1 ELSE 0 END) AS null_value_ct,
19
# strTemplate03_*: min/max/average length of the value cast to TEXT. Zero-length values are
# excluded from the average (NULLIF(..., 0)). The _else variant emits NULL placeholders with
# the same column names. NOTE(review): the suffix (ADN, A, N, else) presumably selects the
# variant by the column's general type -- confirm against the calling code.
+ strTemplate03_ADN: MIN(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS min_length,
20
+ MAX(LENGTH(CAST("{COL_NAME}" AS TEXT))) AS max_length,
21
+ AVG(NULLIF(LENGTH(CAST("{COL_NAME}" AS TEXT)), 0)::FLOAT) AS avg_length,
22
+ strTemplate03_else: NULL as min_length,
23
+ NULL as max_length,
24
+ NULL as avg_length,
# strTemplate04_*: count of zero values. The _A variant counts trimmed text that matches
# '0', '0.' or '0.000...'; the _N variant sums 1 - ABS(SIGN(x)), which is 1 only when x = 0.
25
+ strTemplate04_A: SUM(CASE
26
+ WHEN TRIM("{COL_NAME}") ~ '^0(\.0*)?$' THEN 1 ELSE 0
27
+ END) AS zero_value_ct,
28
+ strTemplate04_N: SUM( 1 - ABS(SIGN("{COL_NAME}")) )::BIGINT AS zero_value_ct,
29
+ strTemplate04_else: NULL as zero_value_ct,
30
# strTemplate05_A: text-column statistics for "{COL_NAME}":
#   distinct_std_value_ct - distinct values after upper-casing and removing space ' , . -
#   zero_length_ct        - values equal to ''
#   lead_space_ct         - values sorting between ' !' and '!' (intended to catch values that
#                           start with a space; collation-dependent -- TODO confirm)
#   quoted_value_ct       - values wrapped in double or single quotes
#   includes_digit_ct     - values containing at least one digit
#   filled_value_ct       - placeholder-style values ('.', '?', runs of . - 0 9 x z, 'n/a', ...)
#   min_text / max_text   - min/max non-empty value, truncated to 100 characters
#   numeric_ct / date_ct  - sums of {DATA_QC_SCHEMA}.fndk_isnum / fndk_isdate over a prefix
#   std_pattern_match     - first label whose share of non-NULL values exceeds its threshold
#                           (0.8 or 0.9)
# Anchored patterns use the POSIX operator (~), not SIMILAR TO: in PostgreSQL SIMILAR TO the
# characters ^ $ . are literals, so an anchored SIMILAR TO pattern can never match real data.
+ strTemplate05_A: COUNT(DISTINCT UPPER(TRANSLATE("{COL_NAME}", ' '',.-', ''))) as distinct_std_value_ct,
31
+ SUM(CASE
32
+ WHEN "{COL_NAME}" = '' THEN 1
33
+ ELSE 0
34
+ END) AS zero_length_ct,
35
+ SUM( CASE
36
+ WHEN "{COL_NAME}" BETWEEN ' !' AND '!' THEN 1
37
+ ELSE 0
38
+ END ) AS lead_space_ct,
39
+ SUM( CASE WHEN "{COL_NAME}" ILIKE '"%"' OR "{COL_NAME}" ILIKE '''%''' THEN 1 ELSE 0 END ) as quoted_value_ct,
40
+ SUM( CASE WHEN "{COL_NAME}" ~ '[0-9]' THEN 1 ELSE 0 END ) as includes_digit_ct,
41
+ SUM( CASE
42
+ WHEN "{COL_NAME}" IN ('.', '?', ' ') THEN 1
43
+ WHEN LOWER("{COL_NAME}") ~ '^(\.{2,}|-{2,}|0{2,}|9{2,}|x{2,}|z{2,})$' THEN 1
44
+ WHEN LOWER("{COL_NAME}") IN ('blank','error','missing','tbd',
45
+ 'n/a','#na','none','null','unknown') THEN 1
46
+ WHEN LOWER("{COL_NAME}") IN ('(blank)','(error)','(missing)','(tbd)',
47
+ '(n/a)','(#na)','(none)','(null)','(unknown)') THEN 1
48
+ WHEN LOWER("{COL_NAME}") IN ('[blank]','[error]','[missing]','[tbd]',
49
+ '[n/a]','[#na]','[none]','[null]','[unknown]') THEN 1
50
+ ELSE 0
51
+ END ) AS filled_value_ct,
52
+ LEFT(MIN(NULLIF("{COL_NAME}", '')), 100) AS min_text,
53
+ LEFT(MAX(NULLIF("{COL_NAME}", '')), 100) AS max_text,
54
+ SUM({DATA_QC_SCHEMA}.fndk_isnum(LEFT("{COL_NAME}", 31))) AS numeric_ct,
55
+ SUM({DATA_QC_SCHEMA}.fndk_isdate(LEFT("{COL_NAME}", 26))) AS date_ct,
56
+ CASE
57
+ WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[0-9]{1,5}[a-zA-Z]?\s\w{1,5}\.?\s?\w*\s?\w*\s[a-zA-Z]{1,6}\.?\s?[0-9]{0,5}[A-Z]{0,1}$'
58
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'STREET_ADDR'
59
+ WHEN SUM(CASE WHEN "{COL_NAME}" IN ('AL','AK','AS','AZ','AR','CA','CO','CT','DE','DC','FM','FL','GA','GU','HI','ID','IL','IN','IA','KS','KY','LA','ME','MH','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','MP','OH','OK','OR','PW','PA','PR','RI','SC','SD','TN','TX','UT','VT','VI','VA','WA','WV','WI','WY','AE','AP','AA')
60
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'STATE_USA'
61
+ WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([\+]1 |1-|)[\+]?[(]?[0-9]{3}[)][ ]?[-\s\.]?[0-9]{3}[-\s\.]?[0-9]{4,6}$'
62
+ OR "{COL_NAME}" ~ '^([\+]1 |1-|)[2-9][01][0-9][-| ]?[0-9]{3}[-| ]?[0-9]{4}$'
63
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'PHONE_USA'
64
+ WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'
65
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'EMAIL'
66
+ WHEN SUM( CASE WHEN TRANSLATE("{COL_NAME}",'012345678','999999999') IN ('99999', '999999999', '99999-9999')
67
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'ZIP_USA'
68
+ WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^[\w\s\-]+(?<!\s)\.(txt|csv|tsv|dat|doc|pdf|xlsx)$'
69
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'FILE_NAME'
70
+ WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([0-9]{4}[- ]){3}[0-9]{4}$'
71
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'CREDIT_CARD'
72
+ WHEN SUM( CASE WHEN "{COL_NAME}" ~ '^([^,|\t]{1,20}[,|\t]){2,}[^,|\t]{0,20}([,|\t]{0,1}[^,|\t]{0,20})*$'
73
+ AND "{COL_NAME}" !~ '\s(and|but|or|yet)\s'
74
+ THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.8 THEN 'DELIMITED_DATA'
75
+ WHEN SUM ( CASE WHEN "{COL_NAME}" ~ '^[0-8][0-9]{2}-[0-9]{2}-[0-9]{4}$'
76
+ AND LEFT("{COL_NAME}", 3) NOT BETWEEN '734' AND '749'
77
+ AND LEFT("{COL_NAME}", 3) <> '666' THEN 1 END)::FLOAT/COUNT("{COL_NAME}")::FLOAT > 0.9 THEN 'SSN'
78
+ END as std_pattern_match,
79
# strTemplate05_else: NULL placeholders carrying the same eleven column names that
# strTemplate05_A produces.
+ strTemplate05_else: NULL as distinct_std_value_ct,
80
+ NULL as zero_length_ct,
81
+ NULL as lead_space_ct,
82
+ NULL as quoted_value_ct,
83
+ NULL as includes_digit_ct,
84
+ NULL as filled_value_ct,
85
+ NULL as min_text,
86
+ NULL as max_text,
87
+ NULL as numeric_ct,
88
+ NULL as date_ct,
89
+ NULL as std_pattern_match,
90
# strTemplate06_A_patterns: scalar subquery producing top_patterns. Each value is reduced to a
# shape (lowercase -> 'a', uppercase -> 'A', digit -> 'N'); the five most frequent shapes are
# rendered as 'count | pattern', joined with ' | ' and truncated to 1000 characters.
# The inner WHERE only yields rows when the column's maximum length over the whole table is
# between 3 and {PARM_MAX_PATTERN_LENGTH}. strTemplate06_else emits a NULL placeholder.
+ strTemplate06_A_patterns: ( SELECT LEFT(STRING_AGG(pattern, ' | ' ORDER BY ct DESC) , 1000) AS concat_pats
91
+ FROM (
92
+ SELECT CAST(COUNT(*) AS VARCHAR(10)) || ' | ' || pattern AS pattern,
93
+ COUNT(*) AS ct
94
+ FROM ( SELECT REGEXP_REPLACE(REGEXP_REPLACE( REGEXP_REPLACE(
95
+ "{COL_NAME}", '[a-z]', 'a', 'g'),
96
+ '[A-Z]', 'A', 'g'),
97
+ '[0-9]', 'N', 'g') AS pattern
98
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
99
+ WHERE "{COL_NAME}" > ' ' AND (SELECT MAX(LENGTH("{COL_NAME}"))
100
+ FROM {DATA_SCHEMA}.{DATA_TABLE}) BETWEEN 3 and {PARM_MAX_PATTERN_LENGTH}) p
101
+ GROUP BY pattern
102
+ HAVING pattern > ' '
103
+ ORDER BY COUNT(*) DESC
104
+ LIMIT 5
105
+ ) ps) AS top_patterns,
106
+ strTemplate06_else: NULL as top_patterns,
107
# strTemplate07_A_freq: scalar subquery producing top_freq_values -- the ten most frequent
# values of "{COL_NAME}" (values > ' ' only), each rendered as 'count | value', joined with
# ' | ' and truncated to 1000 characters. Ties are broken by value, ascending.
# Uses ORDER BY ... DESC with LIMIT 10: PostgreSQL has no SELECT TOP n, and an ascending
# count order would return the least frequent values instead of the most frequent.
# strTemplate07_else emits a NULL placeholder with the same column name.
+ strTemplate07_A_freq: ( SELECT LEFT(STRING_AGG(val, ' | ' ORDER BY ct DESC), 1000) as concat_vals
108
+ FROM (
109
+ SELECT CAST(COUNT(*) as VARCHAR(10)) || ' | ' || "{COL_NAME}" as val,
110
+ COUNT(*) as ct
111
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
112
+ WHERE "{COL_NAME}" > ' '
113
+ GROUP BY "{COL_NAME}"
114
+ HAVING "{COL_NAME}" > ' '
115
+ ORDER BY COUNT(*) DESC, "{COL_NAME}" LIMIT 10
116
+ ) ps
117
+ ) AS top_freq_values,
118
+ strTemplate07_else: NULL as top_freq_values,
119
# strTemplate08_N: numeric statistics -- minimum, smallest value above zero, maximum, average
# and standard deviation (both computed on the value cast to FLOAT), plus percentiles.
# pct_25 / pct_50 / pct_75 are not computed here; they are read from the derived table that
# strTemplate99_N adds to the FROM clause, and MIN() only wraps them for use in an aggregate
# select list. strTemplate08_else emits NULL placeholders with the same column names.
+ strTemplate08_N: MIN("{COL_NAME}") AS min_value,
120
+ MIN(CASE WHEN "{COL_NAME}" > 0 THEN "{COL_NAME}" ELSE NULL END) AS min_value_over_0,
121
+ MAX("{COL_NAME}") AS max_value,
122
+ AVG(CAST("{COL_NAME}" AS FLOAT)) AS avg_value,
123
+ STDDEV(CAST("{COL_NAME}" AS FLOAT)) AS stdev_value,
124
+ MIN(pct_25) as percentile_25,
125
+ MIN(pct_50) as percentile_50,
126
+ MIN(pct_75) as percentile_75,
127
+ strTemplate08_else: NULL as min_value,
128
+ NULL as min_value_over_0,
129
+ NULL as max_value,
130
+ NULL as avg_value,
131
+ NULL as stdev_value,
132
+ NULL as percentile_25,
133
+ NULL as percentile_50,
134
+ NULL as percentile_75,
135
# strTemplate10_N_dec: fractional_sum is the sum of each value's fractional part
# (MOD(x, 1)), rounded to 5 decimal places before summing. strTemplate10_else emits NULL.
# NOTE(review): presumably the _N_dec variant applies only to numeric columns flagged as
# decimal -- confirm against the calling code.
+ strTemplate10_N_dec: SUM(ROUND(MOD("{COL_NAME}", 1), 5)) as fractional_sum,
136
+
137
+ strTemplate10_else: NULL as fractional_sum,
138
+
139
# strTemplate11_D: date statistics, all measured against '{RUN_DATE}' through
# {DATA_QC_SCHEMA}.DATEDIFF(unit, value, run_date):
#   min_date               - earliest value, raised to '0001-01-01' if earlier; NULL if none
#   max_date               - latest value
#   before_1yr/5yr/20yr    - values more than 12 / 60 / 240 months before the run date
#   within_1yr / within_1mo- values 0..365 / 0..30 days before the run date
#   future_date_ct         - values later than the run date
#   date_days/weeks/months_present - number of distinct day / week / month offsets observed
# strTemplate11_else emits NULL placeholders with the same column names.
+ strTemplate11_D: CASE
140
+ WHEN MIN("{COL_NAME}") IS NULL THEN NULL
141
+ ELSE GREATEST(MIN("{COL_NAME}"), '0001-01-01')
142
+ END as min_date,
143
+ MAX("{COL_NAME}") as max_date,
144
+ SUM(CASE
145
+ WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 12 THEN 1
146
+ ELSE 0
147
+ END) AS before_1yr_date_ct,
148
+ SUM(CASE
149
+ WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 60 THEN 1
150
+ ELSE 0
151
+ END) AS before_5yr_date_ct,
152
+ SUM(CASE
153
+ WHEN {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}') > 240 THEN 1
154
+ ELSE 0
155
+ END) AS before_20yr_date_ct,
156
+ SUM(CASE
157
+ WHEN {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 365 THEN 1
158
+ ELSE 0
159
+ END) AS within_1yr_date_ct,
160
+ SUM(CASE
161
+ WHEN {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}') BETWEEN 0 AND 30 THEN 1
162
+ ELSE 0
163
+ END) AS within_1mo_date_ct,
164
+ SUM(CASE
165
+ WHEN "{COL_NAME}" > '{RUN_DATE}' THEN 1 ELSE 0
166
+ END) AS future_date_ct,
167
+ COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('DAY', "{COL_NAME}", '{RUN_DATE}' ) ) as date_days_present,
168
+ COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('WEEK', "{COL_NAME}", '{RUN_DATE}' ) ) as date_weeks_present,
169
+ COUNT(DISTINCT {DATA_QC_SCHEMA}.DATEDIFF('MON', "{COL_NAME}", '{RUN_DATE}' ) ) as date_months_present,
170
+
171
+
172
+ strTemplate11_else: NULL as min_date,
173
+ NULL as max_date,
174
+ NULL as before_1yr_date_ct,
175
+ NULL as before_5yr_date_ct,
176
+ NULL as before_20yr_date_ct,
177
+ NULL as within_1yr_date_ct,
178
+ NULL as within_1mo_date_ct,
179
+ NULL as future_date_ct,
180
+ NULL as date_days_present,
181
+ NULL as date_weeks_present,
182
+ NULL as date_months_present,
183
+
184
# strTemplate12_B: boolean_true_ct sums the value cast to INTEGER, i.e. counts TRUE values.
# strTemplate12_else emits a NULL placeholder.
+ strTemplate12_B: SUM(CAST("{COL_NAME}" AS INTEGER)) AS boolean_true_ct,
185
+
186
+ strTemplate12_else: NULL as boolean_true_ct,
187
+
# strTemplate13_ALL: datatype_suggestion is always NULL in this statement.
# NOTE(review): presumably filled in by a later step -- confirm.
188
+ strTemplate13_ALL: NULL AS datatype_suggestion,
# strTemplate14_*: distinct_pattern_ct counts distinct value shapes (lowercase -> 'a',
# uppercase -> 'A', digit -> 'N') through a subquery over the whole table; the _no_patterns
# variant returns NULL for it. Both text variants compute embedded_space_ct (values whose
# trimmed form still contains a space) and avg_embedded_spaces (average number of such
# spaces). strTemplate14_else emits NULL for all three.
189
+ strTemplate14_A_do_patterns: ( SELECT COUNT(DISTINCT REGEXP_REPLACE( REGEXP_REPLACE( REGEXP_REPLACE(
190
+ "{COL_NAME}", '[a-z]', 'a', 'g'),
191
+ '[A-Z]', 'A', 'g'),
192
+ '[0-9]', 'N', 'g')
193
+ ) AS pattern_ct
194
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
195
+ WHERE "{COL_NAME}" > ' ' ) AS distinct_pattern_ct,
196
+ SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g')))::BIGINT) AS embedded_space_ct,
197
+ AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g'))::FLOAT) AS avg_embedded_spaces,
198
+
199
+ strTemplate14_A_no_patterns: NULL as distinct_pattern_ct,
200
+ SUM(SIGN(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g')))::BIGINT) AS embedded_space_ct,
201
+ AVG(LENGTH(TRIM("{COL_NAME}")) - LENGTH(REGEXP_REPLACE(TRIM("{COL_NAME}"), ' ', '', 'g'))::FLOAT) AS avg_embedded_spaces,
202
+
203
+ strTemplate14_else: NULL as distinct_pattern_ct,
204
+ NULL as embedded_space_ct,
205
+ NULL as avg_embedded_spaces,
206
+
# strTemplate15_ALL: functional_data_type and functional_table_type are always NULL here.
207
+ strTemplate15_ALL: NULL as functional_data_type,
208
+ NULL as functional_table_type,
209
+
# strTemplate16_ALL: last select-list item (no trailing comma) -- the profiling run id.
210
+ strTemplate16_ALL: " '{PROFILE_RUN_ID}' as profile_run_id"
211
+
212
# strTemplate98_*: FROM clause. The sampling and non-sampling variants are identical in this
# flavor -- both read the table directly.
+ strTemplate98_sampling: ' FROM {DATA_SCHEMA}.{DATA_TABLE} '
213
+
214
+ strTemplate98_else: ' FROM {DATA_SCHEMA}.{DATA_TABLE} '
215
+
# strTemplate99_N: appends a comma-joined (cross join) one-row derived table "pctile" that
# holds the 25th / 50th / 75th percentiles of "{COL_NAME}"; these are the pct_* columns read
# by strTemplate08_N. The subquery reads the full table with no WHERE clause of its own.
# strTemplate99_else adds nothing.
216
+ strTemplate99_N: |
217
+ , (SELECT
218
+ PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_25,
219
+ PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_50,
220
+ PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY "{COL_NAME}") AS pct_75
221
+ FROM {DATA_SCHEMA}.{DATA_TABLE} LIMIT 1) pctile
222
+
223
+ strTemplate99_else: ' '
224
+
225
# strTemplate100_sampling: row filter for sampled profiling -- keeps each row with
# probability 1 / {PROFILE_SAMPLE_RATIO}. Uses RANDOM(), PostgreSQL's uniform [0, 1)
# generator; RAND() does not exist in PostgreSQL.
+ strTemplate100_sampling: 'WHERE RANDOM() <= 1.0 / {PROFILE_SAMPLE_RATIO}'
@@ -0,0 +1,28 @@
1
-- Secondary profiling for one column: builds a newline-separated frequency listing of the
-- ten most common values plus an "Other Values" roll-up, and an MD5 hash of the column's
-- distinct values. {PROJECT_CODE}, {DATA_SCHEMA}, {DATA_TABLE}, {COL_NAME} and {RUN_DATE}
-- are placeholders filled in before execution.
+ -- Get Freqs for selected columns
-- ranked_vals: one row per distinct value (values > ' ' only) with its count and a rank,
-- most frequent first, ties broken by value.
2
+ WITH ranked_vals AS (
3
+ SELECT "{COL_NAME}",
4
+ COUNT(*) AS ct,
5
+ ROW_NUMBER() OVER (ORDER BY COUNT(*) DESC, "{COL_NAME}") AS rn
6
+ FROM {DATA_SCHEMA}.{DATA_TABLE}
7
+ WHERE "{COL_NAME}" > ' '
8
+ GROUP BY "{COL_NAME}"
9
+ ),
-- consol_vals: ranks 1-10 each keep their own row formatted '| value | count'. All lower
-- ranks share the NULL grouping key, so they collapse into a single
-- '| Other Values (n) | total' row, where n is the number of distinct values rolled up.
10
+ consol_vals AS (
11
+ SELECT COALESCE(CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || CAST(ct AS VARCHAR)
12
+ ELSE NULL
13
+ END, '| Other Values (' || CAST(COUNT(DISTINCT "{COL_NAME}") as VARCHAR) || ') | ' || CAST(SUM(ct) as VARCHAR) ) AS val,
14
+ MIN(rn) as min_rn
15
+ FROM ranked_vals
16
+ GROUP BY CASE WHEN rn <= 10 THEN '| ' || "{COL_NAME}" || ' | ' || CAST(ct AS VARCHAR)
17
+ ELSE NULL
18
+ END
19
+ )
-- Final row: the listing is aggregated in rank order with a '^#^' marker that is then
-- replaced by a newline (CHR(10)). distinct_value_hash is computed over the whole table,
-- without the > ' ' filter used above.
20
+ SELECT '{PROJECT_CODE}' as project_code,
21
+ '{DATA_SCHEMA}' as schema_name,
22
+ '{RUN_DATE}' as run_date,
23
+ '{DATA_TABLE}' as table_name,
24
+ '{COL_NAME}' as column_name,
25
+ REPLACE(STRING_AGG(val, '^#^' ORDER BY min_rn), '^#^', CHR(10)) AS top_freq_values,
26
+ ( SELECT MD5(STRING_AGG(DISTINCT "{COL_NAME}", '|' ORDER BY "{COL_NAME}")) as dvh
27
+ FROM {DATA_SCHEMA}.{DATA_TABLE} ) as distinct_value_hash
28
+ FROM consol_vals;
@@ -0,0 +1,157 @@
1
+ CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.DATEDIFF(difftype character varying, firstdate timestamp without time zone, seconddate timestamp without time zone)
2
+ RETURNS BIGINT AS $$
3
+ SELECT -- number of difftype boundaries from firstdate to seconddate, NULL for an unrecognized difftype
4
+ CAST(CASE -- explicit cast because the DATE_PART branches make this CASE double precision while the function is declared BIGINT
5
+ WHEN UPPER(difftype) IN ('DAY', 'DD', 'D') THEN
6
+ DATE(seconddate) - DATE(firstdate)
7
+ WHEN UPPER(difftype) IN ('WEEK','WK', 'W') THEN
8
+ (DATE(seconddate) - DATE(firstdate)) / 7 -- integer division, partial weeks are truncated
9
+ WHEN UPPER(difftype) IN ('MON', 'MONTH', 'MM') THEN
10
+ (DATE_PART('year', seconddate) - DATE_PART('year', firstdate)) * 12 + (DATE_PART('month', seconddate) - DATE_PART('month', firstdate))
11
+ WHEN UPPER(difftype) IN ('QUARTER', 'QTR', 'Q') THEN
12
+ ((DATE_PART('year', seconddate) - DATE_PART('year', firstdate)) * 4) + (DATE_PART('quarter', seconddate) - DATE_PART('quarter', firstdate))
13
+ WHEN UPPER(difftype) IN ('YEAR', 'YY', 'Y') THEN
14
+ DATE_PART('year', seconddate) - DATE_PART('year', firstdate)
15
+ ELSE
16
+ NULL::BIGINT
17
+ END AS BIGINT);
18
+ $$ LANGUAGE sql IMMUTABLE STRICT;
19
+
20
+ CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fn_charcount(instring character varying, searchstring character varying) returns bigint
21
+ language plpgsql
22
+ as
23
+ $$
24
+ BEGIN -- counts non-overlapping occurrences of searchstring in instring via the length lost when they are removed
25
+ RETURN (CHAR_LENGTH(instring) - CHAR_LENGTH(REPLACE(instring, searchstring, ''))) / NULLIF(CHAR_LENGTH(searchstring), 0); -- NULLIF returns NULL for an empty searchstring instead of raising division by zero
26
+ END;
27
+ $$;
28
+
29
+
30
+ CREATE OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fn_parsefreq(top_freq_values VARCHAR(1000), rowno INTEGER, colno INTEGER) returns VARCHAR(1000)
31
+ language plpgsql
32
+ as
33
+ $$
34
+ BEGIN -- extracts one field from a line-feed separated list of pipe-delimited rows
35
+ RETURN SPLIT_PART(SPLIT_PART(top_freq_values, CHR(10), rowno), '|', colno+1); -- inner call takes row number rowno, outer call takes pipe field colno+1 (the +1 presumably skips the empty field before a leading pipe - confirm against the producer of top_freq_values)
36
+ END;
37
+ $$;
38
+
39
+
40
+ CREATE -- fndk_isnum returns 1 when the text argument matches the numeric layout below, else 0
41
+ OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isnum(VARCHAR)
42
+ RETURNS INTEGER
43
+ IMMUTABLE
44
+ AS
45
+ $$
46
+ SELECT CASE -- accepted layout is optional sign, optional dollar sign, digits with optional comma-separated groups of three, optional decimal part, optional percent sign, with optional surrounding whitespace
47
+ WHEN $1 ~ E'^\\s*[+-]?\\$?\\s*[0-9]+(,[0-9]{3})*(\\.[0-9]*)?[\\%]?\\s*$' THEN 1
48
+ ELSE 0 -- also reached for NULL input, because the match result is then NULL
49
+ END;
50
+ $$
51
+ LANGUAGE sql;
52
+
53
+
54
+
55
+
56
+
57
+ CREATE -- fndk_isdate returns 1 when the text argument matches a supported date or timestamp layout with a plausible day for its month, else 0
58
+ OR REPLACE FUNCTION {DATA_QC_SCHEMA}.fndk_isdate(VARCHAR)
59
+ RETURNS INTEGER
60
+ IMMUTABLE
61
+ AS $$
62
+ SELECT CASE
63
+ -- YYYY-MM-DD HH:MM:SS SSSSSS or YYYY-MM-DD HH:MM:SS (escape-string literal so the doubled backslashes reach the regex engine as single ones)
64
+ WHEN $1 ~ E'^(\\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01])\\s(2[0-3]|[01][0-9]):([0-5][0-9]):([0-5][0-9])(\\s[0-9]{6})?$'
65
+ THEN CASE
66
+ WHEN LEFT($1, 4):: INT BETWEEN 1800 AND 2200
67
+ AND (
68
+ ( SUBSTRING ($1, 6, 2) IN ('01', '03', '05', '07', '08',
69
+ '10', '12')
70
+ AND SUBSTRING ($1, 9, 2):: INT BETWEEN 1 AND 31 )
71
+ OR ( SUBSTRING ($1, 6, 2) IN ('04', '06', '09', '11') -- November added, it was missing from the 30-day months
72
+ AND SUBSTRING ($1, 9, 2):: INT BETWEEN 1 AND 30 )
73
+ OR ( SUBSTRING ($1, 6, 2) = '02'
74
+ AND SUBSTRING ($1, 9, 2):: INT BETWEEN 1 AND 29) -- day 29 is accepted for any year, there is no leap-year check
75
+ )
76
+ THEN 1
77
+ ELSE 0
78
+ END
79
+ -- YYYYMMDDHHMMSSSSSS or YYYYMMDD
80
+ WHEN $1 ~ E'^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])(2[0-3]|[01][0-9])([0-5][0-9])([0-5][0-9])([0-9]{6})$'
81
+ OR $1 ~ E'^(\\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01])$' -- date-only form, the stray trailing hour group was removed to match the documented layout
82
+ THEN CASE
83
+ WHEN LEFT($1, 4)::INT BETWEEN 1800 AND 2200
84
+ AND (
85
+ ( SUBSTRING($1, 5, 2) IN ('01', '03', '05', '07', '08',
86
+ '10', '12')
87
+ AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 31 )
88
+ OR ( SUBSTRING($1, 5, 2) IN ('04', '06', '09', '11') -- November added here as well
89
+ AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 30 )
90
+ OR ( SUBSTRING($1, 5, 2) = '02'
91
+ AND SUBSTRING($1, 7, 2)::INT BETWEEN 1 AND 29)
92
+ )
93
+ THEN 1
94
+ ELSE 0
95
+ END
96
+ -- Exclude anything else long
97
+ WHEN LENGTH($1) > 11 THEN 0
98
+ -- YYYY-MMM/MM-DD (anchored, and a month name is only accepted in the month position, so the integer casts below cannot fail)
99
+ WHEN UPPER($1)
100
+ ~ E'^\\s*[12][09][0-9][0-9]-([0-1]?[0-9]|JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[0-3]?[0-9]\\s*$'
101
+ THEN CASE
102
+ WHEN SPLIT_PART($1, '-', 1)::INT BETWEEN 1800 AND 2200
103
+ AND (
104
+ ( UPPER(SPLIT_PART($1, '-', 2)) IN ('01', '03', '05', '07', '08',
105
+ '1', '3', '5', '7', '8', '10', '12',
106
+ 'JAN', 'MAR', 'MAY', 'JUL', 'AUG',
107
+ 'OCT', 'DEC')
108
+ AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 31 )
109
+ OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('04', '06', '09', '4', '6', '9', '11',
110
+ 'APR', 'JUN', 'SEP', 'NOV')
111
+ AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 30 )
112
+ OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('02', '2', 'FEB')
113
+ AND SPLIT_PART($1, '-', 3)::INT BETWEEN 1 AND 29)
114
+ )
115
+ THEN 1
116
+ ELSE 0
117
+ END
118
+ -- MM/-DD/-YY/YYYY
119
+ WHEN REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[12][09][0-9][0-9]$'
120
+ OR REPLACE($1, '-', '/') ~ '^[0-1]?[0-9]/[0-3]?[0-9]/[0-9][0-9]$'
121
+ THEN
122
+ CASE
123
+ WHEN SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT BETWEEN 1 AND 12
124
+ AND (
125
+ ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (1, 3, 5, 7, 8, 10, 12)
126
+ AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 31 )
127
+ OR ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT IN (4, 6, 9, 11)
128
+ AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 30 )
129
+ OR ( SPLIT_PART(REPLACE($1, '-', '/'), '/', 1)::INT = 2
130
+ AND SPLIT_PART(REPLACE($1, '-', '/'), '/', 2)::INT BETWEEN 1 AND 29)
131
+ )
132
+ AND
133
+ ('20' || RIGHT(SPLIT_PART(REPLACE($1, '-', '/'), '/', 3), 2))::INT BETWEEN 1800 AND 2200 -- NOTE(review) prefixing 20 to the last two year digits always lands in 2000-2099, so this range check cannot fail
134
+ THEN 1
135
+ ELSE 0
136
+ END
137
+ -- DD-MMM-YYYY (anchored so that stray leading or trailing characters yield 0 instead of a failed integer cast)
138
+ WHEN UPPER($1) ~ E'^\\s*[0-3]?[0-9]-(JAN|FEB|MAR|APR|MAY|JUN|JUL|AUG|SEP|OCT|NOV|DEC)-[12][09][0-9][0-9]\\s*$'
139
+ THEN
140
+ CASE
141
+ WHEN SPLIT_PART($1, '-', 3)::INT BETWEEN 1800 AND 2200
142
+ AND (
143
+ ( UPPER(SPLIT_PART($1, '-', 2)) IN ('JAN', 'MAR', 'MAY', 'JUL', 'AUG', 'OCT', 'DEC')
144
+ AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 31 )
145
+ OR ( UPPER(SPLIT_PART($1, '-', 2)) IN ('APR', 'JUN', 'SEP', 'NOV')
146
+ AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 30 )
147
+ OR ( UPPER(SPLIT_PART($1, '-', 2)) = 'FEB'
148
+ AND SPLIT_PART($1, '-', 1)::INT BETWEEN 1 AND 29)
149
+ )
150
+ THEN 1
151
+ ELSE 0
152
+ END
153
+ ELSE 0
154
+ END
155
+ as isdate
156
+ $$
157
+ LANGUAGE sql;
@@ -0,0 +1 @@
1
+ CREATE SCHEMA IF NOT EXISTS {DATA_QC_SCHEMA};
@@ -0,0 +1,2 @@
1
+ GRANT ALL PRIVILEGES ON SCHEMA {DATA_QC_SCHEMA} TO {DB_USER}; -- schema-level privileges first, then execute rights on the functions in that schema
2
+ GRANT EXECUTE ON ALL FUNCTIONS IN SCHEMA {DATA_QC_SCHEMA} TO {DB_USER};
@@ -0,0 +1,38 @@
1
+ SELECT '{PROJECT_CODE}' as project_code, -- one row per column of the target schema, read from information_schema.columns
2
+ CURRENT_TIMESTAMP AT TIME ZONE 'UTC' as refresh_timestamp,
3
+ c.table_schema,
4
+ c.table_name,
5
+ c.column_name,
6
+ CASE -- display form of the column type, with length or precision appended when declared
7
+ WHEN c.data_type = 'timestamp without time zone' THEN 'timestamp'
8
+ WHEN c.data_type = 'character varying'
9
+ THEN 'varchar' || COALESCE('(' || CAST(c.character_maximum_length AS VARCHAR) || ')', '') -- length is NULL for unbounded varchar, so fall back to the bare type name instead of a NULL data_type
10
+ WHEN c.data_type = 'character' THEN 'char' || COALESCE('(' || CAST(c.character_maximum_length AS VARCHAR) || ')', '')
11
+ WHEN c.data_type = 'numeric' THEN 'numeric'
12
+ || COALESCE( '(' || CAST(c.numeric_precision AS VARCHAR) || ','
13
+ || CAST(c.numeric_scale AS VARCHAR) || ')', '')
14
+ ELSE c.data_type END AS data_type,
15
+ c.character_maximum_length,
16
+ c.ordinal_position,
17
+ CASE -- one-letter type family - A character, B boolean, D date or timestamp, T time, N numeric, X anything else
18
+ WHEN c.data_type ILIKE '%char%'
19
+ THEN 'A'
20
+ WHEN c.data_type ILIKE 'boolean'
21
+ THEN 'B'
22
+ WHEN c.data_type ILIKE 'date'
23
+ OR c.data_type ILIKE 'timestamp%'
24
+ THEN 'D'
25
+ WHEN c.data_type ILIKE 'time without time zone'
26
+ THEN 'T'
27
+ WHEN LOWER(c.data_type) IN ('bigint', 'double precision', 'integer', 'smallint', 'real')
28
+ OR c.data_type ILIKE 'numeric%'
29
+ THEN 'N'
30
+ ELSE
31
+ 'X' END AS general_type,
32
+ CASE
33
+ WHEN c.data_type = 'numeric' THEN COALESCE(numeric_scale, 1) > 0 -- numeric with no declared scale is treated as decimal
34
+ ELSE numeric_scale > 0 -- NULL when the type reports no scale
35
+ END as is_decimal
36
+ FROM information_schema.columns c
37
+ WHERE c.table_schema = '{DATA_SCHEMA}' {TABLE_CRITERIA}
38
+ ORDER BY c.table_schema, c.table_name, c.ordinal_position