quollio-core 0.4.9__tar.gz → 0.4.11__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {quollio_core-0.4.9 → quollio_core-0.4.11}/PKG-INFO +1 -1
  2. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/__init__.py +1 -1
  3. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/bigquery.py +10 -1
  4. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/bricks.py +2 -3
  5. quollio_core-0.4.11/quollio_core/helper/log.py +17 -0
  6. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/databricks.py +35 -32
  7. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/redshift.py +41 -74
  8. quollio_core-0.4.11/quollio_core/profilers/snowflake.py +225 -0
  9. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/sqllineage.py +0 -1
  10. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/redshift.py +3 -5
  11. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/databricks.py +3 -3
  12. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/dbt.py +0 -1
  13. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/qdc.py +0 -3
  14. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/redshift.py +0 -1
  15. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/snowflake.py +6 -1
  16. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/snowflake.py +4 -5
  17. quollio_core-0.4.9/quollio_core/profilers/snowflake.py +0 -256
  18. {quollio_core-0.4.9 → quollio_core-0.4.11}/LICENSE +0 -0
  19. {quollio_core-0.4.9 → quollio_core-0.4.11}/README.md +0 -0
  20. {quollio_core-0.4.9 → quollio_core-0.4.11}/pyproject.toml +0 -0
  21. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
  22. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/README.md +0 -0
  23. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
  24. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
  25. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
  26. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
  27. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
  28. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
  29. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +0 -0
  30. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
  31. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/package-lock.yml +0 -0
  32. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/packages.yml +0 -0
  33. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
  34. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
  35. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
  36. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/README.md +0 -0
  37. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
  38. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/dbt_project.yml +0 -0
  39. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
  40. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -0
  41. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
  42. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
  43. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
  44. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
  45. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
  46. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
  47. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +0 -0
  48. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
  49. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
  50. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
  51. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
  52. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/package-lock.yml +0 -0
  53. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/packages.yml +0 -0
  54. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
  55. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
  56. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
  57. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
  58. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/README.md +0 -0
  59. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
  60. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
  61. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
  62. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -0
  63. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
  64. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
  65. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
  66. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
  67. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
  68. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
  69. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +0 -0
  70. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
  71. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -0
  72. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
  73. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
  74. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/package-lock.yml +0 -0
  75. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/packages.yml +0 -0
  76. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
  77. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
  78. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
  79. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/helper/__init__.py +0 -0
  80. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/helper/core.py +0 -0
  81. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/helper/env_default.py +0 -0
  82. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/__init__.py +0 -0
  83. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/bigquery.py +0 -0
  84. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/lineage.py +0 -0
  85. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/profilers/stats.py +0 -0
  86. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/__init__.py +0 -0
  87. {quollio_core-0.4.9 → quollio_core-0.4.11}/quollio_core/repository/bigquery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: quollio-core
3
- Version: 0.4.9
3
+ Version: 0.4.11
4
4
  Summary: Quollio Core
5
5
  Author-email: quollio-dev <qt.dev@quollio.com>
6
6
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.4.9"
3
+ __version__ = "0.4.11"
4
4
  __author__ = "Quollio Technologies, Inc"
@@ -3,6 +3,7 @@ import json
3
3
  import logging
4
4
 
5
5
  from quollio_core.helper.env_default import env_default
6
+ from quollio_core.helper.log import set_log_level
6
7
  from quollio_core.profilers.bigquery import bigquery_table_lineage
7
8
  from quollio_core.repository import qdc
8
9
  from quollio_core.repository.bigquery import get_credentials, get_org_id
@@ -88,14 +89,22 @@ if __name__ == "__main__":
88
89
  help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
89
90
  nargs="+",
90
91
  )
92
+ parser.add_argument(
93
+ "--log_level",
94
+ type=str,
95
+ choices=["debug", "info", "warn", "error", "none"],
96
+ action=env_default("LOG_LEVEL"),
97
+ required=False,
98
+ help="The log level for dbt commands. Default value is info",
99
+ )
91
100
 
92
101
  args = parser.parse_args()
102
+ set_log_level(level=args.log_level)
93
103
 
94
104
  if len(args.commands) == 0:
95
105
  raise ValueError("No command is provided")
96
106
 
97
107
  if "load_lineage" in args.commands:
98
-
99
108
  qdc_client = qdc.QDCExternalAPIClient(
100
109
  base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
101
110
  )
@@ -4,6 +4,7 @@ import os
4
4
 
5
5
  from quollio_core.helper.core import setup_dbt_profile, trim_prefix
6
6
  from quollio_core.helper.env_default import env_default
7
+ from quollio_core.helper.log import set_log_level
7
8
  from quollio_core.profilers.databricks import (
8
9
  databricks_column_level_lineage,
9
10
  databricks_column_stats,
@@ -20,7 +21,6 @@ def build_view(
20
21
  target_tables: str = "",
21
22
  log_level: str = "info",
22
23
  ) -> None:
23
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
24
24
 
25
25
  logger.info("Build profiler views using dbt")
26
26
  # set parameters
@@ -64,7 +64,6 @@ def load_lineage(
64
64
  tenant_id: str,
65
65
  enable_column_lineage: bool = False,
66
66
  ) -> None:
67
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
68
67
 
69
68
  logger.info("Generate Databricks table to table lineage.")
70
69
  databricks_table_level_lineage(
@@ -99,7 +98,6 @@ def load_column_stats(
99
98
  qdc_client: qdc.QDCExternalAPIClient,
100
99
  tenant_id: str,
101
100
  ) -> None:
102
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
103
101
 
104
102
  logger.info("Generate Databricks column stats.")
105
103
  databricks_column_stats(
@@ -243,6 +241,7 @@ if __name__ == "__main__":
243
241
  )
244
242
 
245
243
  args = parser.parse_args()
244
+ set_log_level(level=args.log_level)
246
245
 
247
246
  conn = db.DatabricksConnectionConfig(
248
247
  # MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
@@ -0,0 +1,17 @@
1
+ import logging
2
+
3
+
4
def set_log_level(level: str = "info") -> None:
    """Configure root logging with the package-wide format at the requested level.

    The level name is resolved to a ``logging`` constant and handed to
    ``logging.basicConfig`` together with the shared message format.

    Args:
        level: One of "debug", "info", "warn", "error" or "critical". The
            name is matched case-insensitively and surrounding whitespace is
            ignored. ``None`` is treated as "info" so that callers forwarding
            an unset command-line option still get the default level instead
            of falling through to ``logging.NOTSET``. Any other value selects
            ``logging.NOTSET``.
    """
    fmt = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warn": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }

    # An option that was not supplied reaches this function as an explicit
    # None, which bypasses the "info" default in the signature.
    if level is None:
        level = "info"

    key = level.strip().lower() if isinstance(level, str) else None

    # NOTE(review): unrecognised names, including "none", resolve to NOTSET.
    # On the root logger NOTSET lets every record through, so "none" does not
    # silence output. Confirm whether "none" is meant to disable logging.
    logging.basicConfig(level=levels.get(key, logging.NOTSET), format=fmt)
@@ -19,7 +19,6 @@ def databricks_table_level_lineage(
19
19
  tenant_id: str,
20
20
  dbt_table_name: str = "quollio_lineage_table_level",
21
21
  ) -> None:
22
- logging.basicConfig(level=logging.info, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
23
22
  with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
24
23
  results = databricks_executor.get_query_results(
25
24
  query=f"""
@@ -61,7 +60,6 @@ def databricks_column_level_lineage(
61
60
  tenant_id: str,
62
61
  dbt_table_name: str = "quollio_lineage_column_level",
63
62
  ) -> None:
64
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
65
63
  with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
66
64
  results = databricks_executor.get_query_results(
67
65
  query=f"""
@@ -140,42 +138,47 @@ def _get_column_stats(
140
138
  raise ValueError(f"Invalid table name: {table['table_fqdn']}")
141
139
  with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
142
140
  query = """
143
- SELECT
144
- "{monitored_table_catalog}" as DB_NAME,
145
- "{monitored_table_schema}" as SCHEMA_NAME,
146
- "{monitored_table_name}" as TABLE_NAME,
147
- t.COLUMN_NAME,
148
- t.DATA_TYPE,
149
- t.distinct_count as CARDINALITY,
150
- t.MAX as MAX_VALUE,
151
- t.MIN as MIN_VALUE,
152
- t.AVG as AVG_VALUE,
153
- t.MEDIAN as MEDIAN_VALUE,
154
- t.STDDEV as STDDEV_VALUE,
155
- t.NUM_NULLS as NULL_COUNT,
156
- t.frequent_items[0].item AS MODE_VALUE,
157
- MAX(t.window) AS LATEST
158
- FROM
159
- {monitoring_table} t
160
- WHERE
161
- t.column_name not in (':table')
162
- GROUP BY
163
- t.COLUMN_NAME,
164
- t.DATA_TYPE,
165
- t.distinct_count,
166
- t.MAX,
167
- t.MIN,
168
- t.AVG,
169
- t.MEDIAN,
170
- t.STDDEV,
171
- t.NUM_NULLS,
172
- t.frequent_items
141
+ WITH profile_record_history AS (
142
+ SELECT
143
+ COLUMN_NAME
144
+ , distinct_count as CARDINALITY
145
+ , MAX as MAX_VALUE
146
+ , MIN as MIN_VALUE
147
+ , AVG as AVG_VALUE
148
+ , MEDIAN as MEDIAN_VALUE
149
+ , STDDEV as STDDEV_VALUE
150
+ , NUM_NULLS as NULL_COUNT
151
+ , frequent_items[0].item AS MODE_VALUE
152
+ , row_number() over(partition by column_name order by window desc) rownum
153
+ FROM
154
+ {monitoring_table}
155
+ WHERE
156
+ column_name not in (':table')
157
+ )
158
+ SELECT
159
+ "{monitored_table_catalog}" as DB_NAME
160
+ , "{monitored_table_schema}" as SCHEMA_NAME
161
+ , "{monitored_table_name}" as TABLE_NAME
162
+ , COLUMN_NAME
163
+ , CARDINALITY
164
+ , MAX_VALUE
165
+ , MIN_VALUE
166
+ , AVG_VALUE
167
+ , MEDIAN_VALUE
168
+ , STDDEV_VALUE
169
+ , NULL_COUNT
170
+ , MODE_VALUE
171
+ FROM
172
+ profile_record_history
173
+ WHERE
174
+ rownum = 1
173
175
  """.format(
174
176
  monitoring_table=table["table_fqdn"],
175
177
  monitored_table_catalog=monitored_table[0],
176
178
  monitored_table_schema=monitored_table[1],
177
179
  monitored_table_name=monitored_table[2],
178
180
  )
181
+ logger.debug(f"The following sql will be fetched to retrieve stats values. {query}")
179
182
  stats.append(databricks_executor.get_query_results(query))
180
183
  return stats
181
184
 
@@ -14,7 +14,6 @@ def redshift_table_level_lineage(
14
14
  tenant_id: str,
15
15
  dbt_table_name: str,
16
16
  ) -> None:
17
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
18
17
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
19
18
  results = redshift_executor.get_query_results(
20
19
  query="""
@@ -55,22 +54,7 @@ def redshift_table_level_lineage(
55
54
  return
56
55
 
57
56
 
58
- def _get_target_tables_query(db: str, schema: str) -> str:
59
- query = """
60
- SELECT
61
- DISTINCT
62
- database_name
63
- , schema_name
64
- , table_name
65
- FROM
66
- {db}.{schema}.quollio_stats_profiling_columns
67
- """.format(
68
- db=db, schema=schema
69
- )
70
- return query
71
-
72
-
73
- def _get_stats_tables_query(db: str, schema: str) -> str:
57
+ def _gen_get_stats_views_query(db: str, schema: str) -> str:
74
58
  query = """
75
59
  SELECT
76
60
  DISTINCT
@@ -93,70 +77,54 @@ def redshift_table_stats(
93
77
  qdc_client: qdc.QDCExternalAPIClient,
94
78
  tenant_id: str,
95
79
  ) -> None:
96
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
97
80
 
98
81
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
99
- req_count = 0
100
- target_query = _get_target_tables_query(
82
+ stats_query = _gen_get_stats_views_query(
101
83
  db=conn.database,
102
84
  schema=conn.schema,
103
85
  )
104
- target_assets = redshift_executor.get_query_results(query=target_query)
86
+ stats_views = redshift_executor.get_query_results(query=stats_query)
105
87
 
106
- stats_query = _get_stats_tables_query(
107
- db=conn.database,
108
- schema=conn.schema,
109
- )
110
- stats_columns = redshift_executor.get_query_results(query=stats_query)
111
- for target_asset in target_assets:
112
- for stats_column in stats_columns:
113
- stats_query = """
114
- SELECT
115
- db_name
116
- , schema_name
117
- , table_name
118
- , column_name
119
- , max_value
120
- , min_value
121
- , null_count
122
- , cardinality
123
- , avg_value
124
- , median_value
125
- , mode_value
126
- , stddev_value
127
- FROM
128
- {db}.{schema}.{table}
129
- WHERE
130
- db_name = '{target_db}'
131
- and schema_name = '{target_schema}'
132
- and table_name = '{target_table}'
133
- """.format(
134
- db=stats_column[0],
135
- schema=stats_column[1],
136
- table=stats_column[2],
137
- target_db=target_asset[0],
138
- target_schema=target_asset[1],
139
- target_table=target_asset[2],
88
+ req_count = 0
89
+ for stats_view in stats_views:
90
+ stats_query = """
91
+ SELECT
92
+ db_name
93
+ , schema_name
94
+ , table_name
95
+ , column_name
96
+ , max_value
97
+ , min_value
98
+ , null_count
99
+ , cardinality
100
+ , avg_value
101
+ , median_value
102
+ , mode_value
103
+ , stddev_value
104
+ FROM
105
+ {db}.{schema}.{table}
106
+ """.format(
107
+ db=stats_view[0],
108
+ schema=stats_view[1],
109
+ table=stats_view[2],
110
+ )
111
+ stats_result = redshift_executor.get_query_results(query=stats_query)
112
+ payloads = gen_table_stats_payload_from_tuple(tenant_id=tenant_id, endpoint=conn.host, stats=stats_result)
113
+ for payload in payloads:
114
+ logger.info(
115
+ "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
116
+ db=payload.db,
117
+ schema=payload.schema,
118
+ table=payload.table,
119
+ column=payload.column,
120
+ )
140
121
  )
141
- stats_result = redshift_executor.get_query_results(query=stats_query)
142
- payloads = gen_table_stats_payload_from_tuple(
143
- tenant_id=tenant_id, endpoint=conn.host, stats=stats_result
122
+ status_code = qdc_client.update_stats_by_id(
123
+ global_id=payload.global_id,
124
+ payload=payload.body.get_column_stats(),
144
125
  )
145
- for payload in payloads:
146
- logger.info(
147
- "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
148
- db=payload.db,
149
- schema=payload.schema,
150
- table=payload.table,
151
- column=payload.column,
152
- )
153
- )
154
- status_code = qdc_client.update_stats_by_id(
155
- global_id=payload.global_id,
156
- payload=payload.body.get_column_stats(),
157
- )
158
- if status_code == 200:
159
- req_count += 1
126
+ if status_code == 200:
127
+ req_count += 1
160
128
  logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
161
129
  return
162
130
 
@@ -166,7 +134,6 @@ def redshift_table_level_sqllineage(
166
134
  qdc_client: qdc.QDCExternalAPIClient,
167
135
  tenant_id: str,
168
136
  ) -> None:
169
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
170
137
  redshift_connector = redshift.RedshiftQueryExecutor(conn)
171
138
  results = redshift_connector.get_query_results(
172
139
  query="""
@@ -0,0 +1,225 @@
1
+ import logging
2
+
3
+ from quollio_core.profilers.lineage import (
4
+ gen_column_lineage_payload,
5
+ gen_table_lineage_payload,
6
+ parse_snowflake_results,
7
+ )
8
+ from quollio_core.profilers.sqllineage import SQLLineage
9
+ from quollio_core.profilers.stats import gen_table_stats_payload
10
+ from quollio_core.repository import qdc, snowflake
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def snowflake_table_to_table_lineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Read QUOLLIO_LINEAGE_TABLE_LEVEL and send each table lineage to QDC.

    Args:
        conn: Snowflake connection settings. ``account_database`` and
            ``account_schema`` locate the lineage view; ``account_id`` is
            passed as the endpoint when payloads are generated.
        qdc_client: Client whose ``update_lineage_by_id`` is called once per
            generated payload.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        lineage_sql = """
            SELECT
                *
            FROM
                {db}.{schema}.QUOLLIO_LINEAGE_TABLE_LEVEL
            """.format(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        rows = sf_executor.get_query_results(query=lineage_sql)
        lineage_inputs = gen_table_lineage_payload(
            tenant_id=tenant_id,
            endpoint=conn.account_id,
            tables=parse_snowflake_results(results=rows),
        )

        req_count = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating table lineage. downstream: "
                f"{lineage_input.downstream_database_name} -> "
                f"{lineage_input.downstream_schema_name} -> "
                f"{lineage_input.downstream_table_name}"
            )
            status_code = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only responses with status 200 are counted as ingested.
            if status_code != 200:
                continue
            req_count += 1
        logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
    return
56
+
57
+
58
def snowflake_column_to_column_lineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Read QUOLLIO_LINEAGE_COLUMN_LEVEL and send each column lineage to QDC.

    Args:
        conn: Snowflake connection settings. ``account_database`` and
            ``account_schema`` locate the lineage view; ``account_id`` is
            passed as the endpoint when payloads are generated.
        qdc_client: Client whose ``update_lineage_by_id`` is called once per
            generated payload.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        lineage_sql = """
            SELECT
                *
            FROM
                {db}.{schema}.QUOLLIO_LINEAGE_COLUMN_LEVEL
            """.format(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        # The raw query results are handed to the payload generator as-is.
        rows = sf_executor.get_query_results(query=lineage_sql)
        lineage_inputs = gen_column_lineage_payload(
            tenant_id=tenant_id,
            endpoint=conn.account_id,
            columns=rows,
        )

        req_count = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating column lineage. downstream: "
                f"{lineage_input.downstream_database_name} -> "
                f"{lineage_input.downstream_schema_name} -> "
                f"{lineage_input.downstream_table_name} -> "
                f"{lineage_input.downstream_column_name}"
            )
            status_code = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only responses with status 200 are counted as ingested.
            if status_code != 200:
                continue
            req_count += 1
        logger.info(f"Generating column lineage is finished. {req_count} lineages are ingested.")
    return
99
+
100
+
101
def snowflake_table_level_sqllineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Derive table lineage from the SQL in QUOLLIO_SQLLINEAGE_SOURCES and send it to QDC.

    Each row's ``QUERY_TEXT`` is analysed with ``SQLLineage`` using the
    "snowflake" dialect, with the row's ``DATABASE_NAME`` and ``SCHEMA_NAME``
    passed as the destination database and schema. All lineage inputs are
    built before any of them is sent.

    Args:
        conn: Snowflake connection settings. ``account_database`` and
            ``account_schema`` locate the source view; ``account_id`` is
            passed as the endpoint when lineage inputs are generated.
        qdc_client: Client whose ``update_lineage_by_id`` is called once per
            lineage input.
        tenant_id: Tenant identifier forwarded to the lineage input generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        sources_sql = """
            SELECT
                database_name
                , schema_name
                , query_text
            FROM
                {db}.{schema}.QUOLLIO_SQLLINEAGE_SOURCES
            """.format(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        source_rows = sf_executor.get_query_results(query=sources_sql)

        # First pass: turn every source statement into a lineage input.
        pending_inputs = []
        sql_lineage = SQLLineage()
        for row in source_rows:
            src_tables, dest_table = sql_lineage.get_table_level_lineage_source(
                sql=row["QUERY_TEXT"],
                dialect="snowflake",
                dest_db=row["DATABASE_NAME"],
                dest_schema=row["SCHEMA_NAME"],
            )
            pending_inputs.append(
                sql_lineage.gen_lineage_input(
                    tenant_id=tenant_id,
                    endpoint=conn.account_id,
                    src_tables=src_tables,
                    dest_table=dest_table,
                )
            )

        # Second pass: send the collected inputs and count the successes.
        req_count = 0
        for lineage_input in pending_inputs:
            logger.info(
                "Generating table lineage. downstream: "
                f"{lineage_input.downstream_database_name} -> "
                f"{lineage_input.downstream_schema_name} -> "
                f"{lineage_input.downstream_table_name}"
            )
            status_code = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            if status_code != 200:
                continue
            req_count += 1
        logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
    return
151
+
152
+
153
def snowflake_table_stats(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Read column statistics from every stats view and send them to QDC.

    The stats views are discovered with the query built by
    ``_gen_get_stats_views_query``. Each view is then queried in turn and
    every resulting payload is sent before the next view is read.

    Args:
        conn: Snowflake connection settings. ``account_database`` and
            ``account_schema`` are used to discover the stats views;
            ``account_id`` is passed as the endpoint when payloads are
            generated.
        qdc_client: Client whose ``update_stats_by_id`` is called once per
            generated payload.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        views_query = _gen_get_stats_views_query(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        stats_views = sf_executor.get_query_results(query=views_query)

        req_count = 0
        for stats_view in stats_views:
            stats_query = """
            SELECT
                db_name
                , schema_name
                , table_name
                , column_name
                , max_value
                , min_value
                , null_count
                , cardinality
                , avg_value
                , median_value
                , mode_value
                , stddev_value
            FROM
                {db}.{schema}.{table}
            """.format(
                db=stats_view["TABLE_CATALOG"],
                schema=stats_view["TABLE_SCHEMA"],
                table=stats_view["TABLE_NAME"],
            )
            logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
            view_rows = sf_executor.get_query_results(query=stats_query)
            payloads = gen_table_stats_payload(
                tenant_id=tenant_id,
                endpoint=conn.account_id,
                stats=view_rows,
            )
            for payload in payloads:
                logger.info(
                    "Generating table stats. asset: "
                    f"{payload.db} -> {payload.schema} -> {payload.table} -> {payload.column}"
                )
                status_code = qdc_client.update_stats_by_id(
                    global_id=payload.global_id,
                    payload=payload.body.get_column_stats(),
                )
                # Only responses with status 200 are counted as ingested.
                if status_code != 200:
                    continue
                req_count += 1
        logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
    return
208
+
209
+
210
+ def _gen_get_stats_views_query(db: str, schema: str) -> str:
211
+ query = """
212
+ SELECT
213
+ DISTINCT
214
+ TABLE_CATALOG
215
+ , TABLE_SCHEMA
216
+ , TABLE_NAME
217
+ FROM
218
+ {db}.INFORMATION_SCHEMA.TABLES
219
+ WHERE
220
+ startswith(TABLE_NAME, 'QUOLLIO_STATS_COLUMNS_')
221
+ AND TABLE_SCHEMA = UPPER('{schema}')
222
+ """.format(
223
+ db=db, schema=schema
224
+ )
225
+ return query
@@ -54,7 +54,6 @@ class SQLLineage:
54
54
  dest_db: str = None,
55
55
  dest_schema: str = None,
56
56
  ) -> Tuple[Set[Table], Table]:
57
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
58
57
  try:
59
58
  statement: sqlglot.Expression = sqlglot.parse_one(sql=sql, error_level=sqlglot.ErrorLevel.RAISE)
60
59
  except ParseError as e:
@@ -4,6 +4,7 @@ import os
4
4
 
5
5
  from quollio_core.helper.core import setup_dbt_profile
6
6
  from quollio_core.helper.env_default import env_default
7
+ from quollio_core.helper.log import set_log_level
7
8
  from quollio_core.profilers.redshift import (
8
9
  redshift_table_level_lineage,
9
10
  redshift_table_level_sqllineage,
@@ -20,8 +21,6 @@ def build_view(
20
21
  target_tables: str = "",
21
22
  log_level: str = "info",
22
23
  ) -> None:
23
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
24
-
25
24
  logger.info("Build profiler views using dbt")
26
25
  # set parameters
27
26
  dbt_client = dbt.DBTClient()
@@ -74,7 +73,6 @@ def load_lineage(
74
73
  qdc_client: qdc.QDCExternalAPIClient,
75
74
  tenant_id: str,
76
75
  ) -> None:
77
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
78
76
  logger.info("Generate redshift table to table lineage.")
79
77
  redshift_table_level_lineage(
80
78
  conn=conn,
@@ -101,7 +99,6 @@ def load_stats(
101
99
  qdc_client: qdc.QDCExternalAPIClient,
102
100
  tenant_id: str,
103
101
  ) -> None:
104
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
105
102
 
106
103
  logger.info("Generate redshift stats.")
107
104
  redshift_table_stats(
@@ -119,7 +116,6 @@ def load_sqllineage(
119
116
  qdc_client: qdc.QDCExternalAPIClient,
120
117
  tenant_id: str,
121
118
  ) -> None:
122
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
123
119
 
124
120
  logger.info("Generate Redshift sqllineage.")
125
121
  redshift_table_level_sqllineage(
@@ -266,6 +262,8 @@ if __name__ == "__main__":
266
262
  help="The client secrete that is created on Quollio console to let clients access Quollio External API",
267
263
  )
268
264
  args = parser.parse_args()
265
+ set_log_level(level=args.log_level)
266
+
269
267
  conn = redshift.RedshiftConnectionConfig(
270
268
  host=args.host,
271
269
  build_user=args.build_user,
@@ -5,7 +5,7 @@ from typing import Dict, List, Optional
5
5
  from databricks.sdk.core import Config, HeaderFactory, oauth_service_principal
6
6
  from databricks.sql.client import Connection, connect
7
7
 
8
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
8
+ logger = logging.getLogger(__name__)
9
9
 
10
10
 
11
11
  @dataclass
@@ -47,8 +47,8 @@ class DatabricksQueryExecutor:
47
47
  cur.execute(query)
48
48
  result: List[Dict[str, str]] = cur.fetchall()
49
49
  except Exception as e:
50
- logging.error(query, exc_info=True)
51
- logging.error("databricks get_query_results failed. %s", e)
50
+ logger.error(query, exc_info=True)
51
+ logger.error("databricks get_query_results failed. %s", e)
52
52
  raise
53
53
 
54
54
  for row in result:
@@ -11,7 +11,6 @@ class DBTClient:
11
11
  self.dbt = dbtRunner()
12
12
 
13
13
  def invoke(self, cmd: str, project_dir: str, profile_dir: str, options: List[str] = None) -> dbtRunnerResult:
14
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
15
14
  req = [cmd, "--project-dir", project_dir, "--profiles-dir", profile_dir]
16
15
  if options is not None:
17
16
  req.extend(options)