quollio-core 0.4.8__tar.gz → 0.4.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. {quollio_core-0.4.8 → quollio_core-0.4.10}/PKG-INFO +1 -1
  2. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/__init__.py +1 -1
  3. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/bigquery.py +10 -1
  4. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/bricks.py +2 -3
  5. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/dbt_project.yml +1 -1
  6. quollio_core-0.4.10/quollio_core/helper/log.py +17 -0
  7. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/databricks.py +0 -2
  8. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/redshift.py +41 -74
  9. quollio_core-0.4.10/quollio_core/profilers/snowflake.py +225 -0
  10. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/sqllineage.py +0 -1
  11. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/redshift.py +3 -5
  12. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/databricks.py +3 -3
  13. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/dbt.py +0 -1
  14. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/qdc.py +0 -3
  15. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/redshift.py +0 -1
  16. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/snowflake.py +6 -1
  17. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/snowflake.py +4 -5
  18. quollio_core-0.4.8/quollio_core/profilers/snowflake.py +0 -256
  19. {quollio_core-0.4.8 → quollio_core-0.4.10}/LICENSE +0 -0
  20. {quollio_core-0.4.8 → quollio_core-0.4.10}/README.md +0 -0
  21. {quollio_core-0.4.8 → quollio_core-0.4.10}/pyproject.toml +0 -0
  22. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
  23. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/README.md +0 -0
  24. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
  25. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
  26. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
  27. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
  28. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
  29. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
  30. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +0 -0
  31. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
  32. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/package-lock.yml +0 -0
  33. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/packages.yml +0 -0
  34. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
  35. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
  36. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
  37. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/README.md +0 -0
  38. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
  39. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
  40. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -0
  41. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
  42. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
  43. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
  44. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
  45. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
  46. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
  47. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +0 -0
  48. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
  49. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
  50. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
  51. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
  52. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/package-lock.yml +0 -0
  53. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/packages.yml +0 -0
  54. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
  55. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
  56. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
  57. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
  58. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/README.md +0 -0
  59. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
  60. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
  61. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
  62. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -0
  63. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
  64. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
  65. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
  66. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
  67. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
  68. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
  69. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +0 -0
  70. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
  71. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -0
  72. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
  73. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
  74. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/package-lock.yml +0 -0
  75. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/packages.yml +0 -0
  76. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
  77. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
  78. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
  79. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/helper/__init__.py +0 -0
  80. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/helper/core.py +0 -0
  81. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/helper/env_default.py +0 -0
  82. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/__init__.py +0 -0
  83. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/bigquery.py +0 -0
  84. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/lineage.py +0 -0
  85. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/profilers/stats.py +0 -0
  86. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/__init__.py +0 -0
  87. {quollio_core-0.4.8 → quollio_core-0.4.10}/quollio_core/repository/bigquery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: quollio-core
3
- Version: 0.4.8
3
+ Version: 0.4.10
4
4
  Summary: Quollio Core
5
5
  Author-email: quollio-dev <qt.dev@quollio.com>
6
6
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.4.8"
3
+ __version__ = "0.4.10"
4
4
  __author__ = "Quollio Technologies, Inc"
@@ -3,6 +3,7 @@ import json
3
3
  import logging
4
4
 
5
5
  from quollio_core.helper.env_default import env_default
6
+ from quollio_core.helper.log import set_log_level
6
7
  from quollio_core.profilers.bigquery import bigquery_table_lineage
7
8
  from quollio_core.repository import qdc
8
9
  from quollio_core.repository.bigquery import get_credentials, get_org_id
@@ -88,14 +89,22 @@ if __name__ == "__main__":
88
89
  help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
89
90
  nargs="+",
90
91
  )
92
+ parser.add_argument(
93
+ "--log_level",
94
+ type=str,
95
+ choices=["debug", "info", "warn", "error", "none"],
96
+ action=env_default("LOG_LEVEL"),
97
+ required=False,
98
+ help="The log level for dbt commands. Default value is info",
99
+ )
91
100
 
92
101
  args = parser.parse_args()
102
+ set_log_level(level=args.log_level)
93
103
 
94
104
  if len(args.commands) == 0:
95
105
  raise ValueError("No command is provided")
96
106
 
97
107
  if "load_lineage" in args.commands:
98
-
99
108
  qdc_client = qdc.QDCExternalAPIClient(
100
109
  base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
101
110
  )
@@ -4,6 +4,7 @@ import os
4
4
 
5
5
  from quollio_core.helper.core import setup_dbt_profile, trim_prefix
6
6
  from quollio_core.helper.env_default import env_default
7
+ from quollio_core.helper.log import set_log_level
7
8
  from quollio_core.profilers.databricks import (
8
9
  databricks_column_level_lineage,
9
10
  databricks_column_stats,
@@ -20,7 +21,6 @@ def build_view(
20
21
  target_tables: str = "",
21
22
  log_level: str = "info",
22
23
  ) -> None:
23
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
24
24
 
25
25
  logger.info("Build profiler views using dbt")
26
26
  # set parameters
@@ -64,7 +64,6 @@ def load_lineage(
64
64
  tenant_id: str,
65
65
  enable_column_lineage: bool = False,
66
66
  ) -> None:
67
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
68
67
 
69
68
  logger.info("Generate Databricks table to table lineage.")
70
69
  databricks_table_level_lineage(
@@ -99,7 +98,6 @@ def load_column_stats(
99
98
  qdc_client: qdc.QDCExternalAPIClient,
100
99
  tenant_id: str,
101
100
  ) -> None:
102
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
103
101
 
104
102
  logger.info("Generate Databricks column stats.")
105
103
  databricks_column_stats(
@@ -243,6 +241,7 @@ if __name__ == "__main__":
243
241
  )
244
242
 
245
243
  args = parser.parse_args()
244
+ set_log_level(level=args.log_level)
246
245
 
247
246
  conn = db.DatabricksConnectionConfig(
248
247
  # MEMO: Metadata agent allows the string 'https://' as a host name but is not allowed by intelligence agent.
@@ -18,4 +18,4 @@ clean-targets:
18
18
  models:
19
19
  +dbt-osmosis: "{model}.yml"
20
20
  +grants:
21
- select: ["{{ var('query_user') }}"]
21
+ select: ["\"{{ var('query_user') }}\""]
@@ -0,0 +1,17 @@
1
+ import logging
2
+
3
+
4
def set_log_level(level: str = "info") -> None:
    """Configure the root logger from a textual log level.

    Args:
        level: One of "debug", "info", "warn" (or "warning"), "error" or
            "critical". Matching is case-insensitive and ignores surrounding
            whitespace. ``None`` is treated as the default, "info". Any other
            value, including "none", selects ``logging.NOTSET``.

    Note:
        This delegates to ``logging.basicConfig``, which leaves the root
        logger untouched when it already has handlers configured.
    """
    fmt = "%(asctime)s - %(levelname)s - %(name)s - %(message)s"
    levels = {
        "debug": logging.DEBUG,
        "info": logging.INFO,
        "warn": logging.WARNING,
        "warning": logging.WARNING,
        "error": logging.ERROR,
        "critical": logging.CRITICAL,
    }

    # Callers forward the parsed CLI value as-is. When it is None the
    # documented default ("info") must apply instead of silently falling
    # through to NOTSET, which would make the root logger emit everything.
    # (Assumes the CLI can yield None when neither the flag nor the
    # environment variable is set -- TODO confirm against env_default.)
    name = "info" if level is None else str(level).strip().lower()

    # NOTE(review): "none" is an accepted CLI choice but maps to NOTSET here,
    # which on the root logger lets every record through rather than
    # silencing output. Kept as-is for compatibility; confirm the intent.
    logging.basicConfig(level=levels.get(name, logging.NOTSET), format=fmt)
@@ -19,7 +19,6 @@ def databricks_table_level_lineage(
19
19
  tenant_id: str,
20
20
  dbt_table_name: str = "quollio_lineage_table_level",
21
21
  ) -> None:
22
- logging.basicConfig(level=logging.info, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
23
22
  with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
24
23
  results = databricks_executor.get_query_results(
25
24
  query=f"""
@@ -61,7 +60,6 @@ def databricks_column_level_lineage(
61
60
  tenant_id: str,
62
61
  dbt_table_name: str = "quollio_lineage_column_level",
63
62
  ) -> None:
64
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
65
63
  with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
66
64
  results = databricks_executor.get_query_results(
67
65
  query=f"""
@@ -14,7 +14,6 @@ def redshift_table_level_lineage(
14
14
  tenant_id: str,
15
15
  dbt_table_name: str,
16
16
  ) -> None:
17
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
18
17
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
19
18
  results = redshift_executor.get_query_results(
20
19
  query="""
@@ -55,22 +54,7 @@ def redshift_table_level_lineage(
55
54
  return
56
55
 
57
56
 
58
- def _get_target_tables_query(db: str, schema: str) -> str:
59
- query = """
60
- SELECT
61
- DISTINCT
62
- database_name
63
- , schema_name
64
- , table_name
65
- FROM
66
- {db}.{schema}.quollio_stats_profiling_columns
67
- """.format(
68
- db=db, schema=schema
69
- )
70
- return query
71
-
72
-
73
- def _get_stats_tables_query(db: str, schema: str) -> str:
57
+ def _gen_get_stats_views_query(db: str, schema: str) -> str:
74
58
  query = """
75
59
  SELECT
76
60
  DISTINCT
@@ -93,70 +77,54 @@ def redshift_table_stats(
93
77
  qdc_client: qdc.QDCExternalAPIClient,
94
78
  tenant_id: str,
95
79
  ) -> None:
96
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
97
80
 
98
81
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
99
- req_count = 0
100
- target_query = _get_target_tables_query(
82
+ stats_query = _gen_get_stats_views_query(
101
83
  db=conn.database,
102
84
  schema=conn.schema,
103
85
  )
104
- target_assets = redshift_executor.get_query_results(query=target_query)
86
+ stats_views = redshift_executor.get_query_results(query=stats_query)
105
87
 
106
- stats_query = _get_stats_tables_query(
107
- db=conn.database,
108
- schema=conn.schema,
109
- )
110
- stats_columns = redshift_executor.get_query_results(query=stats_query)
111
- for target_asset in target_assets:
112
- for stats_column in stats_columns:
113
- stats_query = """
114
- SELECT
115
- db_name
116
- , schema_name
117
- , table_name
118
- , column_name
119
- , max_value
120
- , min_value
121
- , null_count
122
- , cardinality
123
- , avg_value
124
- , median_value
125
- , mode_value
126
- , stddev_value
127
- FROM
128
- {db}.{schema}.{table}
129
- WHERE
130
- db_name = '{target_db}'
131
- and schema_name = '{target_schema}'
132
- and table_name = '{target_table}'
133
- """.format(
134
- db=stats_column[0],
135
- schema=stats_column[1],
136
- table=stats_column[2],
137
- target_db=target_asset[0],
138
- target_schema=target_asset[1],
139
- target_table=target_asset[2],
88
+ req_count = 0
89
+ for stats_view in stats_views:
90
+ stats_query = """
91
+ SELECT
92
+ db_name
93
+ , schema_name
94
+ , table_name
95
+ , column_name
96
+ , max_value
97
+ , min_value
98
+ , null_count
99
+ , cardinality
100
+ , avg_value
101
+ , median_value
102
+ , mode_value
103
+ , stddev_value
104
+ FROM
105
+ {db}.{schema}.{table}
106
+ """.format(
107
+ db=stats_view[0],
108
+ schema=stats_view[1],
109
+ table=stats_view[2],
110
+ )
111
+ stats_result = redshift_executor.get_query_results(query=stats_query)
112
+ payloads = gen_table_stats_payload_from_tuple(tenant_id=tenant_id, endpoint=conn.host, stats=stats_result)
113
+ for payload in payloads:
114
+ logger.info(
115
+ "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
116
+ db=payload.db,
117
+ schema=payload.schema,
118
+ table=payload.table,
119
+ column=payload.column,
120
+ )
140
121
  )
141
- stats_result = redshift_executor.get_query_results(query=stats_query)
142
- payloads = gen_table_stats_payload_from_tuple(
143
- tenant_id=tenant_id, endpoint=conn.host, stats=stats_result
122
+ status_code = qdc_client.update_stats_by_id(
123
+ global_id=payload.global_id,
124
+ payload=payload.body.get_column_stats(),
144
125
  )
145
- for payload in payloads:
146
- logger.info(
147
- "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
148
- db=payload.db,
149
- schema=payload.schema,
150
- table=payload.table,
151
- column=payload.column,
152
- )
153
- )
154
- status_code = qdc_client.update_stats_by_id(
155
- global_id=payload.global_id,
156
- payload=payload.body.get_column_stats(),
157
- )
158
- if status_code == 200:
159
- req_count += 1
126
+ if status_code == 200:
127
+ req_count += 1
160
128
  logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
161
129
  return
162
130
 
@@ -166,7 +134,6 @@ def redshift_table_level_sqllineage(
166
134
  qdc_client: qdc.QDCExternalAPIClient,
167
135
  tenant_id: str,
168
136
  ) -> None:
169
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
170
137
  redshift_connector = redshift.RedshiftQueryExecutor(conn)
171
138
  results = redshift_connector.get_query_results(
172
139
  query="""
@@ -0,0 +1,225 @@
1
+ import logging
2
+
3
+ from quollio_core.profilers.lineage import (
4
+ gen_column_lineage_payload,
5
+ gen_table_lineage_payload,
6
+ parse_snowflake_results,
7
+ )
8
+ from quollio_core.profilers.sqllineage import SQLLineage
9
+ from quollio_core.profilers.stats import gen_table_stats_payload
10
+ from quollio_core.repository import qdc, snowflake
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def snowflake_table_to_table_lineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Send table-level lineage read from QUOLLIO_LINEAGE_TABLE_LEVEL.

    Every row of ``{account_database}.{account_schema}.QUOLLIO_LINEAGE_TABLE_LEVEL``
    is parsed, converted into lineage payloads and submitted one by one via
    ``qdc_client.update_lineage_by_id``. Responses with status 200 are counted
    and the total is logged at the end.

    Args:
        conn: Snowflake connection settings; ``account_database``,
            ``account_schema`` and ``account_id`` are read from it.
        qdc_client: Client used to submit each lineage payload.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        lineage_query = f"""
            SELECT
                *
            FROM
                {conn.account_database}.{conn.account_schema}.QUOLLIO_LINEAGE_TABLE_LEVEL
            """
        rows = sf_executor.get_query_results(query=lineage_query)
        lineage_inputs = gen_table_lineage_payload(
            tenant_id=tenant_id,
            endpoint=conn.account_id,
            tables=parse_snowflake_results(results=rows),
        )

        ingested = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating table lineage. downstream: "
                f"{lineage_input.downstream_database_name}"
                f" -> {lineage_input.downstream_schema_name}"
                f" -> {lineage_input.downstream_table_name}"
            )
            response_status = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only successful (HTTP 200) submissions count as ingested.
            if response_status == 200:
                ingested += 1
        logger.info(f"Generating table lineage is finished. {ingested} lineages are ingested.")
    return None
56
+
57
+
58
def snowflake_column_to_column_lineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Send column-level lineage read from QUOLLIO_LINEAGE_COLUMN_LEVEL.

    Rows of ``{account_database}.{account_schema}.QUOLLIO_LINEAGE_COLUMN_LEVEL``
    are passed unparsed to ``gen_column_lineage_payload``; each resulting
    payload is submitted via ``qdc_client.update_lineage_by_id``. Responses
    with status 200 are counted and the total is logged at the end.

    Args:
        conn: Snowflake connection settings; ``account_database``,
            ``account_schema`` and ``account_id`` are read from it.
        qdc_client: Client used to submit each lineage payload.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        lineage_query = f"""
            SELECT
                *
            FROM
                {conn.account_database}.{conn.account_schema}.QUOLLIO_LINEAGE_COLUMN_LEVEL
            """
        rows = sf_executor.get_query_results(query=lineage_query)
        lineage_inputs = gen_column_lineage_payload(
            tenant_id=tenant_id,
            endpoint=conn.account_id,
            columns=rows,
        )

        ingested = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating column lineage. downstream: "
                f"{lineage_input.downstream_database_name}"
                f" -> {lineage_input.downstream_schema_name}"
                f" -> {lineage_input.downstream_table_name}"
                f" -> {lineage_input.downstream_column_name}"
            )
            response_status = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            # Only successful (HTTP 200) submissions count as ingested.
            if response_status == 200:
                ingested += 1
        logger.info(f"Generating column lineage is finished. {ingested} lineages are ingested.")
    return None
99
+
100
+
101
def snowflake_table_level_sqllineage(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Derive table lineage from the SQL in QUOLLIO_SQLLINEAGE_SOURCES and send it.

    For each row, ``SQLLineage`` extracts source and destination tables from
    ``QUERY_TEXT`` (Snowflake dialect) and builds one lineage input. All
    inputs are built first; only afterwards are they submitted via
    ``qdc_client.update_lineage_by_id``. Responses with status 200 are
    counted and the total is logged at the end.

    Args:
        conn: Snowflake connection settings; ``account_database``,
            ``account_schema`` and ``account_id`` are read from it.
        qdc_client: Client used to submit each lineage payload.
        tenant_id: Tenant identifier forwarded to the lineage input builder.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        sources_query = f"""
            SELECT
                database_name
                , schema_name
                , query_text
            FROM
                {conn.account_database}.{conn.account_schema}.QUOLLIO_SQLLINEAGE_SOURCES
            """
        source_rows = sf_executor.get_query_results(query=sources_query)

        # Phase 1: parse every statement before anything is sent.
        analyzer = SQLLineage()
        lineage_inputs = []
        for row in source_rows:
            src_tables, dest_table = analyzer.get_table_level_lineage_source(
                sql=row["QUERY_TEXT"],
                dialect="snowflake",
                dest_db=row["DATABASE_NAME"],
                dest_schema=row["SCHEMA_NAME"],
            )
            lineage_inputs.append(
                analyzer.gen_lineage_input(
                    tenant_id=tenant_id,
                    endpoint=conn.account_id,
                    src_tables=src_tables,
                    dest_table=dest_table,
                )
            )

        # Phase 2: submit the collected inputs and count the successes.
        ingested = 0
        for lineage_input in lineage_inputs:
            logger.info(
                "Generating table lineage. downstream: "
                f"{lineage_input.downstream_database_name}"
                f" -> {lineage_input.downstream_schema_name}"
                f" -> {lineage_input.downstream_table_name}"
            )
            response_status = qdc_client.update_lineage_by_id(
                global_id=lineage_input.downstream_global_id,
                payload=lineage_input.upstreams.as_dict(),
            )
            if response_status == 200:
                ingested += 1
        logger.info(f"Generating table lineage is finished. {ingested} lineages are ingested.")
    return None
151
+
152
+
153
def snowflake_table_stats(
    conn: snowflake.SnowflakeConnectionConfig,
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
) -> None:
    """Send column statistics collected from the QUOLLIO_STATS_COLUMNS_* objects.

    The list of stats objects comes from ``_gen_get_stats_views_query``. Each
    one is queried for its statistics columns, the rows are turned into
    payloads by ``gen_table_stats_payload`` and every payload is submitted
    via ``qdc_client.update_stats_by_id``. Responses with status 200 are
    counted and the total is logged at the end.

    Args:
        conn: Snowflake connection settings; ``account_database``,
            ``account_schema`` and ``account_id`` are read from it.
        qdc_client: Client used to submit each stats payload.
        tenant_id: Tenant identifier forwarded to the payload generator.
    """
    with snowflake.SnowflakeQueryExecutor(conn) as sf_executor:
        views_query = _gen_get_stats_views_query(
            db=conn.account_database,
            schema=conn.account_schema,
        )
        stats_views = sf_executor.get_query_results(query=views_query)

        ingested = 0
        for view in stats_views:
            # One query per stats object, addressed by its fully qualified name.
            stats_query = f"""
                SELECT
                    db_name
                    , schema_name
                    , table_name
                    , column_name
                    , max_value
                    , min_value
                    , null_count
                    , cardinality
                    , avg_value
                    , median_value
                    , mode_value
                    , stddev_value
                FROM
                    {view["TABLE_CATALOG"]}.{view["TABLE_SCHEMA"]}.{view["TABLE_NAME"]}
                """
            logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
            stats_rows = sf_executor.get_query_results(query=stats_query)
            stats_payloads = gen_table_stats_payload(
                tenant_id=tenant_id,
                endpoint=conn.account_id,
                stats=stats_rows,
            )
            for stats_payload in stats_payloads:
                logger.info(
                    "Generating table stats. asset: "
                    f"{stats_payload.db} -> {stats_payload.schema}"
                    f" -> {stats_payload.table} -> {stats_payload.column}"
                )
                response_status = qdc_client.update_stats_by_id(
                    global_id=stats_payload.global_id,
                    payload=stats_payload.body.get_column_stats(),
                )
                # Only successful (HTTP 200) submissions count as ingested.
                if response_status == 200:
                    ingested += 1
        logger.info(f"Generating table stats is finished. {ingested} stats are ingested.")
    return None
208
+
209
+
210
def _gen_get_stats_views_query(db: str, schema: str) -> str:
    """Build the SQL that lists the QUOLLIO_STATS_COLUMNS_* objects of a schema.

    Args:
        db: Database whose ``INFORMATION_SCHEMA.TABLES`` is queried.
        schema: Schema name; it is upper-cased in SQL before comparison.

    Returns:
        A SELECT DISTINCT statement yielding TABLE_CATALOG, TABLE_SCHEMA and
        TABLE_NAME for names starting with ``QUOLLIO_STATS_COLUMNS_``.
    """
    return f"""
        SELECT
            DISTINCT
            TABLE_CATALOG
            , TABLE_SCHEMA
            , TABLE_NAME
        FROM
            {db}.INFORMATION_SCHEMA.TABLES
        WHERE
            startswith(TABLE_NAME, 'QUOLLIO_STATS_COLUMNS_')
            AND TABLE_SCHEMA = UPPER('{schema}')
        """
@@ -54,7 +54,6 @@ class SQLLineage:
54
54
  dest_db: str = None,
55
55
  dest_schema: str = None,
56
56
  ) -> Tuple[Set[Table], Table]:
57
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
58
57
  try:
59
58
  statement: sqlglot.Expression = sqlglot.parse_one(sql=sql, error_level=sqlglot.ErrorLevel.RAISE)
60
59
  except ParseError as e:
@@ -4,6 +4,7 @@ import os
4
4
 
5
5
  from quollio_core.helper.core import setup_dbt_profile
6
6
  from quollio_core.helper.env_default import env_default
7
+ from quollio_core.helper.log import set_log_level
7
8
  from quollio_core.profilers.redshift import (
8
9
  redshift_table_level_lineage,
9
10
  redshift_table_level_sqllineage,
@@ -20,8 +21,6 @@ def build_view(
20
21
  target_tables: str = "",
21
22
  log_level: str = "info",
22
23
  ) -> None:
23
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
24
-
25
24
  logger.info("Build profiler views using dbt")
26
25
  # set parameters
27
26
  dbt_client = dbt.DBTClient()
@@ -74,7 +73,6 @@ def load_lineage(
74
73
  qdc_client: qdc.QDCExternalAPIClient,
75
74
  tenant_id: str,
76
75
  ) -> None:
77
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
78
76
  logger.info("Generate redshift table to table lineage.")
79
77
  redshift_table_level_lineage(
80
78
  conn=conn,
@@ -101,7 +99,6 @@ def load_stats(
101
99
  qdc_client: qdc.QDCExternalAPIClient,
102
100
  tenant_id: str,
103
101
  ) -> None:
104
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
105
102
 
106
103
  logger.info("Generate redshift stats.")
107
104
  redshift_table_stats(
@@ -119,7 +116,6 @@ def load_sqllineage(
119
116
  qdc_client: qdc.QDCExternalAPIClient,
120
117
  tenant_id: str,
121
118
  ) -> None:
122
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
123
119
 
124
120
  logger.info("Generate Redshift sqllineage.")
125
121
  redshift_table_level_sqllineage(
@@ -266,6 +262,8 @@ if __name__ == "__main__":
266
262
  help="The client secrete that is created on Quollio console to let clients access Quollio External API",
267
263
  )
268
264
  args = parser.parse_args()
265
+ set_log_level(level=args.log_level)
266
+
269
267
  conn = redshift.RedshiftConnectionConfig(
270
268
  host=args.host,
271
269
  build_user=args.build_user,
@@ -5,7 +5,7 @@ from typing import Dict, List, Optional
5
5
  from databricks.sdk.core import Config, HeaderFactory, oauth_service_principal
6
6
  from databricks.sql.client import Connection, connect
7
7
 
8
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
8
+ logger = logging.getLogger(__name__)
9
9
 
10
10
 
11
11
  @dataclass
@@ -47,8 +47,8 @@ class DatabricksQueryExecutor:
47
47
  cur.execute(query)
48
48
  result: List[Dict[str, str]] = cur.fetchall()
49
49
  except Exception as e:
50
- logging.error(query, exc_info=True)
51
- logging.error("databricks get_query_results failed. %s", e)
50
+ logger.error(query, exc_info=True)
51
+ logger.error("databricks get_query_results failed. %s", e)
52
52
  raise
53
53
 
54
54
  for row in result:
@@ -11,7 +11,6 @@ class DBTClient:
11
11
  self.dbt = dbtRunner()
12
12
 
13
13
  def invoke(self, cmd: str, project_dir: str, profile_dir: str, options: List[str] = None) -> dbtRunnerResult:
14
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
15
14
  req = [cmd, "--project-dir", project_dir, "--profiles-dir", profile_dir]
16
15
  if options is not None:
17
16
  req.extend(options)
@@ -25,7 +25,6 @@ class QDCExternalAPIClient:
25
25
  Tried to find a package for oauth0 client credentials flow,
26
26
  but any of them contains bugs or lacks of features to handle the token refresh when it's expired
27
27
  """
28
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
29
28
  url = f"{self.base_url}/oauth2/token"
30
29
  creds = f"{self.client_id}:{self.client_secret}"
31
30
  encoded_creds = base64.b64encode(creds.encode()).decode()
@@ -65,7 +64,6 @@ class QDCExternalAPIClient:
65
64
  return session
66
65
 
67
66
  def update_stats_by_id(self, global_id: str, payload: Dict[str, List[str]]) -> int:
68
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
69
67
  self._refresh_token_if_expired()
70
68
  headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
71
69
  endpoint = f"{self.base_url}/v2/assets/{global_id}/stats"
@@ -85,7 +83,6 @@ class QDCExternalAPIClient:
85
83
  return res.status_code
86
84
 
87
85
  def update_lineage_by_id(self, global_id: str, payload: Dict[str, List[str]]) -> int:
88
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
89
86
  self._refresh_token_if_expired()
90
87
  headers = {"content-type": "application/json", "authorization": f"Bearer {self.auth_token}"}
91
88
  endpoint = f"{self.base_url}/v2/lineage/{global_id}"
@@ -67,7 +67,6 @@ class RedshiftQueryExecutor:
67
67
  return conn
68
68
 
69
69
  def get_query_results(self, query: str) -> Tuple[List[str]]:
70
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
71
70
  with self.conn.cursor() as cur:
72
71
  try:
73
72
  cur.execute(query)
@@ -28,6 +28,12 @@ class SnowflakeQueryExecutor:
28
28
  def __init__(self, config: SnowflakeConnectionConfig) -> None:
29
29
  self.conn = self.__initialize(config)
30
30
 
31
+ def __enter__(self):
32
+ return self
33
+
34
+ def __exit__(self, exc_type, exc_value, traceback):
35
+ self.conn.close()
36
+
31
37
  def __initialize(self, config: SnowflakeConnectionConfig) -> SnowflakeConnection:
32
38
  conn: SnowflakeConnection = connect(
33
39
  user=config.account_user,
@@ -41,7 +47,6 @@ class SnowflakeQueryExecutor:
41
47
  return conn
42
48
 
43
49
  def get_query_results(self, query: str) -> List[Dict[str, str]]:
44
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
45
50
  with self.conn.cursor(DictCursor) as cur:
46
51
  try:
47
52
  cur.execute(query)
@@ -4,6 +4,7 @@ import os
4
4
 
5
5
  from quollio_core.helper.core import setup_dbt_profile
6
6
  from quollio_core.helper.env_default import env_default
7
+ from quollio_core.helper.log import set_log_level
7
8
  from quollio_core.profilers.snowflake import (
8
9
  snowflake_column_to_column_lineage,
9
10
  snowflake_table_level_sqllineage,
@@ -21,7 +22,6 @@ def build_view(
21
22
  target_tables: str = "",
22
23
  log_level: str = "info",
23
24
  ) -> None:
24
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
25
25
 
26
26
  logger.info("Build profiler views using dbt")
27
27
  # set parameters
@@ -74,9 +74,8 @@ def load_lineage(
74
74
  tenant_id: str,
75
75
  enable_column_lineage: bool = False,
76
76
  ) -> None:
77
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
78
-
79
77
  logger.info("Generate Snowflake table to table lineage.")
78
+
80
79
  snowflake_table_to_table_lineage(
81
80
  conn=conn,
82
81
  qdc_client=qdc_client,
@@ -105,7 +104,6 @@ def load_stats(
105
104
  qdc_client: qdc.QDCExternalAPIClient,
106
105
  tenant_id: str,
107
106
  ) -> None:
108
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
109
107
 
110
108
  logger.info("Generate Snowflake stats.")
111
109
  snowflake_table_stats(
@@ -124,7 +122,6 @@ def load_sqllineage(
124
122
  qdc_client: qdc.QDCExternalAPIClient,
125
123
  tenant_id: str,
126
124
  ) -> None:
127
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
128
125
 
129
126
  logger.info("Generate Snowflake sqllineage.")
130
127
  snowflake_table_level_sqllineage(
@@ -279,6 +276,8 @@ if __name__ == "__main__":
279
276
  help="Whether to ingest column lineage into QDIC or not. Default value is False",
280
277
  )
281
278
  args = parser.parse_args()
279
+ set_log_level(level=args.log_level)
280
+
282
281
  conn = snowflake.SnowflakeConnectionConfig(
283
282
  account_id=args.account_id,
284
283
  account_user=args.user,
@@ -1,256 +0,0 @@
1
- import logging
2
-
3
- from quollio_core.profilers.lineage import (
4
- gen_column_lineage_payload,
5
- gen_table_lineage_payload,
6
- parse_snowflake_results,
7
- )
8
- from quollio_core.profilers.sqllineage import SQLLineage
9
- from quollio_core.profilers.stats import gen_table_stats_payload
10
- from quollio_core.repository import qdc, snowflake
11
-
12
- logger = logging.getLogger(__name__)
13
-
14
-
15
- def snowflake_table_to_table_lineage(
16
- conn: snowflake.SnowflakeConnectionConfig,
17
- qdc_client: qdc.QDCExternalAPIClient,
18
- tenant_id: str,
19
- ) -> None:
20
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
21
- sf_executor = snowflake.SnowflakeQueryExecutor(conn)
22
- results = sf_executor.get_query_results(
23
- query="""
24
- SELECT
25
- *
26
- FROM
27
- {db}.{schema}.QUOLLIO_LINEAGE_TABLE_LEVEL
28
- """.format(
29
- db=conn.account_database,
30
- schema=conn.account_schema,
31
- )
32
- )
33
- parsed_results = parse_snowflake_results(results=results)
34
- update_table_lineage_inputs = gen_table_lineage_payload(
35
- tenant_id=tenant_id,
36
- endpoint=conn.account_id,
37
- tables=parsed_results,
38
- )
39
-
40
- req_count = 0
41
- for update_table_lineage_input in update_table_lineage_inputs:
42
- logger.info(
43
- "Generating table lineage. downstream: {db} -> {schema} -> {table}".format(
44
- db=update_table_lineage_input.downstream_database_name,
45
- schema=update_table_lineage_input.downstream_schema_name,
46
- table=update_table_lineage_input.downstream_table_name,
47
- )
48
- )
49
- status_code = qdc_client.update_lineage_by_id(
50
- global_id=update_table_lineage_input.downstream_global_id,
51
- payload=update_table_lineage_input.upstreams.as_dict(),
52
- )
53
- if status_code == 200:
54
- req_count += 1
55
- logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
56
- return
57
-
58
-
59
- def snowflake_column_to_column_lineage(
60
- conn: snowflake.SnowflakeConnectionConfig,
61
- qdc_client: qdc.QDCExternalAPIClient,
62
- tenant_id: str,
63
- ) -> None:
64
- sf_executor = snowflake.SnowflakeQueryExecutor(conn)
65
- results = sf_executor.get_query_results(
66
- query="""
67
- SELECT
68
- *
69
- FROM
70
- {db}.{schema}.QUOLLIO_LINEAGE_COLUMN_LEVEL
71
- """.format(
72
- db=conn.account_database,
73
- schema=conn.account_schema,
74
- )
75
- )
76
- update_column_lineage_inputs = gen_column_lineage_payload(
77
- tenant_id=tenant_id,
78
- endpoint=conn.account_id,
79
- columns=results,
80
- )
81
-
82
- req_count = 0
83
- for update_column_lineage_input in update_column_lineage_inputs:
84
- logger.info(
85
- "Generating column lineage. downstream: {db} -> {schema} -> {table} -> {column}".format(
86
- db=update_column_lineage_input.downstream_database_name,
87
- schema=update_column_lineage_input.downstream_schema_name,
88
- table=update_column_lineage_input.downstream_table_name,
89
- column=update_column_lineage_input.downstream_column_name,
90
- )
91
- )
92
- status_code = qdc_client.update_lineage_by_id(
93
- global_id=update_column_lineage_input.downstream_global_id,
94
- payload=update_column_lineage_input.upstreams.as_dict(),
95
- )
96
- if status_code == 200:
97
- req_count += 1
98
- logger.info(f"Generating column lineage is finished. {req_count} lineages are ingested.")
99
- return
100
-
101
-
102
- def snowflake_table_level_sqllineage(
103
- conn: snowflake.SnowflakeConnectionConfig,
104
- qdc_client: qdc.QDCExternalAPIClient,
105
- tenant_id: str,
106
- ) -> None:
107
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
108
- sf_executor = snowflake.SnowflakeQueryExecutor(conn)
109
- results = sf_executor.get_query_results(
110
- query="""
111
- SELECT
112
- database_name
113
- , schema_name
114
- , query_text
115
- FROM
116
- {db}.{schema}.QUOLLIO_SQLLINEAGE_SOURCES
117
- """.format(
118
- db=conn.account_database,
119
- schema=conn.account_schema,
120
- )
121
- )
122
- update_table_lineage_inputs_list = list()
123
- sql_lineage = SQLLineage()
124
- for result in results:
125
- src_tables, dest_table = sql_lineage.get_table_level_lineage_source(
126
- sql=result["QUERY_TEXT"],
127
- dialect="snowflake",
128
- dest_db=result["DATABASE_NAME"],
129
- dest_schema=result["SCHEMA_NAME"],
130
- )
131
- update_table_lineage_inputs = sql_lineage.gen_lineage_input(
132
- tenant_id=tenant_id, endpoint=conn.account_id, src_tables=src_tables, dest_table=dest_table
133
- )
134
- update_table_lineage_inputs_list.append(update_table_lineage_inputs)
135
-
136
- req_count = 0
137
- for update_table_lineage_input in update_table_lineage_inputs_list:
138
- logger.info(
139
- "Generating table lineage. downstream: {db} -> {schema} -> {table}".format(
140
- db=update_table_lineage_input.downstream_database_name,
141
- schema=update_table_lineage_input.downstream_schema_name,
142
- table=update_table_lineage_input.downstream_table_name,
143
- )
144
- )
145
- status_code = qdc_client.update_lineage_by_id(
146
- global_id=update_table_lineage_input.downstream_global_id,
147
- payload=update_table_lineage_input.upstreams.as_dict(),
148
- )
149
- if status_code == 200:
150
- req_count += 1
151
- logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
152
- return
153
-
154
-
155
- def _get_target_tables_query(db: str, schema: str) -> str:
156
- query = """
157
- SELECT
158
- DISTINCT
159
- TABLE_CATALOG
160
- , TABLE_SCHEMA
161
- , TABLE_NAME
162
- FROM
163
- {db}.{schema}.QUOLLIO_STATS_PROFILING_COLUMNS
164
- """.format(
165
- db=db, schema=schema
166
- )
167
- return query
168
-
169
-
170
- def _get_stats_tables_query(db: str, schema: str) -> str:
171
- query = """
172
- SELECT
173
- DISTINCT
174
- TABLE_CATALOG
175
- , TABLE_SCHEMA
176
- , TABLE_NAME
177
- FROM
178
- {db}.INFORMATION_SCHEMA.TABLES
179
- WHERE
180
- startswith(TABLE_NAME, 'QUOLLIO_STATS_COLUMNS_')
181
- AND TABLE_SCHEMA = UPPER('{schema}')
182
- """.format(
183
- db=db, schema=schema
184
- )
185
- return query
186
-
187
-
188
- def snowflake_table_stats(
189
- conn: snowflake.SnowflakeConnectionConfig,
190
- qdc_client: qdc.QDCExternalAPIClient,
191
- tenant_id: str,
192
- ) -> None:
193
- logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
194
- sf_executor = snowflake.SnowflakeQueryExecutor(conn)
195
-
196
- target_query = _get_target_tables_query(
197
- db=conn.account_database,
198
- schema=conn.account_schema,
199
- )
200
- target_assets = sf_executor.get_query_results(query=target_query)
201
-
202
- stats_query = _get_stats_tables_query(
203
- db=conn.account_database,
204
- schema=conn.account_schema,
205
- )
206
- stats_columns = sf_executor.get_query_results(query=stats_query)
207
-
208
- req_count = 0
209
- for target_asset in target_assets:
210
- for stats_column in stats_columns:
211
- stats_query = """
212
- SELECT
213
- db_name
214
- , schema_name
215
- , table_name
216
- , column_name
217
- , max_value
218
- , min_value
219
- , null_count
220
- , cardinality
221
- , avg_value
222
- , median_value
223
- , mode_value
224
- , stddev_value
225
- FROM
226
- {db}.{schema}.{table}
227
- WHERE
228
- db_name = '{target_db}'
229
- and schema_name = '{target_schema}'
230
- and table_name = '{target_table}'
231
- """.format(
232
- db=stats_column["TABLE_CATALOG"],
233
- schema=stats_column["TABLE_SCHEMA"],
234
- table=stats_column["TABLE_NAME"],
235
- target_db=target_asset["TABLE_CATALOG"],
236
- target_schema=target_asset["TABLE_SCHEMA"],
237
- target_table=target_asset["TABLE_NAME"],
238
- )
239
- stats_result = sf_executor.get_query_results(query=stats_query)
240
- payloads = gen_table_stats_payload(tenant_id=tenant_id, endpoint=conn.account_id, stats=stats_result)
241
- for payload in payloads:
242
- logger.info(
243
- "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
244
- db=payload.db,
245
- schema=payload.schema,
246
- table=payload.table,
247
- column=payload.column,
248
- )
249
- )
250
- status_code = qdc_client.update_stats_by_id(
251
- global_id=payload.global_id,
252
- payload=payload.body.get_column_stats(),
253
- )
254
- if status_code == 200:
255
- req_count += 1
256
- logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
File without changes
File without changes