quollio-core 0.6.5__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97)
  1. {quollio_core-0.6.5 → quollio_core-0.8.0}/PKG-INFO +3 -4
  2. {quollio_core-0.6.5 → quollio_core-0.8.0}/pyproject.toml +2 -3
  3. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/__init__.py +1 -1
  4. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/bigquery.py +47 -2
  5. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/bricks.py +26 -4
  6. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/bigquery.py +86 -26
  7. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/redshift.py +74 -38
  8. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/stats.py +37 -0
  9. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/redshift.py +48 -5
  10. quollio_core-0.8.0/quollio_core/repository/cloud_resource_manager.py +25 -0
  11. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/snowflake.py +4 -1
  12. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/teradata.py +25 -2
  13. {quollio_core-0.6.5 → quollio_core-0.8.0}/LICENSE +0 -0
  14. {quollio_core-0.6.5 → quollio_core-0.8.0}/README.md +0 -0
  15. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/.gitignore +0 -0
  16. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/README.md +0 -0
  17. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/analyses/.gitkeep +0 -0
  18. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/dbt_project.yml +0 -0
  19. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/macros/.gitkeep +0 -0
  20. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.sql +0 -0
  21. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_column_level.yml +0 -0
  22. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.sql +0 -0
  23. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/quollio_lineage_table_level.yml +0 -0
  24. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/models/sources.yml +0 -0
  25. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/packages_hub.yml +0 -0
  26. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/packages_local.yml +0 -0
  27. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/profiles/profiles_template.yml +0 -0
  28. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/seeds/.gitkeep +0 -0
  29. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/databricks/snapshots/.gitkeep +0 -0
  30. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/README.md +0 -0
  31. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/analyses/.gitkeep +0 -0
  32. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/dbt_project.yml +0 -0
  33. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/macros/.gitkeep +0 -0
  34. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/macros/materialization/divided_view.sql +0 -0
  35. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.sql +0 -0
  36. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_table_level.yml +0 -0
  37. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.sql +0 -0
  38. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_lineage_view_level.yml +0 -0
  39. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.sql +0 -0
  40. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_sqllineage_sources.yml +0 -0
  41. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.sql +0 -0
  42. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_columns.yml +0 -0
  43. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.sql +0 -0
  44. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/quollio_stats_profiling_columns.yml +0 -0
  45. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/models/sources.yml +0 -0
  46. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/packages_hub.yml +0 -0
  47. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/packages_local.yml +0 -0
  48. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/profiles/profiles_template.yml +0 -0
  49. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/seeds/.gitkeep +0 -0
  50. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/redshift/snapshots/.gitkeep +0 -0
  51. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/seeds/.gitkeep +0 -0
  52. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/README.md +0 -0
  53. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/analyses/.gitkeep +0 -0
  54. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/dbt_project.yml +0 -0
  55. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/.gitkeep +0 -0
  56. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +0 -0
  57. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/macros/materialization/get_imported_databases.sql +0 -0
  58. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql +0 -0
  59. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml +0 -0
  60. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql +0 -0
  61. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.yml +0 -0
  62. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql +0 -0
  63. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml +0 -0
  64. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql +0 -0
  65. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml +0 -0
  66. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +0 -0
  67. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml +0 -0
  68. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/models/sources.yml +0 -0
  69. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/packages_hub.yml +0 -0
  70. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/packages_local.yml +0 -0
  71. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml +0 -0
  72. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/seeds/.gitkeep +0 -0
  73. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/dbt_projects/snowflake/snapshots/.gitkeep +0 -0
  74. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/__init__.py +0 -0
  75. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/core.py +0 -0
  76. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/env_default.py +0 -0
  77. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/log.py +0 -0
  78. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/helper/log_utils.py +0 -0
  79. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/models/avroasset.py +0 -0
  80. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/models/qdc.py +0 -0
  81. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/__init__.py +0 -0
  82. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/databricks.py +0 -0
  83. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/lineage.py +0 -0
  84. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/qdc.py +0 -0
  85. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/snowflake.py +0 -0
  86. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/sqllineage.py +0 -0
  87. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/teradata/lineage.py +0 -0
  88. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/profilers/teradata/stats.py +0 -0
  89. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/__init__.py +0 -0
  90. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/bigquery.py +0 -0
  91. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/databricks.py +0 -0
  92. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/dbt.py +0 -0
  93. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/qdc.py +0 -0
  94. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/redshift.py +0 -0
  95. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/snowflake.py +0 -0
  96. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/ssm.py +0 -0
  97. {quollio_core-0.6.5 → quollio_core-0.8.0}/quollio_core/repository/teradata.py +0 -0
@@ -1,14 +1,13 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: quollio-core
3
- Version: 0.6.5
3
+ Version: 0.8.0
4
4
  Summary: Quollio Core
5
5
  Author-email: quollio-dev <qt.dev@quollio.com>
6
6
  Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
7
- Requires-Python: >=3.8
7
+ Requires-Python: >=3.9
8
8
  Description-Content-Type: text/markdown
9
9
  Classifier: Programming Language :: Python
10
10
  Classifier: Programming Language :: Python :: 3
11
- Classifier: Programming Language :: Python :: 3.8
12
11
  Classifier: Programming Language :: Python :: 3.9
13
12
  Classifier: Programming Language :: Python :: 3.10
14
13
  Classifier: Programming Language :: Python :: 3.11
@@ -29,7 +28,7 @@ Requires-Dist: PyYAML==6.0.1
29
28
  Requires-Dist: requests==2.31.0
30
29
  Requires-Dist: pyjwt==2.8.0
31
30
  Requires-Dist: redshift-connector==2.0.915
32
- Requires-Dist: snowflake-connector-python==3.5.0
31
+ Requires-Dist: snowflake-connector-python==3.15.0
33
32
  Requires-Dist: databricks-sdk==0.17.0
34
33
  Requires-Dist: databricks-sql-connector==2.9.5
35
34
  Requires-Dist: sqlglot==20.8.0
@@ -5,7 +5,7 @@ build-backend = "flit_core.buildapi"
5
5
  [project]
6
6
  name = "quollio-core"
7
7
  readme = "README.md"
8
- requires-python = ">=3.8"
8
+ requires-python = ">=3.9"
9
9
  license = {file = "LICENSE"}
10
10
  authors = [
11
11
  {name = "quollio-dev", email = "qt.dev@quollio.com"},
@@ -17,7 +17,6 @@ maintainers = [
17
17
  classifiers = [
18
18
  "Programming Language :: Python",
19
19
  "Programming Language :: Python :: 3",
20
- "Programming Language :: Python :: 3.8",
21
20
  "Programming Language :: Python :: 3.9",
22
21
  "Programming Language :: Python :: 3.10",
23
22
  "Programming Language :: Python :: 3.11",
@@ -40,7 +39,7 @@ dependencies = [
40
39
  ,"requests==2.31.0"
41
40
  ,"pyjwt==2.8.0"
42
41
  ,"redshift-connector==2.0.915"
43
- ,"snowflake-connector-python==3.5.0"
42
+ ,"snowflake-connector-python==3.15.0"
44
43
  ,"databricks-sdk==0.17.0"
45
44
  ,"databricks-sql-connector==2.9.5"
46
45
  ,"sqlglot==20.8.0"
@@ -1,4 +1,4 @@
1
1
  """Quollio Core"""
2
2
 
3
- __version__ = "0.6.5"
3
+ __version__ = "0.8.0"
4
4
  __author__ = "Quollio Technologies, Inc"
@@ -3,10 +3,12 @@ import json
3
3
 
4
4
  from google.auth.credentials import Credentials
5
5
 
6
+ from quollio_core.helper.core import is_valid_domain
6
7
  from quollio_core.helper.env_default import env_default
7
8
  from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
8
9
  from quollio_core.profilers.bigquery import bigquery_table_lineage, bigquery_table_stats
9
- from quollio_core.repository import qdc
10
+ from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
11
+ from quollio_core.repository import qdc, ssm
10
12
  from quollio_core.repository.bigquery import BigQueryClient, get_credentials, get_org_id
11
13
 
12
14
 
@@ -30,8 +32,15 @@ def load_lineage(
30
32
  org_id: str,
31
33
  credentials: Credentials,
32
34
  qdc_client: qdc.QDCExternalAPIClient,
35
+ enable_multi_projects: str,
33
36
  ) -> None:
34
37
  logger.info("Loading lineage data.")
38
+ file_content = get_avro_file_content(
39
+ tenant_id=tenant_id,
40
+ account_id=org_id,
41
+ qdc_client=qdc_client,
42
+ )
43
+ existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
35
44
  bigquery_table_lineage(
36
45
  qdc_client=qdc_client,
37
46
  tenant_id=tenant_id,
@@ -39,6 +48,8 @@ def load_lineage(
39
48
  regions=regions,
40
49
  credentials=credentials,
41
50
  org_id=org_id,
51
+ existing_global_ids=existing_global_ids,
52
+ enable_multi_projects=enable_multi_projects,
42
53
  )
43
54
  logger.info("Lineage data loaded successfully.")
44
55
 
@@ -146,6 +157,27 @@ if __name__ == "__main__":
146
157
  help="Comma-separated list of dataplex stats tables - <project_id>.<dataset_id>.<table_id>",
147
158
  )
148
159
 
160
+ parser.add_argument(
161
+ "--enable_multi_projects",
162
+ type=str,
163
+ choices=["ENABLED", "DISABLED"],
164
+ action=env_default("ENABLE_MULTI_PROJECTS"),
165
+ default="DISABLED",
166
+ required=False,
167
+ help="Whether to enable multi-projects support. If set to 'true', \
168
+ the script will load lineage and stats from all projects accessible by the credentials. Default is 'false'.",
169
+ )
170
+ parser.add_argument(
171
+ "--external_api_access",
172
+ type=str,
173
+ choices=["PUBLIC", "VPC_ENDPOINT"],
174
+ action=env_default("EXTERNAL_API_ACCESS"),
175
+ default="PUBLIC",
176
+ required=False,
177
+ help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
178
+ if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
179
+ )
180
+
149
181
  args = parser.parse_args()
150
182
 
151
183
  # Validate that dataplex_stats_tables is provided if load_stats is in commands
@@ -154,9 +186,21 @@ if __name__ == "__main__":
154
186
 
155
187
  configure_logging(args.log_level)
156
188
 
189
+ api_url = args.api_url
190
+ if args.external_api_access == "VPC_ENDPOINT":
191
+ logger.debug("Using VPC Endpoint for Quollio API access")
192
+ api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
193
+ if err is not None:
194
+ logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
195
+ raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
196
+ is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
197
+ if not is_domain_valid:
198
+ raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
199
+ logger.debug("API URL: %s", api_url)
200
+
157
201
  credentials = initialize_credentials(args.credentials_json)
158
202
  org_id = initialize_org_id(args.credentials_json)
159
- qdc_client = qdc.initialize_qdc_client(args.api_url, args.client_id, args.client_secret)
203
+ qdc_client = qdc.initialize_qdc_client(api_url, args.client_id, args.client_secret)
160
204
  bq_client = initialize_bq_client(credentials, args.project_id)
161
205
  if args.project_id is None:
162
206
  args.project_id = json.loads(args.credentials_json)["project_id"]
@@ -170,6 +214,7 @@ if __name__ == "__main__":
170
214
  org_id=org_id,
171
215
  credentials=credentials,
172
216
  qdc_client=qdc_client,
217
+ enable_multi_projects=args.enable_multi_projects,
173
218
  )
174
219
 
175
220
  if "load_stats" in args.commands:
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import shutil
5
5
 
6
- from quollio_core.helper.core import setup_dbt_profile, trim_prefix
6
+ from quollio_core.helper.core import is_valid_domain, setup_dbt_profile, trim_prefix
7
7
  from quollio_core.helper.env_default import env_default
8
8
  from quollio_core.helper.log import set_log_level
9
9
  from quollio_core.profilers.databricks import (
@@ -13,7 +13,7 @@ from quollio_core.profilers.databricks import (
13
13
  )
14
14
  from quollio_core.profilers.stats import get_column_stats_items
15
15
  from quollio_core.repository import databricks as db
16
- from quollio_core.repository import dbt, qdc
16
+ from quollio_core.repository import dbt, qdc, ssm
17
17
 
18
18
  logger = logging.getLogger(__name__)
19
19
 
@@ -256,6 +256,16 @@ if __name__ == "__main__":
256
256
  required=False,
257
257
  help="Whether to ingest column lineage into QDIC or not. Default value is False",
258
258
  )
259
+ parser.add_argument(
260
+ "--external_api_access",
261
+ type=str,
262
+ choices=["PUBLIC", "VPC_ENDPOINT"],
263
+ action=env_default("EXTERNAL_API_ACCESS"),
264
+ default="PUBLIC",
265
+ required=False,
266
+ help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
267
+ if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
268
+ )
259
269
 
260
270
  stats_items = get_column_stats_items()
261
271
  parser.add_argument(
@@ -294,9 +304,21 @@ if __name__ == "__main__":
294
304
  dbt_macro_source=args.dbt_macro_source,
295
305
  )
296
306
 
307
+ api_url = args.api_url
308
+ if args.external_api_access == "VPC_ENDPOINT":
309
+ logger.debug("Using VPC Endpoint for Quollio API access")
310
+ api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
311
+ if err is not None:
312
+ logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
313
+ raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
314
+ is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
315
+ if not is_domain_valid:
316
+ raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
317
+ logger.debug("API URL: %s", api_url)
318
+
297
319
  if "load_lineage" in args.commands:
298
320
  qdc_client = qdc.QDCExternalAPIClient(
299
- base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
321
+ base_url=api_url, client_id=args.client_id, client_secret=args.client_secret
300
322
  )
301
323
  load_lineage(
302
324
  conn=conn,
@@ -308,7 +330,7 @@ if __name__ == "__main__":
308
330
 
309
331
  if "load_stats" in args.commands:
310
332
  qdc_client = qdc.QDCExternalAPIClient(
311
- base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
333
+ base_url=api_url, client_id=args.client_id, client_secret=args.client_secret
312
334
  )
313
335
  databricks_column_stats(
314
336
  conn=conn,
@@ -1,15 +1,25 @@
1
+ import io
2
+ import os
1
3
  from typing import Dict, List
2
4
 
5
+ from fastavro import writer
3
6
  from google.auth.credentials import Credentials
4
7
 
8
+ from quollio_core.helper.core import new_global_id
5
9
  from quollio_core.helper.log_utils import error_handling_decorator, logger
6
- from quollio_core.profilers.lineage import gen_table_lineage_payload, parse_bigquery_table_lineage
10
+ from quollio_core.models.avroasset import AvroAsset
11
+ from quollio_core.models.qdc import GetImportURLRequest
12
+ from quollio_core.profilers.lineage import (
13
+ gen_table_avro_lineage_payload,
14
+ gen_table_lineage_payload,
15
+ parse_bigquery_table_lineage,
16
+ )
7
17
  from quollio_core.profilers.stats import gen_table_stats_payload
8
18
  from quollio_core.repository import qdc
9
19
  from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
20
+ from quollio_core.repository.cloud_resource_manager import CloudResourceManagerClient
10
21
 
11
22
 
12
- @error_handling_decorator
13
23
  def bigquery_table_lineage(
14
24
  qdc_client: qdc.QDCExternalAPIClient,
15
25
  tenant_id: str,
@@ -17,34 +27,84 @@ def bigquery_table_lineage(
17
27
  regions: list,
18
28
  org_id: str,
19
29
  credentials: Credentials,
30
+ existing_global_ids: Dict[str, bool],
31
+ enable_multi_projects: str = "DISABLED",
20
32
  ) -> None:
21
33
  lineage_client = GCPLineageClient(credentials)
22
- bq_client = BigQueryClient(credentials, project_id)
23
-
24
- datasets = bq_client.list_dataset_ids()
25
- all_tables = generate_table_list(bq_client, datasets)
26
- lineage_links = generate_lineage_links(all_tables, lineage_client, project_id, regions)
27
- lineage_links = parse_bigquery_table_lineage(lineage_links)
28
- logger.debug("The following resources will be ingested. %s", lineage_links)
29
-
30
- update_table_lineage_inputs = gen_table_lineage_payload(tenant_id=tenant_id, endpoint=org_id, tables=lineage_links)
31
-
32
- req_count = 0
33
- for update_table_lineage_input in update_table_lineage_inputs:
34
- logger.info(
35
- "Generating table lineage. downstream: %s -> %s -> %s. upstream: %s",
36
- update_table_lineage_input.downstream_database_name,
37
- update_table_lineage_input.downstream_schema_name,
38
- update_table_lineage_input.downstream_table_name,
39
- update_table_lineage_input.upstreams.as_dict(),
34
+ crm_client = CloudResourceManagerClient(credentials)
35
+
36
+ target_project_ids = []
37
+ if enable_multi_projects == "ENABLED":
38
+ try:
39
+ target_projects = crm_client.list_projects()
40
+ except Exception as e:
41
+ raise Exception(f"ListProjects by cloud resource manager failed. Err. {str(e)}")
42
+
43
+ for target_project in target_projects["projects"]:
44
+ if target_project is None:
45
+ logger.warning("projects.Projects returns None. Proceed to loop project value")
46
+ continue
47
+
48
+ target_project_id = target_project.get("projectId", "")
49
+ if target_project_id == "":
50
+ logger.warning("projects.Projects is empty string. Proceed to loop project value")
51
+ continue
52
+
53
+ target_project_ids.append(target_project_id)
54
+ else:
55
+ target_project_ids.append(project_id)
56
+
57
+ update_table_lineage_inputs = []
58
+ for target_project_id in target_project_ids:
59
+ bq_client = BigQueryClient(credentials, target_project_id)
60
+ datasets = bq_client.list_dataset_ids()
61
+ all_tables = generate_table_list(bq_client, datasets)
62
+ lineage_links = generate_lineage_links(all_tables, lineage_client, target_project_id, regions)
63
+ lineage_links = parse_bigquery_table_lineage(lineage_links)
64
+ logger.debug("The following resources will be ingested. %s", lineage_links)
65
+
66
+ update_table_lineage_input = gen_table_lineage_payload(
67
+ tenant_id=tenant_id, endpoint=org_id, tables=lineage_links
40
68
  )
41
- status_code = qdc_client.update_lineage_by_id(
42
- global_id=update_table_lineage_input.downstream_global_id,
43
- payload=update_table_lineage_input.upstreams.as_dict(),
69
+ update_table_lineage_input = gen_table_avro_lineage_payload(
70
+ tenant_id=tenant_id,
71
+ endpoint=org_id,
72
+ tables=lineage_links,
73
+ existing_global_ids=existing_global_ids,
44
74
  )
45
- if status_code == 200:
46
- req_count += 1
47
- logger.info("Generating table lineage is finished. %s lineages are ingested.", req_count)
75
+ update_table_lineage_inputs.extend(update_table_lineage_input)
76
+
77
+ stack_name = os.getenv("CF_STACK")
78
+ import_req = GetImportURLRequest(
79
+ service_name="bigquery",
80
+ source_name=stack_name,
81
+ file_name="{name}.avro".format(name=stack_name),
82
+ override_logical_name="false",
83
+ update_mode="partial",
84
+ )
85
+ datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=org_id, data_id="", data_type="data_source")
86
+ logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
87
+
88
+ import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
89
+ if import_res is None:
90
+ logger.error("get_import_url failed. Please retry `load_lineage` again")
91
+ return
92
+ logger.debug("ImportResponse: {res}".format(res=import_res))
93
+
94
+ avro_schema = AvroAsset.avro_schema_to_python()
95
+
96
+ buffer = io.BytesIO()
97
+ writer(buffer, avro_schema, update_table_lineage_inputs)
98
+
99
+ res = qdc_client.upload_file(
100
+ url=import_res.location,
101
+ metadata=import_res.datasource_metadata_response_body,
102
+ buffer=buffer.getbuffer().tobytes(),
103
+ )
104
+
105
+ if res == 200:
106
+ logger.info("Upload table lineage is finished.")
107
+ return
48
108
 
49
109
 
50
110
  @error_handling_decorator
@@ -1,10 +1,17 @@
1
+ import io
1
2
  import logging
2
- from typing import List
3
+ import os
4
+ from typing import Dict, List
3
5
 
4
- from quollio_core.profilers.lineage import gen_table_lineage_payload, gen_table_lineage_payload_inputs
6
+ from fastavro import writer
7
+
8
+ from quollio_core.helper.core import new_global_id
9
+ from quollio_core.models.avroasset import AvroAsset
10
+ from quollio_core.models.qdc import GetImportURLRequest
11
+ from quollio_core.profilers.lineage import gen_table_avro_lineage_payload, gen_table_lineage_payload_inputs
5
12
  from quollio_core.profilers.sqllineage import SQLLineage
6
13
  from quollio_core.profilers.stats import (
7
- gen_table_stats_payload_from_tuple,
14
+ gen_table_stats_avro_payload_from_tuple,
8
15
  get_is_target_stats_items,
9
16
  render_sql_for_stats,
10
17
  )
@@ -18,6 +25,7 @@ def redshift_table_level_lineage(
18
25
  qdc_client: qdc.QDCExternalAPIClient,
19
26
  tenant_id: str,
20
27
  dbt_table_name: str,
28
+ existing_global_ids: Dict[str, bool],
21
29
  ) -> None:
22
30
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
23
31
  results = redshift_executor.get_query_results(
@@ -34,28 +42,39 @@ def redshift_table_level_lineage(
34
42
  )
35
43
  lineage_payload_inputs = gen_table_lineage_payload_inputs(input_data=results)
36
44
 
37
- update_table_lineage_inputs = gen_table_lineage_payload(
45
+ update_table_lineage_inputs = gen_table_avro_lineage_payload(
38
46
  tenant_id=tenant_id,
39
47
  endpoint=conn.host,
40
48
  tables=lineage_payload_inputs,
49
+ existing_global_ids=existing_global_ids,
50
+ )
51
+ stack_name = os.getenv("CF_STACK")
52
+ import_req = GetImportURLRequest(
53
+ service_name="redshift",
54
+ source_name=stack_name,
55
+ file_name="{name}.avro".format(name=stack_name),
56
+ override_logical_name="false",
57
+ update_mode="partial",
41
58
  )
59
+ datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=conn.host, data_id="", data_type="data_source")
60
+ logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
61
+ import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
62
+ if import_res is None:
63
+ logger.error("get_import_url failed. Please retry `load_lineage` again")
64
+ return
65
+ logger.debug("ImportResponse: {res}".format(res=import_res))
42
66
 
43
- req_count = 0
44
- for update_table_lineage_input in update_table_lineage_inputs:
45
- logger.info(
46
- "Generating table lineage. downstream: {db} -> {schema} -> {table}".format(
47
- db=update_table_lineage_input.downstream_database_name,
48
- schema=update_table_lineage_input.downstream_schema_name,
49
- table=update_table_lineage_input.downstream_table_name,
50
- )
51
- )
52
- status_code = qdc_client.update_lineage_by_id(
53
- global_id=update_table_lineage_input.downstream_global_id,
54
- payload=update_table_lineage_input.upstreams.as_dict(),
55
- )
56
- if status_code == 200:
57
- req_count += 1
58
- logger.info(f"Generating table lineage is finished. {req_count} lineages are ingested.")
67
+ avro_schema = AvroAsset.avro_schema_to_python()
68
+
69
+ buffer = io.BytesIO()
70
+ writer(buffer, avro_schema, update_table_lineage_inputs)
71
+ res = qdc_client.upload_file(
72
+ url=import_res.location,
73
+ metadata=import_res.datasource_metadata_response_body,
74
+ buffer=buffer.getbuffer().tobytes(),
75
+ )
76
+ if res == 200:
77
+ logger.info("Upload table lineage is finished.")
59
78
  return
60
79
 
61
80
 
@@ -82,6 +101,7 @@ def redshift_table_stats(
82
101
  qdc_client: qdc.QDCExternalAPIClient,
83
102
  tenant_id: str,
84
103
  stats_items: List[str],
104
+ existing_global_ids: Dict[str, bool],
85
105
  ) -> None:
86
106
  is_aggregate_items = get_is_target_stats_items(stats_items=stats_items)
87
107
  with redshift.RedshiftQueryExecutor(config=conn) as redshift_executor:
@@ -92,7 +112,7 @@ def redshift_table_stats(
92
112
  stats_views = redshift_executor.get_query_results(query=stats_query)
93
113
  logger.info("Found %s for table statistics.", len(stats_views))
94
114
 
95
- req_count = 0
115
+ update_stats_inputs = list()
96
116
  for stats_view in stats_views:
97
117
  table_fqn = "{catalog}.{schema}.{table}".format(
98
118
  catalog=stats_view[0], schema=stats_view[1], table=stats_view[2]
@@ -100,23 +120,39 @@ def redshift_table_stats(
100
120
  stats_query = render_sql_for_stats(is_aggregate_items=is_aggregate_items, table_fqn=table_fqn)
101
121
  logger.debug(f"The following sql will be fetched to retrieve stats values. {stats_query}")
102
122
  stats_result = redshift_executor.get_query_results(query=stats_query)
103
- payloads = gen_table_stats_payload_from_tuple(tenant_id=tenant_id, endpoint=conn.host, stats=stats_result)
104
- for payload in payloads:
105
- logger.info(
106
- "Generating table stats. asset: {db} -> {schema} -> {table} -> {column}".format(
107
- db=payload.db,
108
- schema=payload.schema,
109
- table=payload.table,
110
- column=payload.column,
111
- )
112
- )
113
- status_code = qdc_client.update_stats_by_id(
114
- global_id=payload.global_id,
115
- payload=payload.body.get_column_stats(),
116
- )
117
- if status_code == 200:
118
- req_count += 1
119
- logger.info(f"Generating table stats is finished. {req_count} stats are ingested.")
123
+ payload = gen_table_stats_avro_payload_from_tuple(
124
+ tenant_id=tenant_id, endpoint=conn.host, stats=stats_result, existing_global_ids=existing_global_ids
125
+ )
126
+ update_stats_inputs += payload
127
+
128
+ stack_name = os.getenv("CF_STACK")
129
+ import_req = GetImportURLRequest(
130
+ service_name="redshift",
131
+ source_name=stack_name,
132
+ file_name="{name}.avro".format(name=stack_name),
133
+ override_logical_name="false",
134
+ update_mode="partial",
135
+ )
136
+ datasource_id = new_global_id(tenant_id=tenant_id, cluster_id=conn.host, data_id="", data_type="data_source")
137
+ logger.debug("Datasource id: {dsrc_id}".format(dsrc_id=datasource_id))
138
+ import_res = qdc_client.get_import_url(datasource_id=datasource_id, payload=import_req)
139
+ if import_res is None:
140
+ logger.error("get_import_url failed. Please retry load_stats again")
141
+ return
142
+ logger.debug("ImportResponse: {res}".format(res=import_res))
143
+
144
+ avro_schema = AvroAsset.avro_schema_to_python()
145
+
146
+ buffer = io.BytesIO()
147
+ writer(buffer, avro_schema, update_stats_inputs)
148
+ res = qdc_client.upload_file(
149
+ url=import_res.location,
150
+ metadata=import_res.datasource_metadata_response_body,
151
+ buffer=buffer.getbuffer().tobytes(),
152
+ )
153
+ if res == 200:
154
+ logger.info("Generating table stats is finished.")
155
+
120
156
  return
121
157
 
122
158
 
@@ -147,6 +147,43 @@ def gen_table_stats_payload(tenant_id: str, endpoint: str, stats: List[Dict[str,
147
147
  return payloads
148
148
 
149
149
 
150
def gen_table_stats_avro_payload_from_tuple(
    tenant_id: str, endpoint: str, stats: Tuple[List[str]], existing_global_ids: Dict[str, bool]
) -> List[Dict[str, str]]:
    """Convert column-statistics rows into Avro asset dictionaries.

    Each row in ``stats`` is read positionally: indexes 0-3 are the database,
    schema, table and column names; 4 and 5 are max and min; 6 and 7 are the
    null count and the unique count; 8-11 are mean, median, mode and stddev.

    Args:
        tenant_id: Tenant identifier passed to ``new_global_id``.
        endpoint: Passed to ``new_global_id`` as ``cluster_id``.
        stats: Rows of statistics in the positional layout described above.
        existing_global_ids: Lookup keyed by global ID; only rows whose column
            global ID maps to ``True`` are emitted.

    Returns:
        One ``AvroAsset.to_dict()`` result per retained row, in input order.
    """
    avro_records = []
    for row in stats:
        database, schema, table, column = row[:4]

        # The column's global ID is derived from the concatenated names.
        column_global_id = new_global_id(
            tenant_id=tenant_id,
            cluster_id=endpoint,
            data_id=f"{database}{schema}{table}{column}",
            data_type="column",
        )

        # Rows whose ID is missing (or not exactly True) in the lookup are dropped.
        if existing_global_ids.get(column_global_id) is True:
            asset = AvroAsset(
                id=column_global_id,
                object_type="column",
                parents=[database, schema, table],
                name=column,
                stats_max=convert_value_type(row[4], True),
                stats_min=convert_value_type(row[5], True),
                stats_mean=convert_value_type(row[8], True),
                stats_median=convert_value_type(row[9], True),
                stats_mode=convert_value_type(row[10], True),
                stats_stddev=convert_value_type(row[11], True),
                stats_number_of_null=convert_value_type(row[6], True),
                stats_number_of_unique=convert_value_type(row[7], True),
            )
            avro_records.append(asset.to_dict())

    return avro_records
185
+
186
+
150
187
  def gen_table_stats_payload_from_tuple(
151
188
  tenant_id: str, endpoint: str, stats: Tuple[List[str]]
152
189
  ) -> List[Dict[str, Union[str, List[Dict[str, str]]]]]:
@@ -3,16 +3,17 @@ import logging
3
3
  import os
4
4
  import shutil
5
5
 
6
- from quollio_core.helper.core import setup_dbt_profile
6
+ from quollio_core.helper.core import is_valid_domain, setup_dbt_profile
7
7
  from quollio_core.helper.env_default import env_default
8
8
  from quollio_core.helper.log import set_log_level
9
+ from quollio_core.profilers.qdc import gen_existing_global_id_dict, get_avro_file_content
9
10
  from quollio_core.profilers.redshift import (
10
11
  redshift_table_level_lineage,
11
12
  redshift_table_level_sqllineage,
12
13
  redshift_table_stats,
13
14
  )
14
15
  from quollio_core.profilers.stats import get_column_stats_items
15
- from quollio_core.repository import dbt, qdc, redshift
16
+ from quollio_core.repository import dbt, qdc, redshift, ssm
16
17
 
17
18
  logger = logging.getLogger(__name__)
18
19
 
@@ -84,11 +85,20 @@ def load_lineage(
84
85
  tenant_id: str,
85
86
  ) -> None:
86
87
  logger.info("Generate redshift table to table lineage.")
88
+
89
+ file_content = get_avro_file_content(
90
+ tenant_id=tenant_id,
91
+ account_id=conn.host,
92
+ qdc_client=qdc_client,
93
+ )
94
+ existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
95
+
87
96
  redshift_table_level_lineage(
88
97
  conn=conn,
89
98
  qdc_client=qdc_client,
90
99
  tenant_id=tenant_id,
91
100
  dbt_table_name="quollio_lineage_table_level",
101
+ existing_global_ids=existing_global_ids,
92
102
  )
93
103
 
94
104
  logger.info("Generate redshift view level lineage.")
@@ -97,6 +107,7 @@ def load_lineage(
97
107
  qdc_client=qdc_client,
98
108
  tenant_id=tenant_id,
99
109
  dbt_table_name="quollio_lineage_view_level",
110
+ existing_global_ids=existing_global_ids,
100
111
  )
101
112
 
102
113
  logger.info("Lineage data is successfully loaded.")
@@ -115,12 +126,20 @@ def load_stats(
115
126
  if stats_items is None:
116
127
  raise ValueError("No stats items are not selected. Please specify any value to `stats_items` param.")
117
128
 
129
+ file_content = get_avro_file_content(
130
+ tenant_id=tenant_id,
131
+ account_id=conn.host,
132
+ qdc_client=qdc_client,
133
+ )
134
+ existing_global_ids = gen_existing_global_id_dict(avro_content=file_content)
135
+
118
136
  logger.info("The following values will be aggregated. {stats_items}".format(stats_items=stats_items))
119
137
  redshift_table_stats(
120
138
  conn=conn,
121
139
  qdc_client=qdc_client,
122
140
  tenant_id=tenant_id,
123
141
  stats_items=stats_items,
142
+ existing_global_ids=existing_global_ids,
124
143
  )
125
144
 
126
145
  logger.info("Stats data is successfully loaded.")
@@ -252,6 +271,7 @@ if __name__ == "__main__":
252
271
  type=str,
253
272
  choices=["debug", "info", "warn", "error", "none"],
254
273
  action=env_default("LOG_LEVEL"),
274
+ default="info",
255
275
  required=False,
256
276
  help="The log level for dbt commands. Default value is info",
257
277
  )
@@ -285,6 +305,16 @@ if __name__ == "__main__":
285
305
  required=False,
286
306
  help="The client secrete that is created on Quollio console to let clients access Quollio External API",
287
307
  )
308
+ parser.add_argument(
309
+ "--external_api_access",
310
+ type=str,
311
+ choices=["PUBLIC", "VPC_ENDPOINT"],
312
+ action=env_default("EXTERNAL_API_ACCESS"),
313
+ default="PUBLIC",
314
+ required=False,
315
+ help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
316
+ if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
317
+ )
288
318
 
289
319
  stats_items = get_column_stats_items()
290
320
  parser.add_argument(
@@ -323,11 +353,24 @@ if __name__ == "__main__":
323
353
  log_level=args.log_level,
324
354
  dbt_macro_source=args.dbt_macro_source,
325
355
  )
356
+
357
+ api_url = args.api_url
358
+ if args.external_api_access == "VPC_ENDPOINT":
359
+ logger.debug("Using VPC Endpoint for Quollio API access")
360
+ api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
361
+ if err is not None:
362
+ logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
363
+ raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
364
+ is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
365
+ if not is_domain_valid:
366
+ raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
367
+ logger.debug("API URL: %s", api_url)
368
+
326
369
  if "load_lineage" in args.commands:
327
370
  qdc_client = qdc.QDCExternalAPIClient(
328
371
  client_id=args.client_id,
329
372
  client_secret=args.client_secret,
330
- base_url=args.api_url,
373
+ base_url=api_url,
331
374
  )
332
375
  load_lineage(
333
376
  conn=conn,
@@ -338,7 +381,7 @@ if __name__ == "__main__":
338
381
  qdc_client = qdc.QDCExternalAPIClient(
339
382
  client_id=args.client_id,
340
383
  client_secret=args.client_secret,
341
- base_url=args.api_url,
384
+ base_url=api_url,
342
385
  )
343
386
  load_stats(
344
387
  conn=conn,
@@ -348,7 +391,7 @@ if __name__ == "__main__":
348
391
  )
349
392
  if "load_sqllineage" in args.commands:
350
393
  qdc_client = qdc.QDCExternalAPIClient(
351
- base_url=args.api_url,
394
+ base_url=api_url,
352
395
  client_id=args.client_id,
353
396
  client_secret=args.client_secret,
354
397
  )
@@ -0,0 +1,25 @@
1
+ from google.oauth2.service_account import Credentials
2
+ from googleapiclient.discovery import build
3
+
4
+
5
class CloudResourceManagerClient:
    """Client to interact with the Cloud Resource Manager API."""

    def __init__(self, credentials: Credentials) -> None:
        """Initialize the Cloud Resource Manager client with provided credentials."""
        self.client = self.__initialize(credentials=credentials)

    def __initialize(self, credentials: Credentials):
        """Build the ``cloudresourcemanager`` v1 service object for ``credentials``."""
        return build("cloudresourcemanager", "v1", credentials=credentials)

    def list_projects(self) -> dict:
        """List all projects accessible with the current credentials.

        ``projects.list`` is paginated, so every page is fetched via
        ``list_next`` and the ``projects`` entries are merged into the first
        page's response. The returned mapping keeps the shape of a single
        response, minus ``nextPageToken`` (there is nothing left to fetch).

        Returns:
            The merged response; ``projects`` is absent when the API
            returned no projects at all.
        """
        projects_api = self.client.projects()
        request = projects_api.list()
        merged = None
        while request is not None:
            page = request.execute()
            if merged is None:
                merged = page
            else:
                merged.setdefault("projects", []).extend(page.get("projects", []))
            # list_next returns None once the response carries no next-page token.
            request = projects_api.list_next(previous_request=request, previous_response=page)

        # Removed only after the loop: list_next needs the token on the first page.
        merged.pop("nextPageToken", None)
        return merged

    def get_project(self, project_id: str) -> dict:
        """Get a specific project by project ID.

        Args:
            project_id: Sent to the API as ``projectId``.

        Returns:
            The response of the executed ``projects.get`` request.
        """
        return self.client.projects().get(projectId=project_id).execute()
@@ -312,6 +312,7 @@ if __name__ == "__main__":
312
312
  type=str,
313
313
  choices=["debug", "info", "warn", "error", "none"],
314
314
  action=env_default("LOG_LEVEL"),
315
+ default="info",
315
316
  required=False,
316
317
  help="The log level for dbt commands. Default value is info",
317
318
  )
@@ -442,6 +443,7 @@ if __name__ == "__main__":
442
443
  )
443
444
  api_url = args.api_url
444
445
  if args.external_api_access == "VPC_ENDPOINT":
446
+ logger.debug("Using VPC Endpoint for Quollio API access")
445
447
  api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
446
448
  if err is not None:
447
449
  logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
@@ -449,7 +451,8 @@ if __name__ == "__main__":
449
451
 
450
452
  is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
451
453
  if not is_domain_valid:
452
- raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com`")
454
+ raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
455
+ logger.debug("API URL: %s", api_url)
453
456
 
454
457
  if "load_lineage" in args.commands:
455
458
  qdc_client = qdc.QDCExternalAPIClient(
@@ -1,12 +1,13 @@
1
1
  import argparse
2
2
  import json
3
3
 
4
+ from quollio_core.helper.core import is_valid_domain
4
5
  from quollio_core.helper.env_default import env_default
5
6
  from quollio_core.helper.log_utils import configure_logging, error_handling_decorator, logger
6
7
  from quollio_core.profilers.stats import get_column_stats_items
7
8
  from quollio_core.profilers.teradata.lineage import load_lineage
8
9
  from quollio_core.profilers.teradata.stats import load_stats
9
- from quollio_core.repository import qdc
10
+ from quollio_core.repository import qdc, ssm
10
11
  from quollio_core.repository import teradata as teradata_repo
11
12
 
12
13
  DEFAULT_SYSTEM_DATABASES = [
@@ -197,6 +198,16 @@ def main() -> None:
197
198
  help="Name of the Teradata system database.\
198
199
  Default is DBC",
199
200
  )
201
+ parser.add_argument(
202
+ "--external_api_access",
203
+ type=str,
204
+ choices=["PUBLIC", "VPC_ENDPOINT"],
205
+ action=env_default("EXTERNAL_API_ACCESS"),
206
+ default="PUBLIC",
207
+ required=False,
208
+ help="Access method to Quollio API. Default 'PUBLIC'. Choose 'VPC_ENDPOINT'\
209
+ if you use API Gateway VPC Endpoint, DefaultValue is set to PUBLIC.",
210
+ )
200
211
 
201
212
  args = parser.parse_args()
202
213
 
@@ -217,7 +228,19 @@ def main() -> None:
217
228
  additional_params = {}
218
229
 
219
230
  logger.info("Initializing QDC client")
220
- qdc_client = qdc.initialize_qdc_client(args.api_url, args.client_id, args.client_secret)
231
+ api_url = args.api_url
232
+ if args.external_api_access == "VPC_ENDPOINT":
233
+ logger.debug("Using VPC Endpoint for Quollio API access")
234
+ api_url, err = ssm.get_parameter_by_assume_role(args.api_url)
235
+ if err is not None:
236
+ logger.error("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
237
+ raise Exception("Fail to ssm.get_parameter_by_assume_role. {err}".format(err=err))
238
+ is_domain_valid = is_valid_domain(domain=api_url, domain_type=args.external_api_access)
239
+ if not is_domain_valid:
240
+ raise ValueError("The format of quollio API URL is invalid. The URL must end with `.com` or /api.")
241
+
242
+ logger.debug("API URL: %s", api_url)
243
+ qdc_client = qdc.initialize_qdc_client(api_url, args.client_id, args.client_secret)
221
244
 
222
245
  logger.info("Initializing Teradata client")
223
246
  config = teradata_repo.TeradataConfig.from_dict(
File without changes
File without changes