quollio-core 0.4.6__py3-none-any.whl → 0.4.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- quollio_core/__init__.py +1 -1
- quollio_core/bigquery.py +114 -0
- quollio_core/bricks.py +26 -6
- quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql +2 -1
- quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql +57 -20
- quollio_core/profilers/bigquery.py +81 -0
- quollio_core/profilers/databricks.py +6 -6
- quollio_core/profilers/lineage.py +14 -0
- quollio_core/repository/bigquery.py +61 -0
- {quollio_core-0.4.6.dist-info → quollio_core-0.4.7.dist-info}/METADATA +6 -1
- {quollio_core-0.4.6.dist-info → quollio_core-0.4.7.dist-info}/RECORD +13 -10
- {quollio_core-0.4.6.dist-info → quollio_core-0.4.7.dist-info}/LICENSE +0 -0
- {quollio_core-0.4.6.dist-info → quollio_core-0.4.7.dist-info}/WHEEL +0 -0
quollio_core/__init__.py
CHANGED
quollio_core/bigquery.py
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
import argparse
|
2
|
+
import json
|
3
|
+
import logging
|
4
|
+
|
5
|
+
from quollio_core.helper.env_default import env_default
|
6
|
+
from quollio_core.profilers.bigquery import bigquery_table_lineage
|
7
|
+
from quollio_core.repository import qdc
|
8
|
+
from quollio_core.repository.bigquery import get_credentials, get_org_id
|
9
|
+
|
10
|
+
logger = logging.getLogger(__name__)
|
11
|
+
|
12
|
+
|
13
|
+
def load_lineage(
    qdc_client: qdc.QDCExternalAPIClient, project_id: str, regions: list, tenant_id: str, credentials: dict, org_id: str
):
    """Run the BigQuery table-lineage load by delegating to ``bigquery_table_lineage``.

    Every argument is forwarded unchanged, by keyword. Nothing is returned.

    NOTE(review): ``credentials`` is annotated ``dict``, but the ``__main__``
    section of this module passes the value returned by ``get_credentials`` --
    confirm which type is intended.
    """
    forwarded = {
        "qdc_client": qdc_client,
        "tenant_id": tenant_id,
        "project_id": project_id,
        "regions": regions,
        "credentials": credentials,
        "org_id": org_id,
    }
    bigquery_table_lineage(**forwarded)
|
24
|
+
|
25
|
+
|
26
|
+
if __name__ == "__main__":
    # Command-line entry point: declare the flags, parse them, then run each
    # requested command.
    # NOTE(review): env_default(...) is a project helper used as the argparse
    # action; presumably it falls back to the named environment variable when
    # the flag is omitted -- confirm against quollio_core.helper.env_default.
    parser = argparse.ArgumentParser(
        prog="Quollio Intelligence Agent for Google BigQuery",
        description="Collect lineage and stats from Google BigQuery and load to Quollio Data Catalog",
        epilog="Copyright (c) 2024 Quollio Technologies, Inc.",
    )
    parser.add_argument(
        "commands",
        choices=["load_lineage"],
        type=str,
        nargs="+",
        help="""
        The command to execute.
        'load_lineage': Load lineage data from Google Data Catalog to Quollio,
        """,
    )
    parser.add_argument(
        "--credentials",
        type=str,
        action=env_default("GOOGLE_APPLICATION_CREDENTIALS"),
        # The value is fed to json.loads() below, so it must be the key's JSON
        # text itself rather than a path to a key file.
        help="Credentials for Google Cloud Platform (service account key as a JSON string)",
    )
    parser.add_argument(
        "--tenant_id",
        type=str,
        action=env_default("TENANT_ID"),
        required=False,
        help="The tenant id (company id) where the lineage and stats are loaded",
    )
    parser.add_argument(
        "--api_url",
        type=str,
        action=env_default("QDC_API_URL"),
        required=False,
        help="The base URL of Quollio External API",
    )
    parser.add_argument(
        "--client_id",
        type=str,
        action=env_default("QDC_CLIENT_ID"),
        required=False,
        help="The client id that is created on Quollio console to let clients access Quollio External API",
    )
    parser.add_argument(
        "--client_secret",
        type=str,
        action=env_default("QDC_CLIENT_SECRET"),
        required=False,
        help="The client secret that is created on Quollio console to let clients access Quollio External API",
    )
    parser.add_argument(
        "--project_id",
        type=str,
        action=env_default("GCP_PROJECT_ID"),
        required=False,
        help="GCP Project ID",
    )
    parser.add_argument(
        "--regions",
        type=str,
        action=env_default("GCP_REGIONS"),
        required=False,
        help="GCP regions where the data is located. Multiple regions can be provided separated by space.",
        nargs="+",
    )

    args = parser.parse_args()

    # Defensive guard kept from the original; nargs="+" already makes argparse
    # reject an empty command list on the command line.
    if not args.commands:
        raise ValueError("No command is provided")

    if "load_lineage" in args.commands:
        qdc_client = qdc.QDCExternalAPIClient(
            base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
        )

        # The same parsed key is used to build the credentials object and to
        # look up the organization id.
        credentials_json = json.loads(args.credentials)
        credentials = get_credentials(credentials_json=credentials_json)
        org_id = get_org_id(credentials_json=credentials_json)

        load_lineage(
            qdc_client=qdc_client,
            project_id=args.project_id,
            regions=args.regions,
            tenant_id=args.tenant_id,
            credentials=credentials,
            org_id=org_id,
        )
|
quollio_core/bricks.py
CHANGED
@@ -17,7 +17,7 @@ logger = logging.getLogger(__name__)
|
|
17
17
|
|
18
18
|
def build_view(
|
19
19
|
conn: db.DatabricksConnectionConfig,
|
20
|
-
target_tables: str,
|
20
|
+
target_tables: str = "",
|
21
21
|
log_level: str = "info",
|
22
22
|
) -> None:
|
23
23
|
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s - %(message)s")
|
@@ -41,7 +41,13 @@ def build_view(
|
|
41
41
|
options=["--no-use-colors", "--log-level", log_level],
|
42
42
|
)
|
43
43
|
|
44
|
-
run_options = ["--no-use-colors", "--log-level", log_level
|
44
|
+
run_options = ["--no-use-colors", "--log-level", log_level]
|
45
|
+
|
46
|
+
if target_tables is not None:
|
47
|
+
target_tables_str = " ".join(target_tables)
|
48
|
+
run_options.append("--select")
|
49
|
+
run_options.append(target_tables_str)
|
50
|
+
|
45
51
|
dbt_client.invoke(
|
46
52
|
cmd="run",
|
47
53
|
project_dir=project_path,
|
@@ -106,7 +112,6 @@ if __name__ == "__main__":
|
|
106
112
|
'build_view': Build views using dbt,
|
107
113
|
'load_lineage': Load lineage data from created views to Quollio,
|
108
114
|
'load_stats': Load stats from created views to Quollio,
|
109
|
-
'load_sqllineage': Load lineage data from sql parse result(alpha),
|
110
115
|
""",
|
111
116
|
)
|
112
117
|
parser.add_argument(
|
@@ -193,8 +198,8 @@ if __name__ == "__main__":
|
|
193
198
|
parser.add_argument(
|
194
199
|
"--target_tables",
|
195
200
|
type=str,
|
196
|
-
nargs="
|
197
|
-
choices=["quollio_lineage_table_level", "
|
201
|
+
nargs="+",
|
202
|
+
choices=["quollio_lineage_table_level", "quollio_lineage_column_level"],
|
198
203
|
action=env_default("DATABRICKS_TARGET_TABLES"),
|
199
204
|
required=False,
|
200
205
|
help="Target tables you want to create with dbt module. \
|
@@ -203,6 +208,16 @@ if __name__ == "__main__":
|
|
203
208
|
if you want to create two or more tables",
|
204
209
|
)
|
205
210
|
|
211
|
+
parser.add_argument(
|
212
|
+
"--monitoring_table_suffix",
|
213
|
+
type=str,
|
214
|
+
action=env_default("DATABRICKS_MONITORING_TABLE_SUFFIX"),
|
215
|
+
required=False,
|
216
|
+
help="Sets the monitoring tables suffix for databricks. \
|
217
|
+
This is used to identify the monitoring tables created by the databricks monitoring tool. \
|
218
|
+
Default value is _profile_metrics",
|
219
|
+
)
|
220
|
+
|
206
221
|
args = parser.parse_args()
|
207
222
|
|
208
223
|
conn = db.DatabricksConnectionConfig(
|
@@ -234,4 +249,9 @@ if __name__ == "__main__":
|
|
234
249
|
qdc_client = qdc.QDCExternalAPIClient(
|
235
250
|
base_url=args.api_url, client_id=args.client_id, client_secret=args.client_secret
|
236
251
|
)
|
237
|
-
databricks_column_stats(
|
252
|
+
databricks_column_stats(
|
253
|
+
conn=conn,
|
254
|
+
qdc_client=qdc_client,
|
255
|
+
tenant_id=args.tenant_id,
|
256
|
+
monitoring_table_suffix=args.monitoring_table_suffix,
|
257
|
+
)
|
@@ -24,6 +24,7 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE NOT startswit
|
|
24
24
|
{%- set build_sql %}
|
25
25
|
{%- for record in records[i: i+chunk] -%}
|
26
26
|
{%- if not loop.first %}UNION{% endif %}
|
27
|
+
|
27
28
|
SELECT
|
28
29
|
DISTINCT
|
29
30
|
'{{record[0]}}' as db_name
|
@@ -38,7 +39,7 @@ SELECT * FROM {{ ref('quollio_stats_profiling_columns') }} WHERE NOT startswit
|
|
38
39
|
, {% if record[5] == true %}median("{{record[3]}}"){% else %}null{% endif %} AS median_value
|
39
40
|
, {% if record[5] == true %}approx_top_k("{{record[3]}}")[0][0]{% else %}null{% endif %} AS mode_value
|
40
41
|
, {% if record[5] == true %}stddev("{{record[3]}}"){% else %}null{% endif %} AS stddev_value
|
41
|
-
FROM {{
|
42
|
+
FROM "{{record[0]}}"."{{record[1]}}"."{{record[2]}}" {{ var("sample_method") }}
|
42
43
|
{% endfor -%}
|
43
44
|
{%- endset %}
|
44
45
|
-- create a view with a index as suffix
|
@@ -36,24 +36,61 @@ WITH columns AS (
|
|
36
36
|
table_catalog
|
37
37
|
, table_schema
|
38
38
|
, name
|
39
|
+
), m_view_sys_columns AS (
|
40
|
+
SELECT
|
41
|
+
cols.table_catalog
|
42
|
+
, cols.table_schema
|
43
|
+
, cols.table_name
|
44
|
+
, cols.column_name
|
45
|
+
, cols.data_type
|
46
|
+
FROM
|
47
|
+
{{ source('account_usage', 'COLUMNS') }} cols
|
48
|
+
LEFT OUTER JOIN
|
49
|
+
{{ source('account_usage', 'TABLES') }} tbls
|
50
|
+
ON
|
51
|
+
cols.table_catalog = tbls.table_catalog
|
52
|
+
AND cols.table_schema = tbls.table_schema
|
53
|
+
AND cols.table_name = tbls.table_name
|
54
|
+
WHERE
|
55
|
+
tbls.table_type = 'MATERIALIZED VIEW'
|
56
|
+
AND cols.column_name = 'SYS_MV_SOURCE_PARTITION'
|
57
|
+
), implicit_columns_removed AS (
|
58
|
+
SELECT
|
59
|
+
c.table_catalog
|
60
|
+
, c.table_schema
|
61
|
+
, c.table_name
|
62
|
+
, c.column_name
|
63
|
+
, c.data_type
|
64
|
+
FROM
|
65
|
+
columns c
|
66
|
+
INNER JOIN
|
67
|
+
accessible_tables a
|
68
|
+
ON
|
69
|
+
c.table_catalog = a.table_catalog
|
70
|
+
AND c.table_schema = a.table_schema
|
71
|
+
AND c.table_name = a.name
|
72
|
+
MINUS
|
73
|
+
SELECT
|
74
|
+
table_catalog
|
75
|
+
, table_schema
|
76
|
+
, table_name
|
77
|
+
, column_name
|
78
|
+
, data_type
|
79
|
+
FROM
|
80
|
+
m_view_sys_columns
|
81
|
+
), final AS (
|
82
|
+
SELECT
|
83
|
+
table_catalog
|
84
|
+
, table_schema
|
85
|
+
, table_name
|
86
|
+
, column_name
|
87
|
+
, data_type
|
88
|
+
, case when data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
|
89
|
+
'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
|
90
|
+
'TINYINT', 'BYTEINT')
|
91
|
+
THEN true
|
92
|
+
else false END AS is_calculable
|
93
|
+
FROM
|
94
|
+
implicit_columns_removed
|
39
95
|
)
|
40
|
-
|
41
|
-
SELECT
|
42
|
-
c.table_catalog
|
43
|
-
, c.table_schema
|
44
|
-
, c.table_name
|
45
|
-
, c.column_name
|
46
|
-
, c.data_type
|
47
|
-
, case when c.data_type in('NUMBER','DECIMAL', 'DEC', 'NUMERIC',
|
48
|
-
'INT', 'INTEGER', 'BIGINT', 'SMALLINT',
|
49
|
-
'TINYINT', 'BYTEINT')
|
50
|
-
THEN true
|
51
|
-
else false END AS is_calculable
|
52
|
-
FROM
|
53
|
-
columns c
|
54
|
-
INNER JOIN
|
55
|
-
accessible_tables a
|
56
|
-
ON
|
57
|
-
c.table_catalog = a.table_catalog
|
58
|
-
AND c.table_schema = a.table_schema
|
59
|
-
AND c.table_name = a.name
|
96
|
+
select * from final
|
@@ -0,0 +1,81 @@
|
|
1
|
+
import logging
|
2
|
+
from typing import Any, Dict, List
|
3
|
+
|
4
|
+
from quollio_core.profilers.lineage import gen_table_lineage_payload, parse_bigquery_table_lineage
|
5
|
+
from quollio_core.repository import qdc
|
6
|
+
from quollio_core.repository.bigquery import BigQueryClient, GCPLineageClient, get_entitiy_reference, get_search_request
|
7
|
+
|
8
|
+
logger = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
def bigquery_table_lineage(
    qdc_client: qdc.QDCExternalAPIClient,
    tenant_id: str,
    project_id: str,
    regions: list,
    org_id: str,
    credentials: Any,
):
    """Collect table lineage for ``project_id`` and send it to Quollio.

    Steps, in order:
      1. list the project's datasets and build the table-name list;
      2. look up lineage links for every table in each of ``regions``;
      3. convert the links with ``parse_bigquery_table_lineage`` and
         ``gen_table_lineage_payload`` (``org_id`` is passed as ``endpoint``);
      4. call ``qdc_client.update_lineage_by_id`` once per generated input.

    Only calls whose returned status code equals 200 are counted in the final
    log line. Nothing is returned.
    """
    lineage_client = GCPLineageClient(credentials)
    bq_client = BigQueryClient(credentials)

    table_names = generate_table_list(bq_client.list_datasets(project_id), bq_client)
    links_by_target = generate_lineage_links(table_names, lineage_client, project_id, regions)
    parsed_links = parse_bigquery_table_lineage(links_by_target)

    lineage_inputs = gen_table_lineage_payload(tenant_id=tenant_id, endpoint=org_id, tables=parsed_links)

    ingested = 0
    for lineage_input in lineage_inputs:
        logger.info(
            "Generating table lineage. downstream: %s -> %s-> %s",
            lineage_input.downstream_database_name,
            lineage_input.downstream_schema_name,
            lineage_input.downstream_table_name,
        )
        result = qdc_client.update_lineage_by_id(
            global_id=lineage_input.downstream_global_id,
            payload=lineage_input.upstreams.as_dict(),
        )
        if result == 200:
            ingested += 1
    logger.info("Generating table lineage is finished. %s lineages are ingested.", ingested)
|
44
|
+
|
45
|
+
|
46
|
+
def generate_table_list(datasets: List[Any], bq_client: BigQueryClient) -> List[str]:
    """Return ``project.dataset.table`` names for the tables in ``datasets``.

    Args:
        datasets: objects exposing a ``dataset_id`` attribute (the original
            ``List[str]`` annotation did not match this attribute access).
        bq_client: client whose ``list_tables(dataset_id)`` yields objects with
            ``project``, ``dataset_id``, ``table_id`` and ``table_type``.

    Returns:
        One name per table whose ``table_type`` is ``TABLE``, ``VIEW`` or
        ``MATERIALIZED_VIEW``, in dataset order then listing order.
    """
    wanted_types = ("TABLE", "VIEW", "MATERIALIZED_VIEW")
    table_names: List[str] = []
    for dataset in datasets:
        # NOTE(review): only the bare dataset_id is passed, so the project it
        # resolves against is decided by bq_client -- confirm this matches the
        # project the datasets were listed from.
        table_names.extend(
            f"{table.project}.{table.dataset_id}.{table.table_id}"
            for table in bq_client.list_tables(dataset.dataset_id)
            if table.table_type in wanted_types
        )
    return table_names
|
62
|
+
|
63
|
+
|
64
|
+
def generate_lineage_links(
    all_tables: List[str], lineage_client: GCPLineageClient, project_id: str, regions: List[str]
) -> Dict[str, List[str]]:
    """Map each lineage target table to the source tables that feed it.

    For every name in ``all_tables`` an entity reference with the fully
    qualified name ``bigquery:<name>`` is searched in each of ``regions``.
    Each returned link adds its source under its target; the ``bigquery:``
    prefix is stripped from both names. Sources are appended in the order the
    links are returned, without de-duplication.
    """
    links_by_target: Dict[str, List[str]] = {}
    for table_name in all_tables:
        entity = get_entitiy_reference()
        entity.fully_qualified_name = f"bigquery:{table_name}"

        for region in regions:
            search = get_search_request(downstream_table=entity, project_id=project_id, region=region)
            for link in lineage_client.get_links(request=search):
                target_name = str(link.target.fully_qualified_name).replace("bigquery:", "")
                source_name = str(link.source.fully_qualified_name).replace("bigquery:", "")
                links_by_target.setdefault(target_name, []).append(source_name)

    return links_by_target
|
@@ -99,7 +99,7 @@ def databricks_column_level_lineage(
|
|
99
99
|
|
100
100
|
|
101
101
|
def _get_monitoring_tables(
|
102
|
-
conn: databricks.DatabricksConnectionConfig,
|
102
|
+
conn: databricks.DatabricksConnectionConfig, monitoring_table_suffix: str = "_profile_metrics"
|
103
103
|
) -> List[Dict[str, str]]:
|
104
104
|
tables = []
|
105
105
|
query = f"""
|
@@ -110,7 +110,7 @@ def _get_monitoring_tables(
|
|
110
110
|
CONCAT(table_catalog, '.', table_schema, '.', table_name) AS table_fqdn
|
111
111
|
FROM
|
112
112
|
system.information_schema.tables
|
113
|
-
WHERE table_name LIKE "%{
|
113
|
+
WHERE table_name LIKE "%{monitoring_table_suffix}"
|
114
114
|
"""
|
115
115
|
with databricks.DatabricksQueryExecutor(config=conn) as databricks_executor:
|
116
116
|
tables = databricks_executor.get_query_results(query)
|
@@ -123,9 +123,9 @@ def _get_monitoring_tables(
|
|
123
123
|
|
124
124
|
|
125
125
|
def _get_column_stats(
|
126
|
-
conn: databricks.DatabricksConnectionConfig,
|
126
|
+
conn: databricks.DatabricksConnectionConfig, monitoring_table_suffix: str = "_profile_metrics"
|
127
127
|
) -> List[Dict[str, str]]:
|
128
|
-
tables = _get_monitoring_tables(conn,
|
128
|
+
tables = _get_monitoring_tables(conn, monitoring_table_suffix)
|
129
129
|
if not tables:
|
130
130
|
return []
|
131
131
|
stats = []
|
@@ -178,9 +178,9 @@ def databricks_column_stats(
|
|
178
178
|
conn: databricks.DatabricksConnectionConfig,
|
179
179
|
qdc_client: qdc.QDCExternalAPIClient,
|
180
180
|
tenant_id: str,
|
181
|
-
|
181
|
+
monitoring_table_suffix: str = "_profile_metrics",
|
182
182
|
) -> None:
|
183
|
-
table_stats = _get_column_stats(conn,
|
183
|
+
table_stats = _get_column_stats(conn, monitoring_table_suffix)
|
184
184
|
for table in table_stats:
|
185
185
|
stats = gen_table_stats_payload(tenant_id, conn.host, table)
|
186
186
|
for stat in stats:
|
@@ -153,3 +153,17 @@ def parse_databricks_table_lineage(results: List) -> List[Dict[str, Dict]]:
|
|
153
153
|
payload["UPSTREAM_TABLES"] = json.loads(result["UPSTREAM_TABLES"])
|
154
154
|
payloads.append(payload)
|
155
155
|
return payloads
|
156
|
+
|
157
|
+
|
158
|
+
def parse_bigquery_table_lineage(tables: Dict) -> List[Dict[str, Dict]]:
    """Convert a downstream-to-upstreams mapping into lineage payload dicts.

    Args:
        tables: mapping of a downstream table name to an iterable of its
            upstream table names.

    Returns:
        One dict per mapping entry, in mapping order, of the form
        ``{"DOWNSTREAM_TABLE_NAME": <name>, "UPSTREAM_TABLES": [{"upstream_object_name": <upstream>}, ...]}``.
        An empty mapping gives an empty list.
    """
    return [
        {
            "DOWNSTREAM_TABLE_NAME": downstream,
            "UPSTREAM_TABLES": [{"upstream_object_name": upstream_table} for upstream_table in upstream],
        }
        for downstream, upstream in tables.items()
    ]
|
@@ -0,0 +1,61 @@
|
|
1
|
+
import logging
|
2
|
+
|
3
|
+
from google.cloud.bigquery import Client
|
4
|
+
from google.cloud.datacatalog_lineage_v1 import EntityReference, LineageClient, SearchLinksRequest
|
5
|
+
from google.oauth2.service_account import Credentials
|
6
|
+
from googleapiclient.discovery import build
|
7
|
+
|
8
|
+
logger = logging.getLogger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
class BigQueryClient:
    """Small wrapper around ``google.cloud.bigquery.Client`` for listing datasets and tables."""

    def __init__(self, credentials: Credentials) -> None:
        # The wrapped client is kept on ``self.client``.
        self.client = self.__initialze(credentials=credentials)

    def __initialze(self, credentials: Credentials) -> Client:
        """Build the underlying BigQuery client from ``credentials``."""
        return Client(credentials=credentials)

    def list_datasets(self, project_id) -> list:
        """Return the datasets listed for ``project_id`` as a list."""
        datasets = list(self.client.list_datasets(project_id))
        logger.debug("Found %s datasets in project %s", len(datasets), project_id)
        return datasets

    def list_tables(self, dataset_id) -> list:
        """Return the tables listed for ``dataset_id`` as a list."""
        tables = list(self.client.list_tables(dataset_id))
        logger.debug("Found %s tables in dataset %s", len(tables), dataset_id)
        # Return the list that was just built and counted; the original issued
        # a second, identical list_tables request here.
        return tables
|
28
|
+
|
29
|
+
|
30
|
+
class GCPLineageClient:
    """Small wrapper around the Data Catalog Lineage ``LineageClient``."""

    def __init__(self, credentials: Credentials) -> None:
        # The wrapped client is kept on ``self.client``.
        self.client = self.__initialze(credentials=credentials)

    def __initialze(self, credentials: Credentials) -> LineageClient:
        """Build the underlying lineage client from ``credentials``."""
        return LineageClient(credentials=credentials)

    def get_links(self, request: SearchLinksRequest) -> list:
        """Return every link matching ``request`` as a list.

        ``search_links`` returns a pager. Reading ``.links`` on it exposes only
        the first response page, so the pager is iterated instead to follow
        any further pages.
        """
        response = self.client.search_links(request)
        return list(response)
|
41
|
+
|
42
|
+
|
43
|
+
def get_entitiy_reference() -> EntityReference:
    """Create and return a new ``EntityReference`` built with no arguments."""
    reference = EntityReference()
    return reference
|
45
|
+
|
46
|
+
|
47
|
+
def get_search_request(downstream_table: EntityReference, project_id: str, region: str) -> SearchLinksRequest:
    """Build a ``SearchLinksRequest`` that targets ``downstream_table``.

    The request parent is ``projects/<project_id>/locations/<region>`` with the
    region lower-cased.
    """
    parent_path = f"projects/{project_id}/locations/{region.lower()}"
    return SearchLinksRequest(target=downstream_table, parent=parent_path)
|
49
|
+
|
50
|
+
|
51
|
+
def get_credentials(credentials_json: dict) -> Credentials:
    """Build service-account ``Credentials`` from an already-parsed key dict."""
    service_account_credentials = Credentials.from_service_account_info(credentials_json)
    return service_account_credentials
|
53
|
+
|
54
|
+
|
55
|
+
def get_org_id(credentials_json: dict) -> str:
    """Return ``parent.id`` of the project named in the service-account key.

    Builds a Cloud Resource Manager v1 service with credentials derived from
    ``credentials_json``, fetches the project whose id is
    ``credentials_json["project_id"]`` and returns ``project["parent"]["id"]``.

    NOTE(review): a ``KeyError`` propagates if the fetched project has no
    ``parent`` entry; whether the parent is always an organization is not
    checked here -- confirm.
    """
    service = build("cloudresourcemanager", "v1", credentials=get_credentials(credentials_json))
    target_project = credentials_json["project_id"]
    project_resource = service.projects().get(projectId=target_project).execute()
    return project_resource["parent"]["id"]
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: quollio-core
|
3
|
-
Version: 0.4.
|
3
|
+
Version: 0.4.7
|
4
4
|
Summary: Quollio Core
|
5
5
|
Author-email: quollio-dev <qt.dev@quollio.com>
|
6
6
|
Maintainer-email: RyoAriyama <ryo.arym@gmail.com>, tharuta <35373297+TakumiHaruta@users.noreply.github.com>
|
@@ -31,6 +31,11 @@ Requires-Dist: snowflake-connector-python==3.5.0
|
|
31
31
|
Requires-Dist: databricks-sdk==0.17.0
|
32
32
|
Requires-Dist: databricks-sql-connector==2.9.5
|
33
33
|
Requires-Dist: sqlglot==20.8.0
|
34
|
+
Requires-Dist: google-cloud==0.34.0
|
35
|
+
Requires-Dist: google-cloud-bigquery==3.22.0
|
36
|
+
Requires-Dist: google-cloud-datacatalog==3.19.0
|
37
|
+
Requires-Dist: google-cloud-datacatalog-lineage==0.3.6
|
38
|
+
Requires-Dist: google-api-python-client==2.131.0
|
34
39
|
Requires-Dist: black>=22.3.0 ; extra == "test"
|
35
40
|
Requires-Dist: coverage>=7.3.2 ; extra == "test"
|
36
41
|
Requires-Dist: isort>=5.10.1 ; extra == "test"
|
@@ -1,5 +1,6 @@
|
|
1
|
-
quollio_core/__init__.py,sha256=
|
2
|
-
quollio_core/
|
1
|
+
quollio_core/__init__.py,sha256=NxY4NXjq-cDnk10OtHZG-Gx7f8ZIkLwMASW4OojpfSQ,83
|
2
|
+
quollio_core/bigquery.py,sha256=2DrUMo4evcH4BHiUtnY48IjmsdAsQMoPGtNx8SRoyzQ,3528
|
3
|
+
quollio_core/bricks.py,sha256=3nxX5hul03Ac3r5BeoeHyK2XaSQq-tBsB9kMeiQ-A7k,8591
|
3
4
|
quollio_core/redshift.py,sha256=wap7QmV-YuHZAomIrHXytGUuxhQ5MFEb38QDY3XrThQ,10167
|
4
5
|
quollio_core/snowflake.py,sha256=8IMbdTjCDBIiS_GF8APWRTVWNj6EM3ZT8MRN12T-1v0,10266
|
5
6
|
quollio_core/dbt_projects/databricks/.gitignore,sha256=1jJAyXSzJ3YUm0nx3i7wUSE4RjQMX3ad6F8O88UbtzI,29
|
@@ -45,7 +46,7 @@ quollio_core/dbt_projects/snowflake/package-lock.yml,sha256=Gef3zDCLF41j_FL-_h3s
|
|
45
46
|
quollio_core/dbt_projects/snowflake/packages.yml,sha256=p9Bl2C44gdC6iYTUkz_15yq3xahSJf2IA3WOXLF_ahA,61
|
46
47
|
quollio_core/dbt_projects/snowflake/analyses/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
47
48
|
quollio_core/dbt_projects/snowflake/macros/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
48
|
-
quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=
|
49
|
+
quollio_core/dbt_projects/snowflake/macros/materialization/divided_view.sql,sha256=340zvlJLDc_rE3W0GOuronKa42gqIElsHy41midkqyM,2783
|
49
50
|
quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.sql,sha256=Cxt2U2aXNG_LUm63jwTyxUkapkrB7_uHmesx1PTcMJM,4721
|
50
51
|
quollio_core/dbt_projects/snowflake/models/quollio_lineage_column_level.yml,sha256=a2uNIAh-xw51eu-GmHVuAnGnTbwK7h8-DjDeQtK3KaQ,711
|
51
52
|
quollio_core/dbt_projects/snowflake/models/quollio_lineage_table_level.sql,sha256=Q_7vY1N1Hi1LFv5CxkkdR3gQw8fTDnoKECTLSK4gd3o,5112
|
@@ -54,7 +55,7 @@ quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.sql,sha256
|
|
54
55
|
quollio_core/dbt_projects/snowflake/models/quollio_sqllineage_sources.yml,sha256=qgazupx3ca4P8R0loY5F9hyCz2fmAcWqZ6iOySo_NoY,377
|
55
56
|
quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.sql,sha256=lH8xPmAzSW-6wi_g1y_LFVhtFgHzBvTweVX-MKeJzUQ,302
|
56
57
|
quollio_core/dbt_projects/snowflake/models/quollio_stats_columns.yml,sha256=V_BESPk6IqE52ExT26-78As9l9AlWW86-Geb5PIhThU,67
|
57
|
-
quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=
|
58
|
+
quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.sql,sha256=ex5ax-KJoM_P1QspkolOUOQg9BazTdZO1Jllp08PQo8,2265
|
58
59
|
quollio_core/dbt_projects/snowflake/models/quollio_stats_profiling_columns.yml,sha256=W39VAmFnnX6RBoW7B_4CConC1lm0Jm9o50Jsz9bYZzY,538
|
59
60
|
quollio_core/dbt_projects/snowflake/models/sources.yml,sha256=vGSV33cNj4UUyPUcYS-JFgc3r8KvSLfiA7qhbDCUU9s,10975
|
60
61
|
quollio_core/dbt_projects/snowflake/profiles/profiles_template.yml,sha256=gcZsgdGP461QuUM9jLbBKdadT8cHTXgNarq_azOOMhk,379
|
@@ -64,19 +65,21 @@ quollio_core/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
|
|
64
65
|
quollio_core/helper/core.py,sha256=-3vCDlKExWPHJmWuZQNpYnvPP55uoGwRpTtnFvsDxIo,1127
|
65
66
|
quollio_core/helper/env_default.py,sha256=YIL9hfrPs1ViL1AXohnbWEjVBUDXbVVakH0ZoSZWOlc,1202
|
66
67
|
quollio_core/profilers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
67
|
-
quollio_core/profilers/
|
68
|
-
quollio_core/profilers/
|
68
|
+
quollio_core/profilers/bigquery.py,sha256=e1Y8cZR-LxI9mSsYb0DurQyy0eCjM_kAKLfvl4IuQLE,3262
|
69
|
+
quollio_core/profilers/databricks.py,sha256=SAahPj1mbEuI3UI42J9ermrAyyZcl-a6ax0lqo71JDQ,7281
|
70
|
+
quollio_core/profilers/lineage.py,sha256=4FyxIuPBrUFihqZryqTQBcfB0Z7634lKl_WwkD82vzE,6865
|
69
71
|
quollio_core/profilers/redshift.py,sha256=obdHVIsOM1bwHGdvYKalsJcTXwLK02kAKQMSBzSvsDo,7862
|
70
72
|
quollio_core/profilers/snowflake.py,sha256=C1LC19ZaUMwNoXjsbnez0xANydJYs8oNRt6tixWKDq8,9090
|
71
73
|
quollio_core/profilers/sqllineage.py,sha256=oCyl4tpXL5bkfguXAzTHSB9kZBL3tQK_rfcJ4XQMrLo,5177
|
72
74
|
quollio_core/profilers/stats.py,sha256=PG1NbbUSpc1JuEYvBzD66rd24tp0C13_Y5Y7vRjYG1c,4720
|
73
75
|
quollio_core/repository/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
76
|
+
quollio_core/repository/bigquery.py,sha256=KMJTeF4OUxtaJt0ymoJ4tkrMKq8yLyMYaMxNvU5yd_Y,2271
|
74
77
|
quollio_core/repository/databricks.py,sha256=m68tja5N-QxH3VqEq-mOJKBeR2qldSgj_L9iIxvWwm0,1945
|
75
78
|
quollio_core/repository/dbt.py,sha256=HXqW_xa4xYPh9CnKkg4L1gwG3SGjj2BAYoWgzWMFU4U,770
|
76
79
|
quollio_core/repository/qdc.py,sha256=VCmzAUvjLemw1os5TaPtfBFkMCOMuPeftjZmUPhFj2Y,4702
|
77
80
|
quollio_core/repository/redshift.py,sha256=UVHIpYzDQ2AbBTAGa8DgmEenG0NZsHfYroR1MmEPQGA,2991
|
78
81
|
quollio_core/repository/snowflake.py,sha256=1YVMDfb9euJKvikv1pk_IxVF6SVsiemSvZ-WMTSbY7E,1874
|
79
|
-
quollio_core-0.4.
|
80
|
-
quollio_core-0.4.
|
81
|
-
quollio_core-0.4.
|
82
|
-
quollio_core-0.4.
|
82
|
+
quollio_core-0.4.7.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
|
83
|
+
quollio_core-0.4.7.dist-info/WHEEL,sha256=EZbGkh7Ie4PoZfRQ8I0ZuP9VklN_TvcZ6DSE5Uar4z4,81
|
84
|
+
quollio_core-0.4.7.dist-info/METADATA,sha256=GeVZJsz_V5scOtet3glilzvJsK30V4GH3T0bz5MBwaE,6804
|
85
|
+
quollio_core-0.4.7.dist-info/RECORD,,
|
File without changes
|
File without changes
|