acryl-datahub 0.15.0.1rc4__py3-none-any.whl → 0.15.0.1rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc4.dist-info → acryl_datahub-0.15.0.1rc6.dist-info}/METADATA +2384 -2384
- {acryl_datahub-0.15.0.1rc4.dist-info → acryl_datahub-0.15.0.1rc6.dist-info}/RECORD +11 -11
- datahub/__init__.py +1 -1
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/source/datahub/config.py +12 -1
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -3
- datahub/specific/dataproduct.py +2 -2
- datahub/sql_parsing/sqlglot_lineage.py +15 -5
- {acryl_datahub-0.15.0.1rc4.dist-info → acryl_datahub-0.15.0.1rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc4.dist-info → acryl_datahub-0.15.0.1rc6.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc4.dist-info → acryl_datahub-0.15.0.1rc6.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=lThsP3KmoFgrvSScMvSH0uLu7H2JJshAHDrzuIjHRjs,576
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -164,7 +164,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
|
|
|
164
164
|
datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
|
|
165
165
|
datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
|
|
166
166
|
datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
167
|
-
datahub/ingestion/graph/client.py,sha256=
|
|
167
|
+
datahub/ingestion/graph/client.py,sha256=AYDFwP9a_M-fCZv-PcWMSr5tc53XWJl372SWKwdu37E,64651
|
|
168
168
|
datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
|
|
169
169
|
datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
|
|
170
170
|
datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
|
|
@@ -265,7 +265,7 @@ datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmX
|
|
|
265
265
|
datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
|
|
266
266
|
datahub/ingestion/source/data_lake_common/path_spec.py,sha256=u3u2eMe70V5vur-j8mYtupZdoeA2hSeK262Whdsc2YU,23506
|
|
267
267
|
datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
268
|
-
datahub/ingestion/source/datahub/config.py,sha256=
|
|
268
|
+
datahub/ingestion/source/datahub/config.py,sha256=rqZFvEmjxjBcW2cTEPYDVTAk3OLzuGIjEFghXPNeZNY,3955
|
|
269
269
|
datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
|
|
270
270
|
datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=F8JrOjSrmJ2B6m1MWh83A1EYFDcGMla749HUeQWMnL0,9464
|
|
271
271
|
datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=8x9_u5kRjgSmu7c295ZIZjxP6bgoZZbWsKRicuLStRQ,4145
|
|
@@ -432,7 +432,7 @@ datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81
|
|
|
432
432
|
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
|
|
433
433
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
434
434
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
435
|
-
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=
|
|
435
|
+
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=kpZvGbWmLMEUBslGToeA3oBO__8V2FzPvUKJSNw_3VM,21598
|
|
436
436
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
437
437
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=8QEihOfivalVR9vLo6vCUL-vnZfAGgMio0uhPYX0jTo,25883
|
|
438
438
|
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=yDu_1aTAG7eLEh1w1FGmn2-c6NJZURdslnI6fC_4B_0,38723
|
|
@@ -863,7 +863,7 @@ datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,113
|
|
|
863
863
|
datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
|
|
864
864
|
datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
|
|
865
865
|
datahub/specific/datajob.py,sha256=5pEBrN6llpgS7jWYEfrvqpbT2vMVVpepH71jIUJUo4U,18480
|
|
866
|
-
datahub/specific/dataproduct.py,sha256=
|
|
866
|
+
datahub/specific/dataproduct.py,sha256=lVv3TGkZyZ0t9CUXLnkwMhr8GK1HB-fiyRyjxTdvb7s,5259
|
|
867
867
|
datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
|
|
868
868
|
datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
|
|
869
869
|
datahub/specific/ownership.py,sha256=KlYnk7o0Tq2EVugW7qRWR9D3v0C8PuqIdwgUzYwlkDM,1446
|
|
@@ -879,7 +879,7 @@ datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgn
|
|
|
879
879
|
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=LBs1RjRqh3natrx4WfgRQGNpI56o12jtbABO5ipEBWA,69889
|
|
880
880
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
881
881
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
882
|
-
datahub/sql_parsing/sqlglot_lineage.py,sha256=
|
|
882
|
+
datahub/sql_parsing/sqlglot_lineage.py,sha256=gUVq3NwZUzQByJs43JZXz8lZf0ZVzVt0FzaW5wZOwK4,47460
|
|
883
883
|
datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
|
|
884
884
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=7tY4FAClhFcqwc23lGVlnT6Dequ_5Xcpbt0hDvnlLzM,6670
|
|
885
885
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -981,8 +981,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
981
981
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
982
982
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
983
983
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
984
|
-
acryl_datahub-0.15.0.
|
|
985
|
-
acryl_datahub-0.15.0.
|
|
986
|
-
acryl_datahub-0.15.0.
|
|
987
|
-
acryl_datahub-0.15.0.
|
|
988
|
-
acryl_datahub-0.15.0.
|
|
984
|
+
acryl_datahub-0.15.0.1rc6.dist-info/METADATA,sha256=NwalxQqxf_XeT9QBcWc4wsNKE11Hv59p6ZZ8HRGVyGc,173642
|
|
985
|
+
acryl_datahub-0.15.0.1rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
986
|
+
acryl_datahub-0.15.0.1rc6.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
|
|
987
|
+
acryl_datahub-0.15.0.1rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
988
|
+
acryl_datahub-0.15.0.1rc6.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -188,9 +188,12 @@ class DataHubGraph(DatahubRestEmitter):
|
|
|
188
188
|
retry_max_times=emitter._retry_max_times,
|
|
189
189
|
extra_headers=emitter._session.headers,
|
|
190
190
|
disable_ssl_verification=emitter._session.verify is False,
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
191
|
+
ca_certificate_path=(
|
|
192
|
+
emitter._session.verify
|
|
193
|
+
if isinstance(emitter._session.verify, str)
|
|
194
|
+
else None
|
|
195
|
+
),
|
|
196
|
+
client_certificate_path=emitter._session.cert,
|
|
194
197
|
)
|
|
195
198
|
)
|
|
196
199
|
|
|
@@ -14,6 +14,17 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
14
14
|
DEFAULT_DATABASE_TABLE_NAME = "metadata_aspect_v2"
|
|
15
15
|
DEFAULT_KAFKA_TOPIC_NAME = "MetadataChangeLog_Timeseries_v1"
|
|
16
16
|
DEFAULT_DATABASE_BATCH_SIZE = 10_000
|
|
17
|
+
DEFAULT_EXCLUDE_ASPECTS = {
|
|
18
|
+
"dataHubIngestionSourceKey",
|
|
19
|
+
"dataHubIngestionSourceInfo",
|
|
20
|
+
"datahubIngestionRunSummary",
|
|
21
|
+
"datahubIngestionCheckpoint",
|
|
22
|
+
"dataHubSecretKey",
|
|
23
|
+
"dataHubSecretValue",
|
|
24
|
+
"globalSettingsKey",
|
|
25
|
+
"globalSettingsInfo",
|
|
26
|
+
"testResults",
|
|
27
|
+
}
|
|
17
28
|
|
|
18
29
|
|
|
19
30
|
class DataHubSourceConfig(StatefulIngestionConfigBase):
|
|
@@ -44,7 +55,7 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
|
|
|
44
55
|
)
|
|
45
56
|
|
|
46
57
|
exclude_aspects: Set[str] = Field(
|
|
47
|
-
|
|
58
|
+
default=DEFAULT_EXCLUDE_ASPECTS,
|
|
48
59
|
description="Set of aspect names to exclude from ingestion",
|
|
49
60
|
)
|
|
50
61
|
|
|
@@ -4,7 +4,7 @@ from dataclasses import dataclass
|
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type
|
|
6
6
|
|
|
7
|
-
from pydantic import BaseModel, validator
|
|
7
|
+
from pydantic import BaseModel, Field, validator
|
|
8
8
|
|
|
9
9
|
from datahub.configuration.datetimes import parse_absolute_time
|
|
10
10
|
from datahub.ingestion.api.closeable import Closeable
|
|
@@ -72,8 +72,8 @@ class ColumnUpstreamJob(BaseModel):
|
|
|
72
72
|
|
|
73
73
|
|
|
74
74
|
class ColumnUpstreamLineage(BaseModel):
|
|
75
|
-
column_name: str
|
|
76
|
-
upstreams: List[ColumnUpstreamJob]
|
|
75
|
+
column_name: Optional[str]
|
|
76
|
+
upstreams: List[ColumnUpstreamJob] = Field(default_factory=list)
|
|
77
77
|
|
|
78
78
|
|
|
79
79
|
class UpstreamTableNode(BaseModel):
|
datahub/specific/dataproduct.py
CHANGED
|
@@ -131,7 +131,7 @@ class DataProductPatchBuilder(MetadataPatchProposal):
|
|
|
131
131
|
self._add_patch(
|
|
132
132
|
DataProductProperties.ASPECT_NAME,
|
|
133
133
|
"add",
|
|
134
|
-
path=f"/assets/{asset_urn}",
|
|
134
|
+
path=f"/assets/{self.quote(asset_urn)}",
|
|
135
135
|
value=DataProductAssociation(destinationUrn=asset_urn),
|
|
136
136
|
)
|
|
137
137
|
return self
|
|
@@ -140,7 +140,7 @@ class DataProductPatchBuilder(MetadataPatchProposal):
|
|
|
140
140
|
self._add_patch(
|
|
141
141
|
DataProductProperties.ASPECT_NAME,
|
|
142
142
|
"remove",
|
|
143
|
-
path=f"/assets/{asset_urn}",
|
|
143
|
+
path=f"/assets/{self.quote(asset_urn)}",
|
|
144
144
|
value={},
|
|
145
145
|
)
|
|
146
146
|
return self
|
|
@@ -66,6 +66,7 @@ SQL_LINEAGE_TIMEOUT_ENABLED = get_boolean_env_variable(
|
|
|
66
66
|
"SQL_LINEAGE_TIMEOUT_ENABLED", True
|
|
67
67
|
)
|
|
68
68
|
SQL_LINEAGE_TIMEOUT_SECONDS = 10
|
|
69
|
+
SQL_PARSER_TRACE = get_boolean_env_variable("DATAHUB_SQL_PARSER_TRACE", False)
|
|
69
70
|
|
|
70
71
|
|
|
71
72
|
# These rules are a subset of the rules in sqlglot.optimizer.optimizer.RULES.
|
|
@@ -365,10 +366,11 @@ def _prepare_query_columns(
|
|
|
365
366
|
|
|
366
367
|
return node
|
|
367
368
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
369
|
+
if SQL_PARSER_TRACE:
|
|
370
|
+
logger.debug(
|
|
371
|
+
"Prior to case normalization sql %s",
|
|
372
|
+
statement.sql(pretty=True, dialect=dialect),
|
|
373
|
+
)
|
|
372
374
|
statement = statement.transform(_sqlglot_force_column_normalizer, copy=False)
|
|
373
375
|
# logger.debug(
|
|
374
376
|
# "Sql after casing normalization %s",
|
|
@@ -562,7 +564,7 @@ def _select_statement_cll( # noqa: C901
|
|
|
562
564
|
)
|
|
563
565
|
)
|
|
564
566
|
|
|
565
|
-
# TODO: Also extract referenced columns (aka
|
|
567
|
+
# TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage)
|
|
566
568
|
except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e:
|
|
567
569
|
raise SqlUnderstandingError(
|
|
568
570
|
f"sqlglot failed to compute some lineage: {e}"
|
|
@@ -1022,6 +1024,14 @@ def _sqlglot_lineage_inner(
|
|
|
1022
1024
|
logger.debug(
|
|
1023
1025
|
f"Resolved {total_schemas_resolved} of {total_tables_discovered} table schemas"
|
|
1024
1026
|
)
|
|
1027
|
+
if SQL_PARSER_TRACE:
|
|
1028
|
+
for qualified_table, schema_info in table_name_schema_mapping.items():
|
|
1029
|
+
logger.debug(
|
|
1030
|
+
"Table name %s resolved to %s with schema %s",
|
|
1031
|
+
qualified_table,
|
|
1032
|
+
table_name_urn_mapping[qualified_table],
|
|
1033
|
+
schema_info,
|
|
1034
|
+
)
|
|
1025
1035
|
|
|
1026
1036
|
column_lineage: Optional[List[_ColumnLineageInfo]] = None
|
|
1027
1037
|
try:
|
|
File without changes
|
{acryl_datahub-0.15.0.1rc4.dist-info → acryl_datahub-0.15.0.1rc6.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|