acryl-datahub 0.15.0.1rc5__py3-none-any.whl → 0.15.0.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.1rc5.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/METADATA +2456 -2456
- {acryl_datahub-0.15.0.1rc5.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/RECORD +30 -29
- datahub/__init__.py +1 -1
- datahub/ingestion/graph/client.py +6 -3
- datahub/ingestion/source/looker/looker_common.py +9 -0
- datahub/ingestion/source/looker/looker_source.py +19 -3
- datahub/ingestion/source/looker/looker_usage.py +23 -17
- datahub/ingestion/source/mode.py +14 -7
- datahub/ingestion/source/snowflake/snowflake_config.py +3 -25
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +3 -10
- datahub/ingestion/source/snowflake/snowflake_query.py +0 -9
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +1 -5
- datahub/ingestion/source/snowflake/snowflake_shares.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -6
- datahub/ingestion/source/tableau/tableau.py +51 -20
- datahub/ingestion/source_report/ingestion_stage.py +1 -0
- datahub/metadata/_schema_classes.py +195 -2
- datahub/metadata/com/linkedin/pegasus2avro/ml/metadata/__init__.py +2 -0
- datahub/metadata/schema.avsc +188 -4
- datahub/metadata/schemas/DataProcessInstanceKey.avsc +5 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupProperties.avsc +82 -0
- datahub/metadata/schemas/MLModelProperties.avsc +62 -2
- datahub/metadata/schemas/MLTrainingRunProperties.avsc +171 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +94 -2
- datahub/specific/dataproduct.py +2 -2
- datahub/sql_parsing/sqlglot_lineage.py +15 -5
- {acryl_datahub-0.15.0.1rc5.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.1rc5.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0.1rc5.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/top_level.txt +0 -0
|
@@ -5109,6 +5109,21 @@
|
|
|
5109
5109
|
"default": null,
|
|
5110
5110
|
"doc": "URL where the reference exist"
|
|
5111
5111
|
},
|
|
5112
|
+
{
|
|
5113
|
+
"Searchable": {
|
|
5114
|
+
"boostScore": 10.0,
|
|
5115
|
+
"enableAutocomplete": true,
|
|
5116
|
+
"fieldType": "WORD_GRAM",
|
|
5117
|
+
"queryByDefault": true
|
|
5118
|
+
},
|
|
5119
|
+
"type": [
|
|
5120
|
+
"null",
|
|
5121
|
+
"string"
|
|
5122
|
+
],
|
|
5123
|
+
"name": "name",
|
|
5124
|
+
"default": null,
|
|
5125
|
+
"doc": "Display name of the MLModel"
|
|
5126
|
+
},
|
|
5112
5127
|
{
|
|
5113
5128
|
"Searchable": {
|
|
5114
5129
|
"fieldType": "TEXT",
|
|
@@ -5123,6 +5138,7 @@
|
|
|
5123
5138
|
"doc": "Documentation of the MLModel"
|
|
5124
5139
|
},
|
|
5125
5140
|
{
|
|
5141
|
+
"deprecated": true,
|
|
5126
5142
|
"type": [
|
|
5127
5143
|
"null",
|
|
5128
5144
|
"long"
|
|
@@ -5131,6 +5147,24 @@
|
|
|
5131
5147
|
"default": null,
|
|
5132
5148
|
"doc": "Date when the MLModel was developed"
|
|
5133
5149
|
},
|
|
5150
|
+
{
|
|
5151
|
+
"type": [
|
|
5152
|
+
"null",
|
|
5153
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
5154
|
+
],
|
|
5155
|
+
"name": "created",
|
|
5156
|
+
"default": null,
|
|
5157
|
+
"doc": "Audit stamp containing who created this and when"
|
|
5158
|
+
},
|
|
5159
|
+
{
|
|
5160
|
+
"type": [
|
|
5161
|
+
"null",
|
|
5162
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
5163
|
+
],
|
|
5164
|
+
"name": "lastModified",
|
|
5165
|
+
"default": null,
|
|
5166
|
+
"doc": "Date when the MLModel was last modified"
|
|
5167
|
+
},
|
|
5134
5168
|
{
|
|
5135
5169
|
"type": [
|
|
5136
5170
|
"null",
|
|
@@ -5363,7 +5397,8 @@
|
|
|
5363
5397
|
"Relationship": {
|
|
5364
5398
|
"/*": {
|
|
5365
5399
|
"entityTypes": [
|
|
5366
|
-
"dataJob"
|
|
5400
|
+
"dataJob",
|
|
5401
|
+
"dataProcessInstance"
|
|
5367
5402
|
],
|
|
5368
5403
|
"isLineage": true,
|
|
5369
5404
|
"name": "TrainedBy"
|
|
@@ -5378,7 +5413,7 @@
|
|
|
5378
5413
|
],
|
|
5379
5414
|
"name": "trainingJobs",
|
|
5380
5415
|
"default": null,
|
|
5381
|
-
"doc": "List of jobs (if any) used to train the model",
|
|
5416
|
+
"doc": "List of jobs (if any) used to train the model. Visible in Lineage. Note that ML Models can also be specified as the output of a specific Data Process Instances (runs) via the DataProcessInstanceOutputs aspect.",
|
|
5382
5417
|
"Urn": "Urn",
|
|
5383
5418
|
"urn_is_array": true
|
|
5384
5419
|
},
|
|
@@ -6804,6 +6839,21 @@
|
|
|
6804
6839
|
"default": {},
|
|
6805
6840
|
"doc": "Custom property bag."
|
|
6806
6841
|
},
|
|
6842
|
+
{
|
|
6843
|
+
"Searchable": {
|
|
6844
|
+
"boostScore": 10.0,
|
|
6845
|
+
"enableAutocomplete": true,
|
|
6846
|
+
"fieldType": "WORD_GRAM",
|
|
6847
|
+
"queryByDefault": true
|
|
6848
|
+
},
|
|
6849
|
+
"type": [
|
|
6850
|
+
"null",
|
|
6851
|
+
"string"
|
|
6852
|
+
],
|
|
6853
|
+
"name": "name",
|
|
6854
|
+
"default": null,
|
|
6855
|
+
"doc": "Display name of the MLModelGroup"
|
|
6856
|
+
},
|
|
6807
6857
|
{
|
|
6808
6858
|
"Searchable": {
|
|
6809
6859
|
"fieldType": "TEXT",
|
|
@@ -6818,6 +6868,7 @@
|
|
|
6818
6868
|
"doc": "Documentation of the MLModelGroup"
|
|
6819
6869
|
},
|
|
6820
6870
|
{
|
|
6871
|
+
"deprecated": true,
|
|
6821
6872
|
"type": [
|
|
6822
6873
|
"null",
|
|
6823
6874
|
"long"
|
|
@@ -6826,6 +6877,47 @@
|
|
|
6826
6877
|
"default": null,
|
|
6827
6878
|
"doc": "Date when the MLModelGroup was developed"
|
|
6828
6879
|
},
|
|
6880
|
+
{
|
|
6881
|
+
"type": [
|
|
6882
|
+
"null",
|
|
6883
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
6884
|
+
],
|
|
6885
|
+
"name": "created",
|
|
6886
|
+
"default": null,
|
|
6887
|
+
"doc": "Time and Actor who created the MLModelGroup"
|
|
6888
|
+
},
|
|
6889
|
+
{
|
|
6890
|
+
"type": [
|
|
6891
|
+
"null",
|
|
6892
|
+
"com.linkedin.pegasus2avro.common.TimeStamp"
|
|
6893
|
+
],
|
|
6894
|
+
"name": "lastModified",
|
|
6895
|
+
"default": null,
|
|
6896
|
+
"doc": "Date when the MLModelGroup was last modified"
|
|
6897
|
+
},
|
|
6898
|
+
{
|
|
6899
|
+
"Relationship": {
|
|
6900
|
+
"/*": {
|
|
6901
|
+
"entityTypes": [
|
|
6902
|
+
"dataJob"
|
|
6903
|
+
],
|
|
6904
|
+
"isLineage": true,
|
|
6905
|
+
"name": "TrainedBy"
|
|
6906
|
+
}
|
|
6907
|
+
},
|
|
6908
|
+
"type": [
|
|
6909
|
+
"null",
|
|
6910
|
+
{
|
|
6911
|
+
"type": "array",
|
|
6912
|
+
"items": "string"
|
|
6913
|
+
}
|
|
6914
|
+
],
|
|
6915
|
+
"name": "trainingJobs",
|
|
6916
|
+
"default": null,
|
|
6917
|
+
"doc": "List of jobs (if any) used to train the model group. Visible in Lineage.",
|
|
6918
|
+
"Urn": "Urn",
|
|
6919
|
+
"urn_is_array": true
|
|
6920
|
+
},
|
|
6829
6921
|
{
|
|
6830
6922
|
"type": [
|
|
6831
6923
|
"null",
|
datahub/specific/dataproduct.py
CHANGED
|
@@ -131,7 +131,7 @@ class DataProductPatchBuilder(MetadataPatchProposal):
|
|
|
131
131
|
self._add_patch(
|
|
132
132
|
DataProductProperties.ASPECT_NAME,
|
|
133
133
|
"add",
|
|
134
|
-
path=f"/assets/{asset_urn}",
|
|
134
|
+
path=f"/assets/{self.quote(asset_urn)}",
|
|
135
135
|
value=DataProductAssociation(destinationUrn=asset_urn),
|
|
136
136
|
)
|
|
137
137
|
return self
|
|
@@ -140,7 +140,7 @@ class DataProductPatchBuilder(MetadataPatchProposal):
|
|
|
140
140
|
self._add_patch(
|
|
141
141
|
DataProductProperties.ASPECT_NAME,
|
|
142
142
|
"remove",
|
|
143
|
-
path=f"/assets/{asset_urn}",
|
|
143
|
+
path=f"/assets/{self.quote(asset_urn)}",
|
|
144
144
|
value={},
|
|
145
145
|
)
|
|
146
146
|
return self
|
|
@@ -66,6 +66,7 @@ SQL_LINEAGE_TIMEOUT_ENABLED = get_boolean_env_variable(
|
|
|
66
66
|
"SQL_LINEAGE_TIMEOUT_ENABLED", True
|
|
67
67
|
)
|
|
68
68
|
SQL_LINEAGE_TIMEOUT_SECONDS = 10
|
|
69
|
+
SQL_PARSER_TRACE = get_boolean_env_variable("DATAHUB_SQL_PARSER_TRACE", False)
|
|
69
70
|
|
|
70
71
|
|
|
71
72
|
# These rules are a subset of the rules in sqlglot.optimizer.optimizer.RULES.
|
|
@@ -365,10 +366,11 @@ def _prepare_query_columns(
|
|
|
365
366
|
|
|
366
367
|
return node
|
|
367
368
|
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
369
|
+
if SQL_PARSER_TRACE:
|
|
370
|
+
logger.debug(
|
|
371
|
+
"Prior to case normalization sql %s",
|
|
372
|
+
statement.sql(pretty=True, dialect=dialect),
|
|
373
|
+
)
|
|
372
374
|
statement = statement.transform(_sqlglot_force_column_normalizer, copy=False)
|
|
373
375
|
# logger.debug(
|
|
374
376
|
# "Sql after casing normalization %s",
|
|
@@ -562,7 +564,7 @@ def _select_statement_cll( # noqa: C901
|
|
|
562
564
|
)
|
|
563
565
|
)
|
|
564
566
|
|
|
565
|
-
# TODO: Also extract referenced columns (aka
|
|
567
|
+
# TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage)
|
|
566
568
|
except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e:
|
|
567
569
|
raise SqlUnderstandingError(
|
|
568
570
|
f"sqlglot failed to compute some lineage: {e}"
|
|
@@ -1022,6 +1024,14 @@ def _sqlglot_lineage_inner(
|
|
|
1022
1024
|
logger.debug(
|
|
1023
1025
|
f"Resolved {total_schemas_resolved} of {total_tables_discovered} table schemas"
|
|
1024
1026
|
)
|
|
1027
|
+
if SQL_PARSER_TRACE:
|
|
1028
|
+
for qualified_table, schema_info in table_name_schema_mapping.items():
|
|
1029
|
+
logger.debug(
|
|
1030
|
+
"Table name %s resolved to %s with schema %s",
|
|
1031
|
+
qualified_table,
|
|
1032
|
+
table_name_urn_mapping[qualified_table],
|
|
1033
|
+
schema_info,
|
|
1034
|
+
)
|
|
1025
1035
|
|
|
1026
1036
|
column_lineage: Optional[List[_ColumnLineageInfo]] = None
|
|
1027
1037
|
try:
|
|
File without changes
|
{acryl_datahub-0.15.0.1rc5.dist-info → acryl_datahub-0.15.0.1rc7.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|