acryl-datahub 1.0.0rc1__py3-none-any.whl → 1.0.0rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/METADATA +2469 -2469
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/RECORD +49 -48
- datahub/_version.py +1 -1
- datahub/cli/docker_cli.py +2 -2
- datahub/configuration/common.py +1 -1
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/graph/client.py +1 -1
- datahub/ingestion/run/pipeline.py +1 -1
- datahub/ingestion/source/identity/okta.py +13 -2
- datahub/ingestion/source/kafka_connect/common.py +1 -1
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +2 -2
- datahub/ingestion/source/looker/looker_common.py +3 -3
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/mode.py +3 -3
- datahub/ingestion/source/nifi.py +2 -2
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +1 -1
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +3 -0
- datahub/ingestion/source/sql/athena.py +2 -2
- datahub/ingestion/source/sql/hive_metastore.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +1 -1
- datahub/ingestion/source/sql/sql_common.py +1 -1
- datahub/ingestion/source/sql/teradata.py +3 -3
- datahub/ingestion/source/tableau/tableau.py +19 -0
- datahub/metadata/schema.avsc +30 -3
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +7 -2
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/mapping.py +1 -1
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0rc1.dist-info → acryl_datahub-1.0.0rc3.dist-info}/top_level.txt +0 -0
|
@@ -923,7 +923,7 @@ class LookerExplore:
|
|
|
923
923
|
tags=cast(List, dict.get("tags")) if dict.get("tags") is not None else [],
|
|
924
924
|
)
|
|
925
925
|
|
|
926
|
-
@classmethod
|
|
926
|
+
@classmethod
|
|
927
927
|
def from_api( # noqa: C901
|
|
928
928
|
cls,
|
|
929
929
|
model: str,
|
|
@@ -931,7 +931,7 @@ class LookerExplore:
|
|
|
931
931
|
client: LookerAPI,
|
|
932
932
|
reporter: SourceReport,
|
|
933
933
|
source_config: LookerDashboardSourceConfig,
|
|
934
|
-
) -> Optional["LookerExplore"]:
|
|
934
|
+
) -> Optional["LookerExplore"]:
|
|
935
935
|
try:
|
|
936
936
|
explore = client.lookml_model_explore(model, explore_name)
|
|
937
937
|
views: Set[str] = set()
|
|
@@ -1183,7 +1183,7 @@ class LookerExplore:
|
|
|
1183
1183
|
base_url = remove_port_from_url(base_url)
|
|
1184
1184
|
return f"{base_url}/embed/explore/{self.model_name}/{self.name}"
|
|
1185
1185
|
|
|
1186
|
-
def _to_metadata_events(
|
|
1186
|
+
def _to_metadata_events(
|
|
1187
1187
|
self,
|
|
1188
1188
|
config: LookerCommonConfig,
|
|
1189
1189
|
reporter: SourceReport,
|
|
@@ -383,7 +383,7 @@ class LookerDashboardSource(TestableSource, StatefulIngestionSourceBase):
|
|
|
383
383
|
|
|
384
384
|
self.reachable_explores[(model, explore)].append(via)
|
|
385
385
|
|
|
386
|
-
def _get_looker_dashboard_element(
|
|
386
|
+
def _get_looker_dashboard_element(
|
|
387
387
|
self, element: DashboardElement
|
|
388
388
|
) -> Optional[LookerDashboardElement]:
|
|
389
389
|
# Dashboard elements can use raw usage_queries against explores
|
datahub/ingestion/source/mode.py
CHANGED
|
@@ -759,9 +759,9 @@ class ModeSource(StatefulIngestionSourceBase):
|
|
|
759
759
|
return platform, database
|
|
760
760
|
else:
|
|
761
761
|
self.report.report_warning(
|
|
762
|
-
title="
|
|
763
|
-
message=
|
|
764
|
-
f"{data_source_id}",
|
|
762
|
+
title="Unable to construct upstream lineage",
|
|
763
|
+
message="We did not find a data source / connection with a matching ID, meaning that we do not know the platform/database to use in lineage.",
|
|
764
|
+
context=f"Data Source ID: {data_source_id}",
|
|
765
765
|
)
|
|
766
766
|
return None, None
|
|
767
767
|
|
datahub/ingestion/source/nifi.py
CHANGED
|
@@ -488,7 +488,7 @@ class NifiSource(Source):
|
|
|
488
488
|
def get_report(self) -> SourceReport:
|
|
489
489
|
return self.report
|
|
490
490
|
|
|
491
|
-
def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
|
|
491
|
+
def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
|
|
492
492
|
"""
|
|
493
493
|
Update self.nifi_flow with contents of the input process group `pg_flow_dto`
|
|
494
494
|
"""
|
|
@@ -894,7 +894,7 @@ class NifiSource(Source):
|
|
|
894
894
|
if not delete_response.ok:
|
|
895
895
|
logger.error("failed to delete provenance ", provenance_uri)
|
|
896
896
|
|
|
897
|
-
def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
|
|
897
|
+
def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
|
|
898
898
|
rootpg = self.nifi_flow.root_process_group
|
|
899
899
|
flow_name = rootpg.name # self.config.site_name
|
|
900
900
|
flow_urn = self.make_flow_urn()
|
|
@@ -270,7 +270,7 @@ class APISource(Source, ABC):
|
|
|
270
270
|
mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
|
|
271
271
|
return ApiWorkUnit(id=dataset_name, mce=mce)
|
|
272
272
|
|
|
273
|
-
def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
|
|
273
|
+
def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
|
|
274
274
|
config = self.config
|
|
275
275
|
|
|
276
276
|
sw_dict = self.config.get_swagger()
|
|
@@ -33,7 +33,7 @@ class CatalogItem(BaseModel):
|
|
|
33
33
|
)
|
|
34
34
|
|
|
35
35
|
@validator("display_name", always=True)
|
|
36
|
-
def validate_diplay_name(cls, value, values):
|
|
36
|
+
def validate_diplay_name(cls, value, values):
|
|
37
37
|
if values["created_by"]:
|
|
38
38
|
return values["created_by"].split("\\")[-1]
|
|
39
39
|
return ""
|
|
@@ -731,6 +731,9 @@ fingerprinted_queries as (
|
|
|
731
731
|
JOIN filtered_access_history a USING (query_id)
|
|
732
732
|
)
|
|
733
733
|
SELECT * FROM query_access_history
|
|
734
|
+
-- Our query aggregator expects the queries to be added in chronological order.
|
|
735
|
+
-- It's easier for us to push down the sorting to Snowflake/SQL instead of doing it in Python.
|
|
736
|
+
ORDER BY QUERY_START_TIME ASC
|
|
734
737
|
"""
|
|
735
738
|
|
|
736
739
|
|
|
@@ -55,7 +55,7 @@ try:
|
|
|
55
55
|
except ImportError:
|
|
56
56
|
_F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])
|
|
57
57
|
|
|
58
|
-
def override(f: _F, /) -> _F:
|
|
58
|
+
def override(f: _F, /) -> _F:
|
|
59
59
|
return f
|
|
60
60
|
|
|
61
61
|
|
|
@@ -104,7 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
|
|
|
104
104
|
return "\n".join([r for r in res])
|
|
105
105
|
|
|
106
106
|
@typing.no_type_check
|
|
107
|
-
def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:
|
|
107
|
+
def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:
|
|
108
108
|
"""Derives the data type of the Athena column.
|
|
109
109
|
|
|
110
110
|
This method is overwritten to extend the behavior of PyAthena.
|
|
@@ -401,7 +401,7 @@ class SQLServerSource(SQLAlchemySource):
|
|
|
401
401
|
data_job.add_property(name=data_name, value=str(data_value))
|
|
402
402
|
yield from self.construct_job_workunits(data_job)
|
|
403
403
|
|
|
404
|
-
def loop_stored_procedures(
|
|
404
|
+
def loop_stored_procedures(
|
|
405
405
|
self,
|
|
406
406
|
inspector: Inspector,
|
|
407
407
|
schema: str,
|
|
@@ -649,7 +649,7 @@ ORDER by DataBaseName, TableName;
|
|
|
649
649
|
)
|
|
650
650
|
|
|
651
651
|
# Disabling the below because the cached view definition is not the view definition the column in tablesv actually holds the last statement executed against the object... not necessarily the view definition
|
|
652
|
-
# setattr(
|
|
652
|
+
# setattr(
|
|
653
653
|
# TeradataDialect,
|
|
654
654
|
# "get_view_definition",
|
|
655
655
|
# lambda self, connection, view_name, schema=None, **kw: optimized_get_view_definition(
|
|
@@ -746,7 +746,7 @@ ORDER by DataBaseName, TableName;
|
|
|
746
746
|
else:
|
|
747
747
|
raise Exception("Unable to get database name from Sqlalchemy inspector")
|
|
748
748
|
|
|
749
|
-
def cached_loop_tables(
|
|
749
|
+
def cached_loop_tables(
|
|
750
750
|
self,
|
|
751
751
|
inspector: Inspector,
|
|
752
752
|
schema: str,
|
|
@@ -782,7 +782,7 @@ ORDER by DataBaseName, TableName;
|
|
|
782
782
|
break
|
|
783
783
|
return description, properties, location
|
|
784
784
|
|
|
785
|
-
def cached_loop_views(
|
|
785
|
+
def cached_loop_views(
|
|
786
786
|
self,
|
|
787
787
|
inspector: Inspector,
|
|
788
788
|
schema: str,
|
|
@@ -2190,6 +2190,10 @@ class TableauSiteSource:
|
|
|
2190
2190
|
dataset_snapshot.aspects.append(browse_paths)
|
|
2191
2191
|
else:
|
|
2192
2192
|
logger.debug(f"Browse path not set for Custom SQL table {csql_id}")
|
|
2193
|
+
logger.warning(
|
|
2194
|
+
f"Skipping Custom SQL table {csql_id} due to filtered downstream"
|
|
2195
|
+
)
|
|
2196
|
+
continue
|
|
2193
2197
|
|
|
2194
2198
|
dataset_properties = DatasetPropertiesClass(
|
|
2195
2199
|
name=csql.get(c.NAME),
|
|
@@ -2628,6 +2632,15 @@ class TableauSiteSource:
|
|
|
2628
2632
|
datasource_info = datasource
|
|
2629
2633
|
|
|
2630
2634
|
browse_path = self._get_project_browse_path_name(datasource)
|
|
2635
|
+
if (
|
|
2636
|
+
not is_embedded_ds
|
|
2637
|
+
and self._get_published_datasource_project_luid(datasource) is None
|
|
2638
|
+
):
|
|
2639
|
+
logger.warning(
|
|
2640
|
+
f"Skip ingesting published datasource {datasource.get(c.NAME)} because of filtered project"
|
|
2641
|
+
)
|
|
2642
|
+
return
|
|
2643
|
+
|
|
2631
2644
|
logger.debug(f"datasource {datasource.get(c.NAME)} browse-path {browse_path}")
|
|
2632
2645
|
datasource_id = datasource[c.ID]
|
|
2633
2646
|
datasource_urn = builder.make_dataset_urn_with_platform_instance(
|
|
@@ -2851,6 +2864,11 @@ class TableauSiteSource:
|
|
|
2851
2864
|
query_filter=tables_filter,
|
|
2852
2865
|
page_size=self.config.effective_database_table_page_size,
|
|
2853
2866
|
):
|
|
2867
|
+
if tableau_database_table_id_to_urn_map.get(tableau_table[c.ID]) is None:
|
|
2868
|
+
logger.warning(
|
|
2869
|
+
f"Skipping table {tableau_table[c.ID]} due to filtered out published datasource"
|
|
2870
|
+
)
|
|
2871
|
+
continue
|
|
2854
2872
|
database_table = self.database_tables[
|
|
2855
2873
|
tableau_database_table_id_to_urn_map[tableau_table[c.ID]]
|
|
2856
2874
|
]
|
|
@@ -2905,6 +2923,7 @@ class TableauSiteSource:
|
|
|
2905
2923
|
dataset_snapshot.aspects.append(browse_paths)
|
|
2906
2924
|
else:
|
|
2907
2925
|
logger.debug(f"Browse path not set for table {database_table.urn}")
|
|
2926
|
+
return
|
|
2908
2927
|
|
|
2909
2928
|
schema_metadata = self.get_schema_metadata_for_table(
|
|
2910
2929
|
tableau_columns, database_table.parsed_columns
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -3504,7 +3504,9 @@
|
|
|
3504
3504
|
"fieldName": "glossaryTerms",
|
|
3505
3505
|
"fieldType": "URN",
|
|
3506
3506
|
"filterNameOverride": "Glossary Term",
|
|
3507
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
3507
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
3508
|
+
"includeSystemModifiedAt": true,
|
|
3509
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
3508
3510
|
},
|
|
3509
3511
|
"java": {
|
|
3510
3512
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -10339,7 +10341,9 @@
|
|
|
10339
10341
|
"/terms/*/urn": {
|
|
10340
10342
|
"boostScore": 0.5,
|
|
10341
10343
|
"fieldName": "editedFieldGlossaryTerms",
|
|
10342
|
-
"fieldType": "URN"
|
|
10344
|
+
"fieldType": "URN",
|
|
10345
|
+
"includeSystemModifiedAt": true,
|
|
10346
|
+
"systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
|
|
10343
10347
|
}
|
|
10344
10348
|
},
|
|
10345
10349
|
"type": [
|
|
@@ -10477,6 +10481,12 @@
|
|
|
10477
10481
|
"namespace": "com.linkedin.pegasus2avro.businessattribute",
|
|
10478
10482
|
"fields": [
|
|
10479
10483
|
{
|
|
10484
|
+
"Searchable": {
|
|
10485
|
+
"fieldName": "schemaFieldBusinessAttribute",
|
|
10486
|
+
"includeSystemModifiedAt": true,
|
|
10487
|
+
"queryByDefault": false,
|
|
10488
|
+
"systemModifiedAtFieldName": "schemaFieldBusinessAttributeModifiedAt"
|
|
10489
|
+
},
|
|
10480
10490
|
"java": {
|
|
10481
10491
|
"class": "com.linkedin.pegasus2avro.common.urn.BusinessAttributeUrn"
|
|
10482
10492
|
},
|
|
@@ -15077,6 +15087,7 @@
|
|
|
15077
15087
|
},
|
|
15078
15088
|
{
|
|
15079
15089
|
"Searchable": {
|
|
15090
|
+
"boostScore": 10.0,
|
|
15080
15091
|
"enableAutocomplete": true,
|
|
15081
15092
|
"fieldNameAliases": [
|
|
15082
15093
|
"_entityName"
|
|
@@ -16546,7 +16557,9 @@
|
|
|
16546
16557
|
"/terms/*/urn": {
|
|
16547
16558
|
"boostScore": 0.5,
|
|
16548
16559
|
"fieldName": "editedFieldGlossaryTerms",
|
|
16549
|
-
"fieldType": "URN"
|
|
16560
|
+
"fieldType": "URN",
|
|
16561
|
+
"includeSystemModifiedAt": true,
|
|
16562
|
+
"systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
|
|
16550
16563
|
}
|
|
16551
16564
|
},
|
|
16552
16565
|
"type": [
|
|
@@ -19423,6 +19436,13 @@
|
|
|
19423
19436
|
"doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
|
|
19424
19437
|
},
|
|
19425
19438
|
{
|
|
19439
|
+
"UrnValidation": {
|
|
19440
|
+
"entityTypes": [
|
|
19441
|
+
"dataType"
|
|
19442
|
+
],
|
|
19443
|
+
"exist": true,
|
|
19444
|
+
"strict": true
|
|
19445
|
+
},
|
|
19426
19446
|
"java": {
|
|
19427
19447
|
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
19428
19448
|
},
|
|
@@ -19511,6 +19531,13 @@
|
|
|
19511
19531
|
"fieldName": "entityTypes"
|
|
19512
19532
|
}
|
|
19513
19533
|
},
|
|
19534
|
+
"UrnValidation": {
|
|
19535
|
+
"entityTypes": [
|
|
19536
|
+
"entityType"
|
|
19537
|
+
],
|
|
19538
|
+
"exist": true,
|
|
19539
|
+
"strict": true
|
|
19540
|
+
},
|
|
19514
19541
|
"Urn": "Urn",
|
|
19515
19542
|
"urn_is_array": true,
|
|
19516
19543
|
"type": {
|
|
@@ -2010,7 +2010,9 @@
|
|
|
2010
2010
|
"fieldName": "glossaryTerms",
|
|
2011
2011
|
"fieldType": "URN",
|
|
2012
2012
|
"filterNameOverride": "Glossary Term",
|
|
2013
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
2013
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
2014
|
+
"includeSystemModifiedAt": true,
|
|
2015
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
2014
2016
|
},
|
|
2015
2017
|
"java": {
|
|
2016
2018
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -221,7 +221,9 @@
|
|
|
221
221
|
"/terms/*/urn": {
|
|
222
222
|
"boostScore": 0.5,
|
|
223
223
|
"fieldName": "editedFieldGlossaryTerms",
|
|
224
|
-
"fieldType": "URN"
|
|
224
|
+
"fieldType": "URN",
|
|
225
|
+
"includeSystemModifiedAt": true,
|
|
226
|
+
"systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
|
|
225
227
|
}
|
|
226
228
|
},
|
|
227
229
|
"type": [
|
|
@@ -254,7 +256,9 @@
|
|
|
254
256
|
"fieldName": "glossaryTerms",
|
|
255
257
|
"fieldType": "URN",
|
|
256
258
|
"filterNameOverride": "Glossary Term",
|
|
257
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
259
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
260
|
+
"includeSystemModifiedAt": true,
|
|
261
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
258
262
|
},
|
|
259
263
|
"java": {
|
|
260
264
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -31,6 +31,12 @@
|
|
|
31
31
|
"namespace": "com.linkedin.pegasus2avro.businessattribute",
|
|
32
32
|
"fields": [
|
|
33
33
|
{
|
|
34
|
+
"Searchable": {
|
|
35
|
+
"fieldName": "schemaFieldBusinessAttribute",
|
|
36
|
+
"includeSystemModifiedAt": true,
|
|
37
|
+
"queryByDefault": false,
|
|
38
|
+
"systemModifiedAtFieldName": "schemaFieldBusinessAttributeModifiedAt"
|
|
39
|
+
},
|
|
34
40
|
"java": {
|
|
35
41
|
"class": "com.linkedin.pegasus2avro.common.urn.BusinessAttributeUrn"
|
|
36
42
|
},
|
|
@@ -303,7 +303,9 @@
|
|
|
303
303
|
"/terms/*/urn": {
|
|
304
304
|
"boostScore": 0.5,
|
|
305
305
|
"fieldName": "editedFieldGlossaryTerms",
|
|
306
|
-
"fieldType": "URN"
|
|
306
|
+
"fieldType": "URN",
|
|
307
|
+
"includeSystemModifiedAt": true,
|
|
308
|
+
"systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
|
|
307
309
|
}
|
|
308
310
|
},
|
|
309
311
|
"type": [
|
|
@@ -336,7 +338,9 @@
|
|
|
336
338
|
"fieldName": "glossaryTerms",
|
|
337
339
|
"fieldType": "URN",
|
|
338
340
|
"filterNameOverride": "Glossary Term",
|
|
339
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
341
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
342
|
+
"includeSystemModifiedAt": true,
|
|
343
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
340
344
|
},
|
|
341
345
|
"java": {
|
|
342
346
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -26,7 +26,9 @@
|
|
|
26
26
|
"fieldName": "glossaryTerms",
|
|
27
27
|
"fieldType": "URN",
|
|
28
28
|
"filterNameOverride": "Glossary Term",
|
|
29
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
29
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
30
|
+
"includeSystemModifiedAt": true,
|
|
31
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
30
32
|
},
|
|
31
33
|
"java": {
|
|
32
34
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -553,7 +553,9 @@
|
|
|
553
553
|
"fieldName": "glossaryTerms",
|
|
554
554
|
"fieldType": "URN",
|
|
555
555
|
"filterNameOverride": "Glossary Term",
|
|
556
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
556
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
557
|
+
"includeSystemModifiedAt": true,
|
|
558
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
557
559
|
},
|
|
558
560
|
"java": {
|
|
559
561
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -183,6 +183,7 @@
|
|
|
183
183
|
},
|
|
184
184
|
{
|
|
185
185
|
"Searchable": {
|
|
186
|
+
"boostScore": 10.0,
|
|
186
187
|
"enableAutocomplete": true,
|
|
187
188
|
"fieldNameAliases": [
|
|
188
189
|
"_entityName"
|
|
@@ -994,7 +995,9 @@
|
|
|
994
995
|
"fieldName": "glossaryTerms",
|
|
995
996
|
"fieldType": "URN",
|
|
996
997
|
"filterNameOverride": "Glossary Term",
|
|
997
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
998
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
999
|
+
"includeSystemModifiedAt": true,
|
|
1000
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
998
1001
|
},
|
|
999
1002
|
"java": {
|
|
1000
1003
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -4644,7 +4647,9 @@
|
|
|
4644
4647
|
"/terms/*/urn": {
|
|
4645
4648
|
"boostScore": 0.5,
|
|
4646
4649
|
"fieldName": "editedFieldGlossaryTerms",
|
|
4647
|
-
"fieldType": "URN"
|
|
4650
|
+
"fieldType": "URN",
|
|
4651
|
+
"includeSystemModifiedAt": true,
|
|
4652
|
+
"systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
|
|
4648
4653
|
}
|
|
4649
4654
|
},
|
|
4650
4655
|
"type": [
|
|
@@ -777,7 +777,9 @@
|
|
|
777
777
|
"fieldName": "glossaryTerms",
|
|
778
778
|
"fieldType": "URN",
|
|
779
779
|
"filterNameOverride": "Glossary Term",
|
|
780
|
-
"hasValuesFieldName": "hasGlossaryTerms"
|
|
780
|
+
"hasValuesFieldName": "hasGlossaryTerms",
|
|
781
|
+
"includeSystemModifiedAt": true,
|
|
782
|
+
"systemModifiedAtFieldName": "termsModifiedAt"
|
|
781
783
|
},
|
|
782
784
|
"java": {
|
|
783
785
|
"class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
|
|
@@ -23,6 +23,13 @@
|
|
|
23
23
|
"doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
|
|
24
24
|
},
|
|
25
25
|
{
|
|
26
|
+
"UrnValidation": {
|
|
27
|
+
"entityTypes": [
|
|
28
|
+
"dataType"
|
|
29
|
+
],
|
|
30
|
+
"exist": true,
|
|
31
|
+
"strict": true
|
|
32
|
+
},
|
|
26
33
|
"java": {
|
|
27
34
|
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
28
35
|
},
|
|
@@ -111,6 +118,13 @@
|
|
|
111
118
|
"fieldName": "entityTypes"
|
|
112
119
|
}
|
|
113
120
|
},
|
|
121
|
+
"UrnValidation": {
|
|
122
|
+
"entityTypes": [
|
|
123
|
+
"entityType"
|
|
124
|
+
],
|
|
125
|
+
"exist": true,
|
|
126
|
+
"strict": true
|
|
127
|
+
},
|
|
114
128
|
"type": {
|
|
115
129
|
"type": "array",
|
|
116
130
|
"items": "string"
|
|
@@ -13,7 +13,7 @@ from datahub.ingestion.graph.client import DataHubGraph
|
|
|
13
13
|
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
|
|
14
14
|
from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
|
|
15
15
|
from datahub.metadata.urns import DataPlatformUrn
|
|
16
|
-
from datahub.sql_parsing._models import _TableName as _TableName
|
|
16
|
+
from datahub.sql_parsing._models import _TableName as _TableName
|
|
17
17
|
from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
|
|
18
18
|
from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
|
|
19
19
|
from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import os
|
|
3
2
|
import pathlib
|
|
4
3
|
from typing import Any, Dict, Optional
|
|
5
4
|
|
|
@@ -8,11 +7,10 @@ import deepdiff
|
|
|
8
7
|
from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
|
|
9
8
|
from datahub.sql_parsing.schema_resolver import SchemaInfo, SchemaResolver
|
|
10
9
|
from datahub.sql_parsing.sqlglot_lineage import SqlParsingResult, sqlglot_lineage
|
|
10
|
+
from datahub.testing.pytest_hooks import get_golden_settings
|
|
11
11
|
|
|
12
12
|
logger = logging.getLogger(__name__)
|
|
13
13
|
|
|
14
|
-
UPDATE_FILES = os.environ.get("UPDATE_SQLPARSER_FILES", "false").lower() == "true"
|
|
15
|
-
|
|
16
14
|
|
|
17
15
|
def assert_sql_result_with_resolver(
|
|
18
16
|
sql: str,
|
|
@@ -22,6 +20,8 @@ def assert_sql_result_with_resolver(
|
|
|
22
20
|
allow_table_error: bool = False,
|
|
23
21
|
**kwargs: Any,
|
|
24
22
|
) -> None:
|
|
23
|
+
settings = get_golden_settings()
|
|
24
|
+
|
|
25
25
|
# HACK: Our BigQuery source overwrites this value and doesn't undo it.
|
|
26
26
|
# As such, we need to handle that here.
|
|
27
27
|
BigqueryTableIdentifier._BQ_SHARDED_TABLE_SUFFIX = "_yyyymmdd"
|
|
@@ -47,15 +47,14 @@ def assert_sql_result_with_resolver(
|
|
|
47
47
|
)
|
|
48
48
|
|
|
49
49
|
txt = res.json(indent=4)
|
|
50
|
-
if UPDATE_FILES:
|
|
50
|
+
if settings.update_golden:
|
|
51
51
|
expected_file.write_text(txt)
|
|
52
52
|
return
|
|
53
53
|
|
|
54
54
|
if not expected_file.exists():
|
|
55
55
|
expected_file.write_text(txt)
|
|
56
56
|
raise AssertionError(
|
|
57
|
-
f"
|
|
58
|
-
"Created it with the expected output. Please verify it."
|
|
57
|
+
f"Missing expected golden file; run with --update-golden-files to create it: {expected_file}"
|
|
59
58
|
)
|
|
60
59
|
|
|
61
60
|
expected = SqlParsingResult.parse_raw(expected_file.read_text())
|
|
@@ -16,6 +16,7 @@ from deepdiff import DeepDiff
|
|
|
16
16
|
from datahub.ingestion.sink.file import write_metadata_file
|
|
17
17
|
from datahub.ingestion.source.file import read_metadata_file
|
|
18
18
|
from datahub.testing.mcp_diff import CannotCompareMCPs, MCPDiff, get_aspects_by_urn
|
|
19
|
+
from datahub.testing.pytest_hooks import get_golden_settings
|
|
19
20
|
|
|
20
21
|
logger = logging.getLogger(__name__)
|
|
21
22
|
|
|
@@ -40,26 +41,26 @@ def load_json_file(filename: Union[str, os.PathLike]) -> MetadataJson:
|
|
|
40
41
|
def assert_metadata_files_equal(
|
|
41
42
|
output_path: Union[str, os.PathLike],
|
|
42
43
|
golden_path: Union[str, os.PathLike],
|
|
43
|
-
update_golden: bool,
|
|
44
|
-
copy_output: bool,
|
|
45
44
|
ignore_paths: Sequence[str] = (),
|
|
46
45
|
ignore_paths_v2: Sequence[str] = (),
|
|
47
46
|
ignore_order: bool = True,
|
|
48
47
|
) -> None:
|
|
48
|
+
settings = get_golden_settings()
|
|
49
|
+
|
|
49
50
|
golden_exists = os.path.isfile(golden_path)
|
|
50
51
|
|
|
51
|
-
if copy_output:
|
|
52
|
+
if settings.copy_output:
|
|
52
53
|
shutil.copyfile(str(output_path), str(golden_path) + ".output")
|
|
53
54
|
logger.info(f"Copied output file to {golden_path}.output")
|
|
54
55
|
|
|
55
|
-
if not update_golden and not golden_exists:
|
|
56
|
+
if not settings.update_golden and not golden_exists:
|
|
56
57
|
raise FileNotFoundError(
|
|
57
58
|
"Golden file does not exist. Please run with the --update-golden-files option to create."
|
|
58
59
|
)
|
|
59
60
|
|
|
60
61
|
output = load_json_file(output_path)
|
|
61
62
|
|
|
62
|
-
if update_golden and not golden_exists:
|
|
63
|
+
if settings.update_golden and not golden_exists:
|
|
63
64
|
shutil.copyfile(str(output_path), str(golden_path))
|
|
64
65
|
return
|
|
65
66
|
else:
|
|
@@ -87,7 +88,7 @@ def assert_metadata_files_equal(
|
|
|
87
88
|
ignore_paths = (*ignore_paths, *default_exclude_paths)
|
|
88
89
|
|
|
89
90
|
diff = diff_metadata_json(output, golden, ignore_paths, ignore_order=ignore_order)
|
|
90
|
-
if diff and update_golden:
|
|
91
|
+
if diff and settings.update_golden:
|
|
91
92
|
if isinstance(diff, MCPDiff) and diff.is_delta_valid:
|
|
92
93
|
logger.info(f"Applying delta to golden file {golden_path}")
|
|
93
94
|
diff.apply_delta(golden)
|