acryl-datahub 1.1.0.5rc11__py3-none-any.whl → 1.1.0.5rc13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/METADATA +2593 -2593
- {acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/RECORD +29 -27
- datahub/_version.py +1 -1
- datahub/cli/quickstart_versioning.py +4 -6
- datahub/ingestion/api/source.py +2 -1
- datahub/ingestion/source/abs/source.py +1 -1
- datahub/ingestion/source/azure/azure_common.py +2 -2
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
- datahub/ingestion/source/data_lake_common/object_store.py +75 -27
- datahub/ingestion/source/dbt/dbt_cloud.py +7 -2
- datahub/ingestion/source/dbt/dbt_common.py +2 -0
- datahub/ingestion/source/mock_data/datahub_mock_data.py +55 -13
- datahub/ingestion/source/snowflake/snowflake_config.py +15 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +57 -19
- datahub/ingestion/source/snowflake/snowflake_v2.py +2 -0
- datahub/ingestion/source/sql/athena.py +10 -0
- datahub/ingestion/source/sql/oracle.py +1 -1
- datahub/ingestion/source/tableau/tableau_common.py +4 -2
- datahub/metadata/_internal_schema_classes.py +35 -2
- datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
- datahub/metadata/schema.avsc +38 -11
- datahub/metadata/schemas/DatasetKey.avsc +2 -1
- datahub/metadata/schemas/LogicalParent.avsc +140 -0
- datahub/metadata/schemas/QuerySubjects.avsc +1 -12
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- {acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/snowflake/snowflake_queries.py
CHANGED

@@ -787,6 +787,11 @@ class QueryLogQueryBuilder:
         would be incorrectly filtered out because they don't populate the DML arrays, causing missing lineage
         and operational metadata.
 
+        Filtering Logic:
+        A query is included if it matches:
+        - Any database name in additional_database_names (exact match), OR
+        - Any database pattern in database_pattern.allow AND NOT any pattern in database_pattern.deny
+
         Args:
             database_pattern: The AllowDenyPattern configuration for database filtering
             additional_database_names: Additional database names to always include (no pattern matching)
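The new docstring wording describes an OR of two branches. As a plain-Python mirror of that predicate, a hedged sketch (the real filter is compiled into Snowflake SQL; AllowDenyPattern matching is approximated here with re.match):

    import re
    from typing import List, Optional

    def is_database_included(
        db_name: str,
        allow: List[str],
        deny: List[str],
        additional_database_names: Optional[List[str]] = None,
    ) -> bool:
        # Branch 1: exact, case-insensitive match against the allowlist always wins.
        if additional_database_names and db_name.upper() in {
            n.upper() for n in additional_database_names
        }:
            return True
        # Branch 2: must match at least one allow pattern AND no deny pattern.
        matches_allow = any(re.match(p, db_name, re.IGNORECASE) for p in allow)
        matches_deny = any(re.match(p, db_name, re.IGNORECASE) for p in deny)
        return matches_allow and not matches_deny

    # is_database_included("LEGACY_DB", allow=["PROD_.*"], deny=[],
    #                      additional_database_names=["LEGACY_DB"]) -> True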
@@ -797,14 +802,31 @@ class QueryLogQueryBuilder:
         if not database_pattern and not additional_database_names:
             return "TRUE"
 
-        # Build the database filter conditions
+        # Build the database filter conditions
+        # Logic: Allow if (matches additional_database_names_allowlist) OR (matches database_pattern.allow AND NOT matches database_pattern.deny)
         # Note: Using UPPER() + RLIKE for case-insensitive matching is more performant than REGEXP_LIKE with 'i' flag
-        database_filter_parts = []
 
+        # Build additional database names condition (exact matches) - these always get included
+        additional_db_condition = None
+        if additional_database_names:
+            additional_db_conditions = []
+            for db_name in additional_database_names:
+                # Escape single quotes
+                escaped_db_name = db_name.replace("'", "''")
+                additional_db_conditions.append(
+                    f"SPLIT_PART(UPPER(o:objectName), '.', 1) = '{escaped_db_name.upper()}'"
+                )
+            if additional_db_conditions:
+                additional_db_condition = " OR ".join(additional_db_conditions)
+
+        # Build database pattern condition (allow AND NOT deny)
+        database_pattern_condition = None
         if database_pattern:
             allow_patterns = database_pattern.allow
             deny_patterns = database_pattern.deny
 
+            pattern_parts = []
+
             # Add allow patterns (if not the default "allow all")
             if allow_patterns and allow_patterns != [".*"]:
                 allow_conditions = []
@@ -815,7 +837,11 @@ class QueryLogQueryBuilder:
                         f"SPLIT_PART(UPPER(o:objectName), '.', 1) RLIKE '{escaped_pattern}'"
                     )
                 if allow_conditions:
-
+                    pattern_parts.append(
+                        allow_conditions[0]
+                        if len(allow_conditions) == 1
+                        else f"({' OR '.join(allow_conditions)})"
+                    )
 
             # Add deny patterns
             if deny_patterns:
@@ -827,24 +853,36 @@
                         f"SPLIT_PART(UPPER(o:objectName), '.', 1) NOT RLIKE '{escaped_pattern}'"
                     )
                 if deny_conditions:
-
+                    pattern_parts.append(
+                        deny_conditions[0]
+                        if len(deny_conditions) == 1
+                        else f"({' AND '.join(deny_conditions)})"
+                    )
 
-
-
-        additional_db_conditions = []
-        for db_name in additional_database_names:
-            # Escape single quotes
-            escaped_db_name = db_name.replace("'", "''")
-            additional_db_conditions.append(
-                f"SPLIT_PART(UPPER(o:objectName), '.', 1) = '{escaped_db_name.upper()}'"
-            )
-        if additional_db_conditions:
-            database_filter_parts.append(
-                f"({' OR '.join(additional_db_conditions)})"
-            )
+            if pattern_parts:
+                database_pattern_condition = " AND ".join(pattern_parts)
 
-
-
+        # Combine conditions: additional_database_names OR database_pattern
+        filter_conditions = []
+        if additional_db_condition:
+            filter_conditions.append(
+                f"({additional_db_condition})"
+                if len(additional_db_condition.split(" OR ")) > 1
+                else additional_db_condition
+            )
+        if database_pattern_condition:
+            filter_conditions.append(
+                f"({database_pattern_condition})"
+                if len(database_pattern_condition.split(" AND ")) > 1
+                else database_pattern_condition
+            )
+
+        if filter_conditions:
+            database_filter_condition = (
+                filter_conditions[0]
+                if len(filter_conditions) == 1
+                else " OR ".join(filter_conditions)
+            )
 
         # Build a condition that checks if any objects in the arrays match the database pattern
         # This implements "at least one" matching behavior: queries are allowed if they touch
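The combine step at the end is the crux of the change: the two branch conditions are OR'ed together, with multi-part branches parenthesized. A standalone sketch of just that step (a simplified mirror of the code above; the sample inputs are hypothetical):

    def combine_conditions(additional_db_condition, database_pattern_condition):
        # OR the two branches; wrap a branch in parens only when it is multi-part.
        filter_conditions = []
        if additional_db_condition:
            filter_conditions.append(
                f"({additional_db_condition})"
                if " OR " in additional_db_condition
                else additional_db_condition
            )
        if database_pattern_condition:
            filter_conditions.append(
                f"({database_pattern_condition})"
                if " AND " in database_pattern_condition
                else database_pattern_condition
            )
        return " OR ".join(filter_conditions) if filter_conditions else "TRUE"

    print(combine_conditions(
        "SPLIT_PART(UPPER(o:objectName), '.', 1) = 'LEGACY_DB'",
        "SPLIT_PART(UPPER(o:objectName), '.', 1) RLIKE 'PROD_.*'",
    ))
    # SPLIT_PART(UPPER(o:objectName), '.', 1) = 'LEGACY_DB' OR SPLIT_PART(UPPER(o:objectName), '.', 1) RLIKE 'PROD_.*'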
datahub/ingestion/source/snowflake/snowflake_v2.py
CHANGED

@@ -601,6 +601,8 @@ class SnowflakeV2Source(
                 user_email_pattern=self.config.user_email_pattern,
                 pushdown_deny_usernames=self.config.pushdown_deny_usernames,
                 query_dedup_strategy=self.config.query_dedup_strategy,
+                push_down_database_pattern_access_history=self.config.push_down_database_pattern_access_history,
+                additional_database_names_allowlist=self.config.additional_database_names_allowlist,
             ),
             structured_report=self.report,
             filters=self.filters,
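These two settings mirror the new snowflake_config.py fields (+15 -1 in the file list above). A hedged sketch of how they might be set on the Snowflake source config, written as a Python dict for illustration (the field names come from the diff; the surrounding recipe structure and the exact semantics are assumptions):

    snowflake_source_config = {
        # Assumed semantics: push the database allow/deny filtering down into
        # the access_history query so it runs inside Snowflake.
        "push_down_database_pattern_access_history": True,
        # Databases that bypass pattern matching and are always included.
        "additional_database_names_allowlist": ["LEGACY_DB", "SHARED_DB"],
        "database_pattern": {"allow": ["PROD_.*"], "deny": ["PROD_TMP_.*"]},
    }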
datahub/ingestion/source/sql/athena.py
CHANGED

@@ -303,6 +303,11 @@ class AthenaConfig(SQLCommonConfig):
         print_warning=True,
     )
 
+    emit_schema_fieldpaths_as_v1: bool = pydantic.Field(
+        default=False,
+        description="Convert simple field paths to DataHub field path v1 format. Simple column paths are those that do not contain any nested fields.",
+    )
+
     profiling: AthenaProfilingConfig = AthenaProfilingConfig()
 
     def get_sql_alchemy_url(self):
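For context on what this flag toggles: DataHub's v2 field paths embed version and type segments, while v1 paths are the bare column names. A hedged illustration (the exact v2 encoding varies by column type):

    v2_path = "[version=2.0].[type=string].customer_id"  # default (v2) path for a simple column
    v1_path = "customer_id"  # with emit_schema_fieldpaths_as_v1=True
    # Nested columns (structs, maps, arrays) keep the v2 encoding either way;
    # the early-return guard in the next hunk leaves paths untouched when the flag is off.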
@@ -641,6 +646,11 @@ class AthenaSource(SQLAlchemySource):
                 partition_keys is not None and column["name"] in partition_keys
             ),
         )
+
+        # Keeping it as individual check to make it more explicit and easier to understand
+        if not self.config.emit_schema_fieldpaths_as_v1:
+            return fields
+
         if isinstance(
             fields[0].type.type, (RecordTypeClass, MapTypeClass, ArrayTypeClass)
         ):
datahub/ingestion/source/sql/oracle.py
CHANGED

@@ -441,7 +441,7 @@ class OracleInspectorObjectWrapper:
             "\nac.constraint_name,"
             "\nac.constraint_type,"
             "\nacc.column_name AS local_column,"
-            "\nac.
+            "\nac.table_name AS remote_table,"
             "\nrcc.column_name AS remote_column,"
             "\nac.r_owner AS remote_owner,"
             "\nacc.position AS loc_pos,"
datahub/ingestion/source/tableau/tableau_common.py
CHANGED

@@ -579,10 +579,12 @@ def get_platform(connection_type: str) -> str:
         platform = "oracle"
     elif connection_type in ("tbio", "teradata"):
         platform = "teradata"
-    elif connection_type in ("sqlserver"):
+    elif connection_type in ("sqlserver",):
         platform = "mssql"
-    elif connection_type in ("athena"):
+    elif connection_type in ("athena",):
         platform = "athena"
+    elif connection_type in ("googlebigquery",):
+        platform = "bigquery"
     elif connection_type.endswith("_jdbc"):
         # e.g. convert trino_jdbc -> trino
         platform = connection_type[: -len("_jdbc")]
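The added trailing commas are a genuine bug fix, not style: ("sqlserver") without a comma is just a parenthesized string, so `in` performs a substring test instead of a membership test. A quick demonstration:

    assert ("sql" in ("sqlserver")) is True       # substring match against a plain string
    assert ("sql" in ("sqlserver",)) is False     # tuple membership, as intended
    assert ("sqlserver" in ("sqlserver",)) is True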
datahub/metadata/_internal_schema_classes.py
CHANGED

@@ -15248,6 +15248,35 @@ class DataHubIngestionSourceSourceTypeClass(object):
 
 
 
+class LogicalParentClass(_Aspect):
+    # No docs available.
+
+
+    ASPECT_NAME = 'logicalParent'
+    ASPECT_INFO = {}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.logical.LogicalParent")
+
+    def __init__(self,
+        parent: "EdgeClass",
+    ):
+        super().__init__()
+
+        self.parent = parent
+
+    def _restore_defaults(self) -> None:
+        self.parent = EdgeClass._construct_with_defaults()
+
+
+    @property
+    def parent(self) -> "EdgeClass":
+        # No docs available.
+        return self._inner_dict.get('parent')  # type: ignore
+
+    @parent.setter
+    def parent(self, value: "EdgeClass") -> None:
+        self._inner_dict['parent'] = value
+
+
 class AssertionKeyClass(_Aspect):
     """Key for a Assertion"""
 
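A hedged sketch of emitting the new aspect from Python, assuming the usual MetadataChangeProposalWrapper path and that schema_classes re-exports the generated class (the URNs are made up for illustration):

    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import EdgeClass, LogicalParentClass

    # Point a physical dataset at its logical parent via the new aspect.
    mcp = MetadataChangeProposalWrapper(
        entityUrn="urn:li:dataset:(urn:li:dataPlatform:snowflake,prod_db.sales.orders,PROD)",
        aspect=LogicalParentClass(
            parent=EdgeClass(
                destinationUrn="urn:li:dataset:(urn:li:dataPlatform:snowflake,logical_db.sales.orders,PROD)"
            )
        ),
    )
    # mcp can then be sent with any emitter, e.g. DatahubRestEmitter(...).emit(mcp).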
@@ -16183,7 +16212,7 @@ class DatasetKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'datasetKey'
-    ASPECT_INFO = {'keyForEntity': 'dataset', 'entityCategory': 'core', 'entityAspects': ['viewProperties', 'subTypes', 'datasetProfile', 'datasetUsageStatistics', 'operation', 'domains', 'applications', 'schemaMetadata', 'status', 'container', 'deprecation', 'testResults', 'siblings', 'embed', 'incidentsSummary', 'datasetProperties', 'editableDatasetProperties', 'datasetDeprecation', 'datasetUpstreamLineage', 'upstreamLineage', 'institutionalMemory', 'ownership', 'editableSchemaMetadata', 'globalTags', 'glossaryTerms', 'browsePaths', 'dataPlatformInstance', 'browsePathsV2', 'access', 'structuredProperties', 'forms', 'partitionsSummary', 'versionProperties', 'icebergCatalogInfo'], 'entityDoc': 'Datasets represent logical or physical data assets stored or represented in various data platforms. Tables, Views, Streams are all instances of datasets.'}
+    ASPECT_INFO = {'keyForEntity': 'dataset', 'entityCategory': 'core', 'entityAspects': ['viewProperties', 'subTypes', 'datasetProfile', 'datasetUsageStatistics', 'operation', 'domains', 'applications', 'schemaMetadata', 'status', 'container', 'deprecation', 'testResults', 'siblings', 'embed', 'incidentsSummary', 'datasetProperties', 'editableDatasetProperties', 'datasetDeprecation', 'datasetUpstreamLineage', 'upstreamLineage', 'institutionalMemory', 'ownership', 'editableSchemaMetadata', 'globalTags', 'glossaryTerms', 'browsePaths', 'dataPlatformInstance', 'browsePathsV2', 'access', 'structuredProperties', 'forms', 'partitionsSummary', 'versionProperties', 'icebergCatalogInfo', 'logicalParent'], 'entityDoc': 'Datasets represent logical or physical data assets stored or represented in various data platforms. Tables, Views, Streams are all instances of datasets.'}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DatasetKey")
 
     def __init__(self,
@@ -16949,7 +16978,7 @@ class SchemaFieldKeyClass(_Aspect):
 
 
     ASPECT_NAME = 'schemaFieldKey'
-    ASPECT_INFO = {'keyForEntity': 'schemaField', 'entityCategory': 'core', 'entityAspects': ['schemafieldInfo', 'structuredProperties', 'forms', 'businessAttributes', 'status', 'schemaFieldAliases', 'documentation', 'testResults', 'deprecation', 'subTypes']}
+    ASPECT_INFO = {'keyForEntity': 'schemaField', 'entityCategory': 'core', 'entityAspects': ['schemafieldInfo', 'structuredProperties', 'forms', 'businessAttributes', 'status', 'schemaFieldAliases', 'documentation', 'testResults', 'deprecation', 'subTypes', 'logicalParent']}
     RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.SchemaFieldKey")
 
     def __init__(self,
@@ -26963,6 +26992,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.ingestion.DataHubIngestionSourceSchedule': DataHubIngestionSourceScheduleClass,
     'com.linkedin.pegasus2avro.ingestion.DataHubIngestionSourceSource': DataHubIngestionSourceSourceClass,
     'com.linkedin.pegasus2avro.ingestion.DataHubIngestionSourceSourceType': DataHubIngestionSourceSourceTypeClass,
+    'com.linkedin.pegasus2avro.logical.LogicalParent': LogicalParentClass,
     'com.linkedin.pegasus2avro.metadata.key.AssertionKey': AssertionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.ChartKey': ChartKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.ContainerKey': ContainerKeyClass,
@@ -27470,6 +27500,7 @@ __SCHEMA_TYPES = {
     'DataHubIngestionSourceSchedule': DataHubIngestionSourceScheduleClass,
     'DataHubIngestionSourceSource': DataHubIngestionSourceSourceClass,
     'DataHubIngestionSourceSourceType': DataHubIngestionSourceSourceTypeClass,
+    'LogicalParent': LogicalParentClass,
     'AssertionKey': AssertionKeyClass,
     'ChartKey': ChartKeyClass,
     'ContainerKey': ContainerKeyClass,
@@ -27792,6 +27823,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     ERModelRelationshipPropertiesClass,
     EditableSchemaMetadataClass,
     SchemaMetadataClass,
+    LogicalParentClass,
     ChartInfoClass,
     ChartUsageStatisticsClass,
     ChartQueryClass,
@@ -28024,6 +28056,7 @@ class AspectBag(TypedDict, total=False):
     erModelRelationshipProperties: ERModelRelationshipPropertiesClass
     editableSchemaMetadata: EditableSchemaMetadataClass
     schemaMetadata: SchemaMetadataClass
+    logicalParent: LogicalParentClass
     chartInfo: ChartInfoClass
     chartUsageStatistics: ChartUsageStatisticsClass
     chartQuery: ChartQueryClass
datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py
ADDED

@@ -0,0 +1,15 @@
+# mypy: ignore-errors
+# flake8: noqa
+
+# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
+# Do not modify manually!
+
+# pylint: skip-file
+# fmt: off
+# isort: skip_file
+from .....schema_classes import LogicalParentClass
+
+
+LogicalParent = LogicalParentClass
+
+# fmt: on
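This shim only re-exports the generated class under the pegasus2avro namespace, so both import paths resolve to the same type (a small sanity-check sketch):

    from datahub.metadata.schema_classes import LogicalParentClass
    from datahub.metadata.com.linkedin.pegasus2avro.logical import LogicalParent

    assert LogicalParent is LogicalParentClass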
datahub/metadata/schema.avsc
CHANGED

@@ -9164,6 +9164,44 @@
     "doc": "EditableSchemaMetadata stores editable changes made to schema metadata. This separates changes made from\ningestion pipelines and edits in the UI to avoid accidental overwrites of user-provided data by ingestion pipelines."
   },
   "com.linkedin.pegasus2avro.schema.SchemaMetadata",
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "logicalParent"
+    },
+    "name": "LogicalParent",
+    "namespace": "com.linkedin.pegasus2avro.logical",
+    "fields": [
+      {
+        "Relationship": {
+          "/destinationUrn": {
+            "createdActor": "parent/created/actor",
+            "createdOn": "parent/created/time",
+            "entityTypes": [
+              "dataset",
+              "schemaField"
+            ],
+            "name": "PhysicalInstanceOf",
+            "properties": "parent/properties",
+            "updatedActor": "parent/lastModified/actor",
+            "updatedOn": "parent/lastModified/time"
+          }
+        },
+        "Searchable": {
+          "/destinationUrn": {
+            "addToFilters": true,
+            "fieldName": "logicalParent",
+            "fieldType": "URN",
+            "filterNameOverride": "Physical Instance Of",
+            "hasValuesFieldName": "hasLogicalParent",
+            "queryByDefault": false
+          }
+        },
+        "type": "com.linkedin.pegasus2avro.common.Edge",
+        "name": "parent"
+      }
+    ]
+  },
   {
     "type": "record",
     "Aspect": {
@@ -10853,13 +10891,6 @@
       "namespace": "com.linkedin.pegasus2avro.query",
       "fields": [
         {
-          "Relationship": {
-            "entityTypes": [
-              "dataset",
-              "schemaField"
-            ],
-            "name": "IsAssociatedWith"
-          },
           "Searchable": {
             "fieldName": "entities",
             "fieldType": "URN"
@@ -10868,10 +10899,6 @@
             "class": "com.linkedin.pegasus2avro.common.urn.Urn"
           },
           "Urn": "Urn",
-          "entityTypes": [
-            "dataset",
-            "schemaField"
-          ],
           "type": "string",
           "name": "entity",
           "doc": "An entity which is the subject of a query."
datahub/metadata/schemas/DatasetKey.avsc
CHANGED

@@ -38,7 +38,8 @@
       "forms",
       "partitionsSummary",
       "versionProperties",
-      "icebergCatalogInfo"
+      "icebergCatalogInfo",
+      "logicalParent"
     ],
     "entityDoc": "Datasets represent logical or physical data assets stored or represented in various data platforms. Tables, Views, Streams are all instances of datasets."
   },
datahub/metadata/schemas/LogicalParent.avsc
ADDED

@@ -0,0 +1,140 @@
+{
+  "type": "record",
+  "Aspect": {
+    "name": "logicalParent"
+  },
+  "name": "LogicalParent",
+  "namespace": "com.linkedin.pegasus2avro.logical",
+  "fields": [
+    {
+      "Relationship": {
+        "/destinationUrn": {
+          "createdActor": "parent/created/actor",
+          "createdOn": "parent/created/time",
+          "entityTypes": [
+            "dataset",
+            "schemaField"
+          ],
+          "name": "PhysicalInstanceOf",
+          "properties": "parent/properties",
+          "updatedActor": "parent/lastModified/actor",
+          "updatedOn": "parent/lastModified/time"
+        }
+      },
+      "Searchable": {
+        "/destinationUrn": {
+          "addToFilters": true,
+          "fieldName": "logicalParent",
+          "fieldType": "URN",
+          "filterNameOverride": "Physical Instance Of",
+          "hasValuesFieldName": "hasLogicalParent",
+          "queryByDefault": false
+        }
+      },
+      "type": {
+        "type": "record",
+        "name": "Edge",
+        "namespace": "com.linkedin.pegasus2avro.common",
+        "fields": [
+          {
+            "java": {
+              "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+            },
+            "type": [
+              "null",
+              "string"
+            ],
+            "name": "sourceUrn",
+            "default": null,
+            "doc": "Urn of the source of this relationship edge.\nIf not specified, assumed to be the entity that this aspect belongs to.",
+            "Urn": "Urn"
+          },
+          {
+            "java": {
+              "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+            },
+            "type": "string",
+            "name": "destinationUrn",
+            "doc": "Urn of the destination of this relationship edge.",
+            "Urn": "Urn"
+          },
+          {
+            "type": [
+              "null",
+              {
+                "type": "record",
+                "name": "AuditStamp",
+                "namespace": "com.linkedin.pegasus2avro.common",
+                "fields": [
+                  {
+                    "type": "long",
+                    "name": "time",
+                    "doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
+                  },
+                  {
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "type": "string",
+                    "name": "actor",
+                    "doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
+                    "Urn": "Urn"
+                  },
+                  {
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "type": [
+                      "null",
+                      "string"
+                    ],
+                    "name": "impersonator",
+                    "default": null,
+                    "doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
+                    "Urn": "Urn"
+                  },
+                  {
+                    "type": [
+                      "null",
+                      "string"
+                    ],
+                    "name": "message",
+                    "default": null,
+                    "doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
+                  }
+                ],
+                "doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
+              }
+            ],
+            "name": "created",
+            "default": null,
+            "doc": "Audit stamp containing who created this relationship edge and when"
+          },
+          {
+            "type": [
+              "null",
+              "com.linkedin.pegasus2avro.common.AuditStamp"
+            ],
+            "name": "lastModified",
+            "default": null,
+            "doc": "Audit stamp containing who last modified this relationship edge and when"
+          },
+          {
+            "type": [
+              "null",
+              {
+                "type": "map",
+                "values": "string"
+              }
+            ],
+            "name": "properties",
+            "default": null,
+            "doc": "A generic properties bag that allows us to store specific information on this graph edge."
+          }
+        ],
+        "doc": "A common structure to represent all edges to entities when used inside aspects as collections\nThis ensures that all edges have common structure around audit-stamps and will support PATCH, time-travel automatically."
+      },
+      "name": "parent"
+    }
+  ]
+}
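Read as data, a LogicalParent aspect is just a single Edge under the "parent" key. A hedged sketch of a payload conforming to this schema (the URNs and timestamp are made up; optional AuditStamp fields are omitted for brevity):

    logical_parent = {
        "parent": {
            "sourceUrn": None,  # defaults to the entity the aspect is attached to
            "destinationUrn": "urn:li:dataset:(urn:li:dataPlatform:snowflake,logical_db.sales.orders,PROD)",
            "created": {"time": 1718000000000, "actor": "urn:li:corpuser:datahub"},
            "lastModified": None,
            "properties": None,
        }
    }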
datahub/metadata/schemas/QuerySubjects.avsc
CHANGED

@@ -15,13 +15,6 @@
       "namespace": "com.linkedin.pegasus2avro.query",
       "fields": [
         {
-          "Relationship": {
-            "entityTypes": [
-              "dataset",
-              "schemaField"
-            ],
-            "name": "IsAssociatedWith"
-          },
           "Searchable": {
             "fieldName": "entities",
             "fieldType": "URN"
@@ -32,11 +25,7 @@
           "type": "string",
           "name": "entity",
           "doc": "An entity which is the subject of a query.",
-          "Urn": "Urn"
-          "entityTypes": [
-            "dataset",
-            "schemaField"
-          ]
+          "Urn": "Urn"
         }
       ],
       "doc": "A single subject of a particular query.\nIn the future, we may evolve this model to include richer details\nabout the Query Subject in relation to the query."
{acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/WHEEL
RENAMED
File without changes

{acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/entry_points.txt
RENAMED
File without changes

{acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/licenses/LICENSE
RENAMED
File without changes

{acryl_datahub-1.1.0.5rc11.dist-info → acryl_datahub-1.1.0.5rc13.dist-info}/top_level.txt
RENAMED
File without changes