acryl-datahub 1.3.0.1rc5__py3-none-any.whl → 1.3.0.1rc7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/METADATA +2332 -2333
- {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/RECORD +47 -42
- datahub/_version.py +1 -1
- datahub/cli/docker_check.py +1 -1
- datahub/emitter/mce_builder.py +6 -0
- datahub/ingestion/autogenerated/capability_summary.json +12 -12
- datahub/ingestion/source/bigquery_v2/bigquery.py +17 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +16 -0
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +2 -0
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +41 -4
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/fivetran/config.py +33 -0
- datahub/ingestion/source/fivetran/fivetran.py +184 -13
- datahub/ingestion/source/fivetran/fivetran_log_api.py +20 -5
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/hex/hex.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/metabase.py +23 -4
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +1 -1
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_config.py +16 -0
- datahub/ingestion/source/snowflake/snowflake_queries.py +49 -6
- datahub/ingestion/source/snowflake/snowflake_summary.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_v2.py +14 -1
- datahub/ingestion/source/sql_queries.py +1 -1
- datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
- datahub/ingestion/source/state/stateful_ingestion_base.py +30 -2
- datahub/ingestion/source/unity/source.py +1 -1
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/metadata/_internal_schema_classes.py +223 -0
- datahub/metadata/_urns/urn_defs.py +56 -0
- datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
- datahub/metadata/schema.avsc +208 -0
- datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
- datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
- datahub/sdk/mlmodel.py +19 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +18 -4
- {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc7.dist-info}/top_level.txt +0 -0
datahub/metadata/_internal_schema_classes.py CHANGED

@@ -13088,6 +13088,185 @@ class StructuredExecutionReportClass(DictWrapper):
         self._inner_dict['contentType'] = value
 
 
+class BucketStorageLocationClass(DictWrapper):
+    """Information where a file is stored"""
+
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.file.BucketStorageLocation")
+    def __init__(self,
+        storageBucket: str,
+        storageKey: str,
+    ):
+        super().__init__()
+
+        self.storageBucket = storageBucket
+        self.storageKey = storageKey
+
+    def _restore_defaults(self) -> None:
+        self.storageBucket = str()
+        self.storageKey = str()
+
+
+    @property
+    def storageBucket(self) -> str:
+        """The storage bucket this file is stored in"""
+        return self._inner_dict.get('storageBucket')  # type: ignore
+
+    @storageBucket.setter
+    def storageBucket(self, value: str) -> None:
+        self._inner_dict['storageBucket'] = value
+
+
+    @property
+    def storageKey(self) -> str:
+        """The key for where this file is stored inside of the given bucket"""
+        return self._inner_dict.get('storageKey')  # type: ignore
+
+    @storageKey.setter
+    def storageKey(self, value: str) -> None:
+        self._inner_dict['storageKey'] = value
+
+
+class DataHubFileInfoClass(_Aspect):
+    """Information about a DataHub file - a file stored in S3 for use within DataHub platform features like documentation, home pages, and announcements."""
+
+
+    ASPECT_NAME = 'dataHubFileInfo'
+    ASPECT_INFO = {}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.file.DataHubFileInfo")
+
+    def __init__(self,
+        bucketStorageLocation: "BucketStorageLocationClass",
+        originalFileName: str,
+        mimeType: str,
+        sizeInBytes: int,
+        scenario: Union[str, "FileUploadScenarioClass"],
+        created: "AuditStampClass",
+        referencedByAsset: Union[None, str]=None,
+        schemaField: Union[None, str]=None,
+        contentHash: Union[None, str]=None,
+    ):
+        super().__init__()
+
+        self.bucketStorageLocation = bucketStorageLocation
+        self.originalFileName = originalFileName
+        self.mimeType = mimeType
+        self.sizeInBytes = sizeInBytes
+        self.scenario = scenario
+        self.referencedByAsset = referencedByAsset
+        self.schemaField = schemaField
+        self.created = created
+        self.contentHash = contentHash
+
+    def _restore_defaults(self) -> None:
+        self.bucketStorageLocation = BucketStorageLocationClass._construct_with_defaults()
+        self.originalFileName = str()
+        self.mimeType = str()
+        self.sizeInBytes = int()
+        self.scenario = FileUploadScenarioClass.ASSET_DOCUMENTATION
+        self.referencedByAsset = self.RECORD_SCHEMA.fields_dict["referencedByAsset"].default
+        self.schemaField = self.RECORD_SCHEMA.fields_dict["schemaField"].default
+        self.created = AuditStampClass._construct_with_defaults()
+        self.contentHash = self.RECORD_SCHEMA.fields_dict["contentHash"].default
+
+
+    @property
+    def bucketStorageLocation(self) -> "BucketStorageLocationClass":
+        """Info about where a file is stored"""
+        return self._inner_dict.get('bucketStorageLocation')  # type: ignore
+
+    @bucketStorageLocation.setter
+    def bucketStorageLocation(self, value: "BucketStorageLocationClass") -> None:
+        self._inner_dict['bucketStorageLocation'] = value
+
+
+    @property
+    def originalFileName(self) -> str:
+        """The original filename as uploaded by the user"""
+        return self._inner_dict.get('originalFileName')  # type: ignore
+
+    @originalFileName.setter
+    def originalFileName(self, value: str) -> None:
+        self._inner_dict['originalFileName'] = value
+
+
+    @property
+    def mimeType(self) -> str:
+        """MIME type of the file (e.g., image/png, application/pdf)"""
+        return self._inner_dict.get('mimeType')  # type: ignore
+
+    @mimeType.setter
+    def mimeType(self, value: str) -> None:
+        self._inner_dict['mimeType'] = value
+
+
+    @property
+    def sizeInBytes(self) -> int:
+        """Size of the file in bytes"""
+        return self._inner_dict.get('sizeInBytes')  # type: ignore
+
+    @sizeInBytes.setter
+    def sizeInBytes(self, value: int) -> None:
+        self._inner_dict['sizeInBytes'] = value
+
+
+    @property
+    def scenario(self) -> Union[str, "FileUploadScenarioClass"]:
+        """The scenario/context in which this file was uploaded"""
+        return self._inner_dict.get('scenario')  # type: ignore
+
+    @scenario.setter
+    def scenario(self, value: Union[str, "FileUploadScenarioClass"]) -> None:
+        self._inner_dict['scenario'] = value
+
+
+    @property
+    def referencedByAsset(self) -> Union[None, str]:
+        """Optional URN of the entity this file is associated with (e.g., the dataset whose docs contain this file)"""
+        return self._inner_dict.get('referencedByAsset')  # type: ignore
+
+    @referencedByAsset.setter
+    def referencedByAsset(self, value: Union[None, str]) -> None:
+        self._inner_dict['referencedByAsset'] = value
+
+
+    @property
+    def schemaField(self) -> Union[None, str]:
+        """The dataset schema field urn this file is referenced by"""
+        return self._inner_dict.get('schemaField')  # type: ignore
+
+    @schemaField.setter
+    def schemaField(self, value: Union[None, str]) -> None:
+        self._inner_dict['schemaField'] = value
+
+
+    @property
+    def created(self) -> "AuditStampClass":
+        """Timestamp when this file was created and by whom"""
+        return self._inner_dict.get('created')  # type: ignore
+
+    @created.setter
+    def created(self, value: "AuditStampClass") -> None:
+        self._inner_dict['created'] = value
+
+
+    @property
+    def contentHash(self) -> Union[None, str]:
+        """SHA-256 hash of file contents"""
+        return self._inner_dict.get('contentHash')  # type: ignore
+
+    @contentHash.setter
+    def contentHash(self, value: Union[None, str]) -> None:
+        self._inner_dict['contentHash'] = value
+
+
+class FileUploadScenarioClass(object):
+    # No docs available.
+
+    ASSET_DOCUMENTATION = "ASSET_DOCUMENTATION"
+    """File uploaded for entity documentation"""
+
+
+
 class DynamicFormAssignmentClass(_Aspect):
     """Information about how a form is assigned to entities dynamically. Provide a filter to
     match a set of entities instead of explicitly applying a form to specific entities."""
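Taken together, the hunk above adds the generated classes for the new dataHubFileInfo aspect. A minimal sketch of constructing the aspect and wrapping it in a metadata change proposal; the bucket, key, file id, and timestamp values are invented for illustration, and MetadataChangeProposalWrapper is the standard datahub emitter helper rather than anything introduced by this diff:

from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import (
    AuditStampClass,
    BucketStorageLocationClass,
    DataHubFileInfoClass,
    FileUploadScenarioClass,
)

# Build the new aspect; every field value here is a made-up example.
file_info = DataHubFileInfoClass(
    bucketStorageLocation=BucketStorageLocationClass(
        storageBucket="my-datahub-bucket",  # hypothetical bucket name
        storageKey="files/example.png",     # hypothetical object key within the bucket
    ),
    originalFileName="example.png",
    mimeType="image/png",
    sizeInBytes=12345,
    scenario=FileUploadScenarioClass.ASSET_DOCUMENTATION,
    created=AuditStampClass(time=1700000000000, actor="urn:li:corpuser:datahub"),
)

# Attach the aspect to a dataHubFile entity urn (the id is hypothetical).
mcp = MetadataChangeProposalWrapper(
    entityUrn="urn:li:dataHubFile:some-file-id",
    aspect=file_info,
)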
@@ -15712,6 +15891,35 @@ class DataHubConnectionKeyClass(_Aspect):
         self._inner_dict['id'] = value
 
 
+class DataHubFileKeyClass(_Aspect):
+    """Key for a DataHubFile"""
+
+
+    ASPECT_NAME = 'dataHubFileKey'
+    ASPECT_INFO = {'keyForEntity': 'dataHubFile', 'entityCategory': 'core', 'entityAspects': ['dataHubFileInfo']}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubFileKey")
+
+    def __init__(self,
+        id: str,
+    ):
+        super().__init__()
+
+        self.id = id
+
+    def _restore_defaults(self) -> None:
+        self.id = str()
+
+
+    @property
+    def id(self) -> str:
+        """Unique id for the file."""
+        return self._inner_dict.get('id')  # type: ignore
+
+    @id.setter
+    def id(self, value: str) -> None:
+        self._inner_dict['id'] = value
+
+
 class DataHubIngestionSourceKeyClass(_Aspect):
     """Key for a DataHub ingestion source"""
 
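The key aspect is deliberately minimal: a single id. A small sketch that only restates what the generated code above declares (the id value is hypothetical):

from datahub.metadata.schema_classes import DataHubFileKeyClass

key = DataHubFileKeyClass(id="some-file-id")  # hypothetical id
assert key.ASPECT_NAME == "dataHubFileKey"
assert key.ASPECT_INFO["keyForEntity"] == "dataHubFile"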
@@ -27751,6 +27959,9 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.execution.ExecutionRequestSignal': ExecutionRequestSignalClass,
     'com.linkedin.pegasus2avro.execution.ExecutionRequestSource': ExecutionRequestSourceClass,
     'com.linkedin.pegasus2avro.execution.StructuredExecutionReport': StructuredExecutionReportClass,
+    'com.linkedin.pegasus2avro.file.BucketStorageLocation': BucketStorageLocationClass,
+    'com.linkedin.pegasus2avro.file.DataHubFileInfo': DataHubFileInfoClass,
+    'com.linkedin.pegasus2avro.file.FileUploadScenario': FileUploadScenarioClass,
     'com.linkedin.pegasus2avro.form.DynamicFormAssignment': DynamicFormAssignmentClass,
     'com.linkedin.pegasus2avro.form.FormActorAssignment': FormActorAssignmentClass,
     'com.linkedin.pegasus2avro.form.FormInfo': FormInfoClass,
@@ -27800,6 +28011,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.metadata.key.DataHubAccessTokenKey': DataHubAccessTokenKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubActionKey': DataHubActionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubConnectionKey': DataHubConnectionKeyClass,
+    'com.linkedin.pegasus2avro.metadata.key.DataHubFileKey': DataHubFileKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubPageModuleKey': DataHubPageModuleKeyClass,
@@ -28273,6 +28485,9 @@ __SCHEMA_TYPES = {
     'ExecutionRequestSignal': ExecutionRequestSignalClass,
     'ExecutionRequestSource': ExecutionRequestSourceClass,
     'StructuredExecutionReport': StructuredExecutionReportClass,
+    'BucketStorageLocation': BucketStorageLocationClass,
+    'DataHubFileInfo': DataHubFileInfoClass,
+    'FileUploadScenario': FileUploadScenarioClass,
     'DynamicFormAssignment': DynamicFormAssignmentClass,
     'FormActorAssignment': FormActorAssignmentClass,
     'FormInfo': FormInfoClass,
@@ -28322,6 +28537,7 @@ __SCHEMA_TYPES = {
     'DataHubAccessTokenKey': DataHubAccessTokenKeyClass,
     'DataHubActionKey': DataHubActionKeyClass,
     'DataHubConnectionKey': DataHubConnectionKeyClass,
+    'DataHubFileKey': DataHubFileKeyClass,
     'DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
     'DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'DataHubPageModuleKey': DataHubPageModuleKeyClass,
@@ -28585,6 +28801,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     DashboardInfoClass,
     EditableSchemaMetadataClass,
     SchemaMetadataClass,
+    DataHubFileInfoClass,
     AssertionActionsClass,
     AssertionRunEventClass,
     AssertionInfoClass,
@@ -28666,6 +28883,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     DataHubAccessTokenKeyClass,
     DataHubActionKeyClass,
     MLPrimaryKeyKeyClass,
+    DataHubFileKeyClass,
     TestKeyClass,
     GlossaryTermKeyClass,
     InviteTokenKeyClass,
@@ -28819,6 +29037,7 @@ class AspectBag(TypedDict, total=False):
     dashboardInfo: DashboardInfoClass
     editableSchemaMetadata: EditableSchemaMetadataClass
     schemaMetadata: SchemaMetadataClass
+    dataHubFileInfo: DataHubFileInfoClass
     assertionActions: AssertionActionsClass
     assertionRunEvent: AssertionRunEventClass
     assertionInfo: AssertionInfoClass
@@ -28900,6 +29119,7 @@ class AspectBag(TypedDict, total=False):
     dataHubAccessTokenKey: DataHubAccessTokenKeyClass
     dataHubActionKey: DataHubActionKeyClass
     mlPrimaryKeyKey: MLPrimaryKeyKeyClass
+    dataHubFileKey: DataHubFileKeyClass
     testKey: TestKeyClass
     glossaryTermKey: GlossaryTermKeyClass
     inviteTokenKey: InviteTokenKeyClass
@@ -29069,6 +29289,7 @@ KEY_ASPECTS: Dict[str, Type[_Aspect]] = {
     'dataHubAccessToken': DataHubAccessTokenKeyClass,
     'dataHubAction': DataHubActionKeyClass,
     'mlPrimaryKey': MLPrimaryKeyKeyClass,
+    'dataHubFile': DataHubFileKeyClass,
     'test': TestKeyClass,
     'glossaryTerm': GlossaryTermKeyClass,
     'inviteToken': InviteTokenKeyClass,
@@ -29135,6 +29356,7 @@ ENTITY_TYPE_NAMES: List[str] = [
     'dataHubAccessToken',
     'dataHubAction',
     'mlPrimaryKey',
+    'dataHubFile',
     'test',
     'glossaryTerm',
     'inviteToken',
@@ -29198,6 +29420,7 @@ EntityTypeName = Literal[
     'dataHubAccessToken',
     'dataHubAction',
     'mlPrimaryKey',
+    'dataHubFile',
     'test',
     'glossaryTerm',
     'inviteToken',
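The remaining hunks in this file register the new classes in the generated lookup tables (__SCHEMA_TYPES, ASPECT_CLASSES, AspectBag, KEY_ASPECTS, ENTITY_TYPE_NAMES, EntityTypeName). A sketch of what those registrations imply, assuming the tables are re-exported from datahub.metadata.schema_classes as in prior releases:

from datahub.metadata.schema_classes import (
    ASPECT_CLASSES,
    KEY_ASPECTS,
    DataHubFileInfoClass,
    DataHubFileKeyClass,
)

# The new entity type resolves to its key aspect class.
assert KEY_ASPECTS["dataHubFile"] is DataHubFileKeyClass

# Both new aspect classes participate in the generated aspect registry.
assert DataHubFileInfoClass in ASPECT_CLASSES
assert DataHubFileKeyClass in ASPECT_CLASSES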
datahub/metadata/_urns/urn_defs.py CHANGED

@@ -1885,6 +1885,62 @@ class MlPrimaryKeyUrn(_SpecificUrn):
     def name(self) -> str:
         return self._entity_ids[1]
 
+if TYPE_CHECKING:
+    from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+class DataHubFileUrn(_SpecificUrn):
+    ENTITY_TYPE: ClassVar[Literal["dataHubFile"]] = "dataHubFile"
+    _URN_PARTS: ClassVar[int] = 1
+
+    def __init__(self, id: Union["DataHubFileUrn", str], *, _allow_coercion: bool = True) -> None:
+        if _allow_coercion:
+            # Field coercion logic (if any is required).
+            if isinstance(id, str):
+                if id.startswith('urn:li:'):
+                    try:
+                        id = DataHubFileUrn.from_string(id)
+                    except InvalidUrnError:
+                        raise InvalidUrnError(f'Expecting a DataHubFileUrn but got {id}')
+                else:
+                    id = UrnEncoder.encode_string(id)
+
+        # Validation logic.
+        if not id:
+            raise InvalidUrnError("DataHubFileUrn id cannot be empty")
+        if isinstance(id, DataHubFileUrn):
+            id = id.id
+        elif isinstance(id, Urn):
+            raise InvalidUrnError(f'Expecting a DataHubFileUrn but got {id}')
+        if UrnEncoder.contains_reserved_char(id):
+            raise InvalidUrnError(f'DataHubFileUrn id contains reserved characters')
+
+        super().__init__(self.ENTITY_TYPE, [id])
+
+    @classmethod
+    def _parse_ids(cls, entity_ids: List[str]) -> "DataHubFileUrn":
+        if len(entity_ids) != cls._URN_PARTS:
+            raise InvalidUrnError(f"DataHubFileUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
+        return cls(id=entity_ids[0], _allow_coercion=False)
+
+    @classmethod
+    def underlying_key_aspect_type(cls) -> Type["DataHubFileKeyClass"]:
+        from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+        return DataHubFileKeyClass
+
+    def to_key_aspect(self) -> "DataHubFileKeyClass":
+        from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+        return DataHubFileKeyClass(id=self.id)
+
+    @classmethod
+    def from_key_aspect(cls, key_aspect: "DataHubFileKeyClass") -> "DataHubFileUrn":
+        return cls(id=key_aspect.id)
+
+    @property
+    def id(self) -> str:
+        return self._entity_ids[0]
+
 if TYPE_CHECKING:
     from datahub.metadata.schema_classes import TestKeyClass
 
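A short sketch of the new urn type in use; the id is hypothetical, and datahub.metadata.urns is the usual re-export location for generated urn classes:

from datahub.metadata.urns import DataHubFileUrn

urn = DataHubFileUrn("some-file-id")  # hypothetical id
assert urn.urn() == "urn:li:dataHubFile:some-file-id"

# Round-trip through the key aspect, as defined by to_key_aspect/from_key_aspect above.
parsed = DataHubFileUrn.from_string("urn:li:dataHubFile:some-file-id")
key = parsed.to_key_aspect()
assert DataHubFileUrn.from_key_aspect(key) == parsed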
datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py ADDED

@@ -0,0 +1,19 @@
+# mypy: ignore-errors
+# flake8: noqa
+
+# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
+# Do not modify manually!
+
+# pylint: skip-file
+# fmt: off
+# isort: skip_file
+from .....schema_classes import BucketStorageLocationClass
+from .....schema_classes import DataHubFileInfoClass
+from .....schema_classes import FileUploadScenarioClass
+
+
+BucketStorageLocation = BucketStorageLocationClass
+DataHubFileInfo = DataHubFileInfoClass
+FileUploadScenario = FileUploadScenarioClass
+
+# fmt: on
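This autogenerated module only aliases the pegasus2avro names to the generated classes, so in an installed wheel both import paths should resolve to the same types:

from datahub.metadata.com.linkedin.pegasus2avro.file import DataHubFileInfo
from datahub.metadata.schema_classes import DataHubFileInfoClass

# The pegasus2avro name is an alias, not a separate class.
assert DataHubFileInfo is DataHubFileInfoClass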
datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py CHANGED

@@ -18,6 +18,7 @@ from ......schema_classes import DataFlowKeyClass
 from ......schema_classes import DataHubAccessTokenKeyClass
 from ......schema_classes import DataHubActionKeyClass
 from ......schema_classes import DataHubConnectionKeyClass
+from ......schema_classes import DataHubFileKeyClass
 from ......schema_classes import DataHubIngestionSourceKeyClass
 from ......schema_classes import DataHubOpenAPISchemaKeyClass
 from ......schema_classes import DataHubPageModuleKeyClass
@@ -74,6 +75,7 @@ DataFlowKey = DataFlowKeyClass
 DataHubAccessTokenKey = DataHubAccessTokenKeyClass
 DataHubActionKey = DataHubActionKeyClass
 DataHubConnectionKey = DataHubConnectionKeyClass
+DataHubFileKey = DataHubFileKeyClass
 DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
 DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
 DataHubPageModuleKey = DataHubPageModuleKeyClass
datahub/metadata/schema.avsc CHANGED

@@ -1903,6 +1903,193 @@
     ],
     "doc": "SchemaMetadata to describe metadata related to store schema"
   },
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "dataHubFileInfo"
+    },
+    "name": "DataHubFileInfo",
+    "namespace": "com.linkedin.pegasus2avro.file",
+    "fields": [
+      {
+        "type": {
+          "type": "record",
+          "name": "BucketStorageLocation",
+          "namespace": "com.linkedin.pegasus2avro.file",
+          "fields": [
+            {
+              "Searchable": {
+                "fieldType": "KEYWORD"
+              },
+              "type": "string",
+              "name": "storageBucket",
+              "doc": "The storage bucket this file is stored in"
+            },
+            {
+              "Searchable": {
+                "fieldType": "KEYWORD"
+              },
+              "type": "string",
+              "name": "storageKey",
+              "doc": "The key for where this file is stored inside of the given bucket"
+            }
+          ],
+          "doc": "Information where a file is stored"
+        },
+        "name": "bucketStorageLocation",
+        "doc": "Info about where a file is stored"
+      },
+      {
+        "Searchable": {
+          "fieldType": "TEXT_PARTIAL"
+        },
+        "type": "string",
+        "name": "originalFileName",
+        "doc": "The original filename as uploaded by the user"
+      },
+      {
+        "Searchable": {
+          "fieldType": "KEYWORD"
+        },
+        "type": "string",
+        "name": "mimeType",
+        "doc": "MIME type of the file (e.g., image/png, application/pdf)"
+      },
+      {
+        "type": "long",
+        "name": "sizeInBytes",
+        "doc": "Size of the file in bytes"
+      },
+      {
+        "Searchable": {
+          "fieldType": "KEYWORD"
+        },
+        "type": {
+          "type": "enum",
+          "symbolDocs": {
+            "ASSET_DOCUMENTATION": "File uploaded for entity documentation"
+          },
+          "name": "FileUploadScenario",
+          "namespace": "com.linkedin.pegasus2avro.file",
+          "symbols": [
+            "ASSET_DOCUMENTATION"
+          ]
+        },
+        "name": "scenario",
+        "doc": "The scenario/context in which this file was uploaded"
+      },
+      {
+        "Relationship": {
+          "entityTypes": [
+            "dataset",
+            "chart",
+            "container",
+            "dashboard",
+            "dataFlow",
+            "dataJob",
+            "glossaryTerm",
+            "glossaryNode",
+            "mlModel",
+            "mlFeature",
+            "notebook",
+            "mlFeatureTable",
+            "mlPrimaryKey",
+            "mlModelGroup",
+            "domain",
+            "dataProduct",
+            "businessAttribute"
+          ],
+          "name": "ReferencedBy"
+        },
+        "Searchable": {
+          "fieldType": "URN"
+        },
+        "java": {
+          "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+        },
+        "Urn": "Urn",
+        "entityTypes": [
+          "dataset",
+          "chart",
+          "container",
+          "dashboard",
+          "dataFlow",
+          "dataJob",
+          "glossaryTerm",
+          "glossaryNode",
+          "mlModel",
+          "mlFeature",
+          "notebook",
+          "mlFeatureTable",
+          "mlPrimaryKey",
+          "mlModelGroup",
+          "domain",
+          "dataProduct",
+          "businessAttribute"
+        ],
+        "type": [
+          "null",
+          "string"
+        ],
+        "name": "referencedByAsset",
+        "default": null,
+        "doc": "Optional URN of the entity this file is associated with (e.g., the dataset whose docs contain this file)"
+      },
+      {
+        "Relationship": {
+          "entityTypes": [
+            "schemaField"
+          ],
+          "name": "ReferencedBy"
+        },
+        "Searchable": {
+          "fieldType": "URN"
+        },
+        "java": {
+          "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+        },
+        "Urn": "Urn",
+        "entityTypes": [
+          "schemaField"
+        ],
+        "type": [
+          "null",
+          "string"
+        ],
+        "name": "schemaField",
+        "default": null,
+        "doc": "The dataset schema field urn this file is referenced by"
+      },
+      {
+        "Searchable": {
+          "/actor": {
+            "fieldName": "createdBy",
+            "fieldType": "URN"
+          },
+          "/time": {
+            "fieldName": "createdAt",
+            "fieldType": "DATETIME"
+          }
+        },
+        "type": "com.linkedin.pegasus2avro.common.AuditStamp",
+        "name": "created",
+        "doc": "Timestamp when this file was created and by whom"
+      },
+      {
+        "Searchable": {
+          "fieldType": "KEYWORD"
+        },
+        "type": [
+          "null",
+          "string"
+        ],
+        "name": "contentHash",
+        "default": null,
+        "doc": "SHA-256 hash of file contents"
+      }
+    ],
+    "doc": "Information about a DataHub file - a file stored in S3 for use within DataHub platform features like documentation, home pages, and announcements."
+  },
   {
     "type": "record",
     "Aspect": {
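For orientation, a raw aspect value matching the record above could look like the following. The field names and which fields are nullable come from the schema; every value is invented, and from_obj is the generic deserializer the generated DictWrapper classes have carried in prior releases:

from datahub.metadata.schema_classes import DataHubFileInfoClass

# Hypothetical wire-form value for a dataHubFileInfo aspect.
raw = {
    "bucketStorageLocation": {
        "storageBucket": "my-datahub-bucket",  # hypothetical
        "storageKey": "files/example.png",     # hypothetical
    },
    "originalFileName": "example.png",
    "mimeType": "image/png",
    "sizeInBytes": 12345,
    "scenario": "ASSET_DOCUMENTATION",
    "referencedByAsset": None,  # optional union, defaults to null
    "schemaField": None,        # optional union, defaults to null
    "created": {"time": 1700000000000, "actor": "urn:li:corpuser:datahub"},
    "contentHash": None,        # optional SHA-256 digest
}
info = DataHubFileInfoClass.from_obj(raw)
assert info.mimeType == "image/png"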
@@ -15005,6 +15192,27 @@
     "doc": "Key for a DataHub Action Pipeline"
   },
   "com.linkedin.pegasus2avro.metadata.key.MLPrimaryKeyKey",
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "dataHubFileKey",
+      "keyForEntity": "dataHubFile",
+      "entityCategory": "core",
+      "entityAspects": [
+        "dataHubFileInfo"
+      ]
+    },
+    "name": "DataHubFileKey",
+    "namespace": "com.linkedin.pegasus2avro.metadata.key",
+    "fields": [
+      {
+        "type": "string",
+        "name": "id",
+        "doc": "Unique id for the file."
+      }
+    ],
+    "doc": "Key for a DataHubFile"
+  },
   {
     "type": "record",
     "Aspect": {
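As a sanity check, the standalone schema file listed in the RECORD diff above (datahub/metadata/schemas/DataHubFileKey.avsc) should parse with the avro library; a minimal sketch, assuming the third-party avro package is installed and the wheel layout matches the file list:

import pathlib

import avro.schema  # third-party "avro" package

import datahub.metadata as metadata

# Locate the shipped schema file relative to the installed package.
schema_path = pathlib.Path(metadata.__file__).parent / "schemas" / "DataHubFileKey.avsc"
parsed = avro.schema.parse(schema_path.read_text())
assert parsed.name == "DataHubFileKey"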