acryl-datahub 1.3.0.1rc5__py3-none-any.whl → 1.3.0.1rc6__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.
Files changed (23):
  1. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/METADATA +2439 -2439
  2. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/RECORD +23 -20
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/source/bigquery_v2/bigquery.py +17 -1
  5. datahub/ingestion/source/bigquery_v2/bigquery_config.py +16 -0
  6. datahub/ingestion/source/bigquery_v2/queries_extractor.py +41 -4
  7. datahub/ingestion/source/snowflake/snowflake_config.py +16 -0
  8. datahub/ingestion/source/snowflake/snowflake_queries.py +46 -6
  9. datahub/ingestion/source/snowflake/snowflake_v2.py +14 -1
  10. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  11. datahub/ingestion/source/state/stateful_ingestion_base.py +30 -2
  12. datahub/metadata/_internal_schema_classes.py +223 -0
  13. datahub/metadata/_urns/urn_defs.py +56 -0
  14. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  15. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  16. datahub/metadata/schema.avsc +206 -0
  17. datahub/metadata/schemas/DataHubFileInfo.avsc +228 -0
  18. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  19. datahub/sql_parsing/sql_parsing_aggregator.py +18 -4
  20. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/WHEEL +0 -0
  21. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/entry_points.txt +0 -0
  22. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/licenses/LICENSE +0 -0
  23. {acryl_datahub-1.3.0.1rc5.dist-info → acryl_datahub-1.3.0.1rc6.dist-info}/top_level.txt +0 -0
@@ -13088,6 +13088,185 @@ class StructuredExecutionReportClass(DictWrapper):
         self._inner_dict['contentType'] = value


+class BucketStorageLocationClass(DictWrapper):
+    """Information where a file is stored"""
+
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.file.BucketStorageLocation")
+
+    def __init__(self,
+        storageBucket: str,
+        storageKey: str,
+    ):
+        super().__init__()
+
+        self.storageBucket = storageBucket
+        self.storageKey = storageKey
+
+    def _restore_defaults(self) -> None:
+        self.storageBucket = str()
+        self.storageKey = str()
+
+    @property
+    def storageBucket(self) -> str:
+        """The storage bucket this file is stored in"""
+        return self._inner_dict.get('storageBucket')  # type: ignore
+
+    @storageBucket.setter
+    def storageBucket(self, value: str) -> None:
+        self._inner_dict['storageBucket'] = value
+
+    @property
+    def storageKey(self) -> str:
+        """The key for where this file is stored inside of the given bucket"""
+        return self._inner_dict.get('storageKey')  # type: ignore
+
+    @storageKey.setter
+    def storageKey(self, value: str) -> None:
+        self._inner_dict['storageKey'] = value
+
+
+class DataHubFileInfoClass(_Aspect):
+    """Information about a DataHub file - a file stored in S3 for use within DataHub platform features like documentation, home pages, and announcements."""
+
+    ASPECT_NAME = 'dataHubFileInfo'
+    ASPECT_INFO = {}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.file.DataHubFileInfo")
+
+    def __init__(self,
+        bucketStorageLocation: "BucketStorageLocationClass",
+        originalFileName: str,
+        mimeType: str,
+        sizeInBytes: int,
+        scenario: Union[str, "FileUploadScenarioClass"],
+        created: "AuditStampClass",
+        referencedByAsset: Union[None, str]=None,
+        schemaField: Union[None, str]=None,
+        contentHash: Union[None, str]=None,
+    ):
+        super().__init__()
+
+        self.bucketStorageLocation = bucketStorageLocation
+        self.originalFileName = originalFileName
+        self.mimeType = mimeType
+        self.sizeInBytes = sizeInBytes
+        self.scenario = scenario
+        self.referencedByAsset = referencedByAsset
+        self.schemaField = schemaField
+        self.created = created
+        self.contentHash = contentHash
+
+    def _restore_defaults(self) -> None:
+        self.bucketStorageLocation = BucketStorageLocationClass._construct_with_defaults()
+        self.originalFileName = str()
+        self.mimeType = str()
+        self.sizeInBytes = int()
+        self.scenario = FileUploadScenarioClass.ASSET_DOCUMENTATION
+        self.referencedByAsset = self.RECORD_SCHEMA.fields_dict["referencedByAsset"].default
+        self.schemaField = self.RECORD_SCHEMA.fields_dict["schemaField"].default
+        self.created = AuditStampClass._construct_with_defaults()
+        self.contentHash = self.RECORD_SCHEMA.fields_dict["contentHash"].default
+
+    @property
+    def bucketStorageLocation(self) -> "BucketStorageLocationClass":
+        """Info about where a file is stored"""
+        return self._inner_dict.get('bucketStorageLocation')  # type: ignore
+
+    @bucketStorageLocation.setter
+    def bucketStorageLocation(self, value: "BucketStorageLocationClass") -> None:
+        self._inner_dict['bucketStorageLocation'] = value
+
+    @property
+    def originalFileName(self) -> str:
+        """The original filename as uploaded by the user"""
+        return self._inner_dict.get('originalFileName')  # type: ignore
+
+    @originalFileName.setter
+    def originalFileName(self, value: str) -> None:
+        self._inner_dict['originalFileName'] = value
+
+    @property
+    def mimeType(self) -> str:
+        """MIME type of the file (e.g., image/png, application/pdf)"""
+        return self._inner_dict.get('mimeType')  # type: ignore
+
+    @mimeType.setter
+    def mimeType(self, value: str) -> None:
+        self._inner_dict['mimeType'] = value
+
+    @property
+    def sizeInBytes(self) -> int:
+        """Size of the file in bytes"""
+        return self._inner_dict.get('sizeInBytes')  # type: ignore
+
+    @sizeInBytes.setter
+    def sizeInBytes(self, value: int) -> None:
+        self._inner_dict['sizeInBytes'] = value
+
+    @property
+    def scenario(self) -> Union[str, "FileUploadScenarioClass"]:
+        """The scenario/context in which this file was uploaded"""
+        return self._inner_dict.get('scenario')  # type: ignore
+
+    @scenario.setter
+    def scenario(self, value: Union[str, "FileUploadScenarioClass"]) -> None:
+        self._inner_dict['scenario'] = value
+
+    @property
+    def referencedByAsset(self) -> Union[None, str]:
+        """Optional URN of the entity this file is associated with (e.g., the dataset whose docs contain this file)"""
+        return self._inner_dict.get('referencedByAsset')  # type: ignore
+
+    @referencedByAsset.setter
+    def referencedByAsset(self, value: Union[None, str]) -> None:
+        self._inner_dict['referencedByAsset'] = value
+
+    @property
+    def schemaField(self) -> Union[None, str]:
+        """The dataset schema field urn this file is referenced by"""
+        return self._inner_dict.get('schemaField')  # type: ignore
+
+    @schemaField.setter
+    def schemaField(self, value: Union[None, str]) -> None:
+        self._inner_dict['schemaField'] = value
+
+    @property
+    def created(self) -> "AuditStampClass":
+        """Timestamp when this file was created and by whom"""
+        return self._inner_dict.get('created')  # type: ignore
+
+    @created.setter
+    def created(self, value: "AuditStampClass") -> None:
+        self._inner_dict['created'] = value
+
+    @property
+    def contentHash(self) -> Union[None, str]:
+        """SHA-256 hash of file contents"""
+        return self._inner_dict.get('contentHash')  # type: ignore
+
+    @contentHash.setter
+    def contentHash(self, value: Union[None, str]) -> None:
+        self._inner_dict['contentHash'] = value
+
+
+class FileUploadScenarioClass(object):
+    # No docs available.
+
+    ASSET_DOCUMENTATION = "ASSET_DOCUMENTATION"
+    """File uploaded for entity documentation"""
+
+
 class DynamicFormAssignmentClass(_Aspect):
     """Information about how a form is assigned to entities dynamically. Provide a filter to
     match a set of entities instead of explicitly applying a form to specific entities."""
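
For orientation, here is a minimal sketch (not part of the diff) of constructing the new aspect from the classes added above. The import path mirrors the datahub.metadata.schema_classes re-export that the generated urn_defs.py below uses; bucket, key, size, and timestamp values are placeholders.

from datahub.metadata.schema_classes import (
    AuditStampClass,
    BucketStorageLocationClass,
    DataHubFileInfoClass,
    FileUploadScenarioClass,
)

# Build the new aspect; required fields follow the __init__ signature above.
file_info = DataHubFileInfoClass(
    bucketStorageLocation=BucketStorageLocationClass(
        storageBucket="my-datahub-bucket",       # placeholder bucket name
        storageKey="uploads/docs/diagram.png",   # placeholder object key
    ),
    originalFileName="diagram.png",
    mimeType="image/png",
    sizeInBytes=48213,
    scenario=FileUploadScenarioClass.ASSET_DOCUMENTATION,
    created=AuditStampClass(time=1718000000000, actor="urn:li:corpuser:datahub"),
)
assert file_info.ASPECT_NAME == "dataHubFileInfo"

The optional fields (referencedByAsset, schemaField, contentHash) default to None, matching the "default": null entries in the Avro schema later in this diff.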
@@ -15712,6 +15891,35 @@ class DataHubConnectionKeyClass(_Aspect):
         self._inner_dict['id'] = value


+class DataHubFileKeyClass(_Aspect):
+    """Key for a DataHubFile"""
+
+    ASPECT_NAME = 'dataHubFileKey'
+    ASPECT_INFO = {'keyForEntity': 'dataHubFile', 'entityCategory': 'core', 'entityAspects': ['dataHubFileInfo']}
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubFileKey")
+
+    def __init__(self,
+        id: str,
+    ):
+        super().__init__()
+
+        self.id = id
+
+    def _restore_defaults(self) -> None:
+        self.id = str()
+
+    @property
+    def id(self) -> str:
+        """Unique id for the file."""
+        return self._inner_dict.get('id')  # type: ignore
+
+    @id.setter
+    def id(self, value: str) -> None:
+        self._inner_dict['id'] = value
+
+
 class DataHubIngestionSourceKeyClass(_Aspect):
     """Key for a DataHub ingestion source"""

@@ -27751,6 +27959,9 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.execution.ExecutionRequestSignal': ExecutionRequestSignalClass,
     'com.linkedin.pegasus2avro.execution.ExecutionRequestSource': ExecutionRequestSourceClass,
     'com.linkedin.pegasus2avro.execution.StructuredExecutionReport': StructuredExecutionReportClass,
+    'com.linkedin.pegasus2avro.file.BucketStorageLocation': BucketStorageLocationClass,
+    'com.linkedin.pegasus2avro.file.DataHubFileInfo': DataHubFileInfoClass,
+    'com.linkedin.pegasus2avro.file.FileUploadScenario': FileUploadScenarioClass,
     'com.linkedin.pegasus2avro.form.DynamicFormAssignment': DynamicFormAssignmentClass,
     'com.linkedin.pegasus2avro.form.FormActorAssignment': FormActorAssignmentClass,
     'com.linkedin.pegasus2avro.form.FormInfo': FormInfoClass,
@@ -27800,6 +28011,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.metadata.key.DataHubAccessTokenKey': DataHubAccessTokenKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubActionKey': DataHubActionKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubConnectionKey': DataHubConnectionKeyClass,
+    'com.linkedin.pegasus2avro.metadata.key.DataHubFileKey': DataHubFileKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'com.linkedin.pegasus2avro.metadata.key.DataHubPageModuleKey': DataHubPageModuleKeyClass,
@@ -28273,6 +28485,9 @@ __SCHEMA_TYPES = {
     'ExecutionRequestSignal': ExecutionRequestSignalClass,
     'ExecutionRequestSource': ExecutionRequestSourceClass,
     'StructuredExecutionReport': StructuredExecutionReportClass,
+    'BucketStorageLocation': BucketStorageLocationClass,
+    'DataHubFileInfo': DataHubFileInfoClass,
+    'FileUploadScenario': FileUploadScenarioClass,
     'DynamicFormAssignment': DynamicFormAssignmentClass,
     'FormActorAssignment': FormActorAssignmentClass,
     'FormInfo': FormInfoClass,
@@ -28322,6 +28537,7 @@ __SCHEMA_TYPES = {
     'DataHubAccessTokenKey': DataHubAccessTokenKeyClass,
     'DataHubActionKey': DataHubActionKeyClass,
     'DataHubConnectionKey': DataHubConnectionKeyClass,
+    'DataHubFileKey': DataHubFileKeyClass,
     'DataHubIngestionSourceKey': DataHubIngestionSourceKeyClass,
     'DataHubOpenAPISchemaKey': DataHubOpenAPISchemaKeyClass,
     'DataHubPageModuleKey': DataHubPageModuleKeyClass,
@@ -28585,6 +28801,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     DashboardInfoClass,
     EditableSchemaMetadataClass,
     SchemaMetadataClass,
+    DataHubFileInfoClass,
     AssertionActionsClass,
     AssertionRunEventClass,
     AssertionInfoClass,
@@ -28666,6 +28883,7 @@ ASPECT_CLASSES: List[Type[_Aspect]] = [
     DataHubAccessTokenKeyClass,
     DataHubActionKeyClass,
     MLPrimaryKeyKeyClass,
+    DataHubFileKeyClass,
     TestKeyClass,
     GlossaryTermKeyClass,
     InviteTokenKeyClass,
@@ -28819,6 +29037,7 @@ class AspectBag(TypedDict, total=False):
     dashboardInfo: DashboardInfoClass
     editableSchemaMetadata: EditableSchemaMetadataClass
     schemaMetadata: SchemaMetadataClass
+    dataHubFileInfo: DataHubFileInfoClass
     assertionActions: AssertionActionsClass
     assertionRunEvent: AssertionRunEventClass
     assertionInfo: AssertionInfoClass
@@ -28900,6 +29119,7 @@ class AspectBag(TypedDict, total=False):
     dataHubAccessTokenKey: DataHubAccessTokenKeyClass
     dataHubActionKey: DataHubActionKeyClass
     mlPrimaryKeyKey: MLPrimaryKeyKeyClass
+    dataHubFileKey: DataHubFileKeyClass
     testKey: TestKeyClass
     glossaryTermKey: GlossaryTermKeyClass
     inviteTokenKey: InviteTokenKeyClass
@@ -29069,6 +29289,7 @@ KEY_ASPECTS: Dict[str, Type[_Aspect]] = {
     'dataHubAccessToken': DataHubAccessTokenKeyClass,
     'dataHubAction': DataHubActionKeyClass,
     'mlPrimaryKey': MLPrimaryKeyKeyClass,
+    'dataHubFile': DataHubFileKeyClass,
     'test': TestKeyClass,
     'glossaryTerm': GlossaryTermKeyClass,
     'inviteToken': InviteTokenKeyClass,
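
The registry hunks above and below wire dataHubFile in as a first-class entity type. A tiny consistency check (not in the diff), assuming KEY_ASPECTS and ENTITY_TYPE_NAMES are re-exported through datahub.metadata.schema_classes like the classes themselves:

from datahub.metadata.schema_classes import (
    ENTITY_TYPE_NAMES,
    KEY_ASPECTS,
    DataHubFileKeyClass,
)

# The key-aspect registry maps the new entity name to its key class, and the
# key class advertises which aspects the entity carries.
assert KEY_ASPECTS["dataHubFile"] is DataHubFileKeyClass
assert "dataHubFile" in ENTITY_TYPE_NAMES
assert DataHubFileKeyClass.ASPECT_INFO["entityAspects"] == ["dataHubFileInfo"]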
@@ -29135,6 +29356,7 @@ ENTITY_TYPE_NAMES: List[str] = [
     'dataHubAccessToken',
     'dataHubAction',
     'mlPrimaryKey',
+    'dataHubFile',
     'test',
     'glossaryTerm',
     'inviteToken',
@@ -29198,6 +29420,7 @@ EntityTypeName = Literal[
     'dataHubAccessToken',
     'dataHubAction',
     'mlPrimaryKey',
+    'dataHubFile',
     'test',
     'glossaryTerm',
     'inviteToken',
@@ -1885,6 +1885,62 @@ class MlPrimaryKeyUrn(_SpecificUrn):
     def name(self) -> str:
         return self._entity_ids[1]

+if TYPE_CHECKING:
+    from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+class DataHubFileUrn(_SpecificUrn):
+    ENTITY_TYPE: ClassVar[Literal["dataHubFile"]] = "dataHubFile"
+    _URN_PARTS: ClassVar[int] = 1
+
+    def __init__(self, id: Union["DataHubFileUrn", str], *, _allow_coercion: bool = True) -> None:
+        if _allow_coercion:
+            # Field coercion logic (if any is required).
+            if isinstance(id, str):
+                if id.startswith('urn:li:'):
+                    try:
+                        id = DataHubFileUrn.from_string(id)
+                    except InvalidUrnError:
+                        raise InvalidUrnError(f'Expecting a DataHubFileUrn but got {id}')
+                else:
+                    id = UrnEncoder.encode_string(id)
+
+        # Validation logic.
+        if not id:
+            raise InvalidUrnError("DataHubFileUrn id cannot be empty")
+        if isinstance(id, DataHubFileUrn):
+            id = id.id
+        elif isinstance(id, Urn):
+            raise InvalidUrnError(f'Expecting a DataHubFileUrn but got {id}')
+        if UrnEncoder.contains_reserved_char(id):
+            raise InvalidUrnError(f'DataHubFileUrn id contains reserved characters')
+
+        super().__init__(self.ENTITY_TYPE, [id])
+
+    @classmethod
+    def _parse_ids(cls, entity_ids: List[str]) -> "DataHubFileUrn":
+        if len(entity_ids) != cls._URN_PARTS:
+            raise InvalidUrnError(f"DataHubFileUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
+        return cls(id=entity_ids[0], _allow_coercion=False)
+
+    @classmethod
+    def underlying_key_aspect_type(cls) -> Type["DataHubFileKeyClass"]:
+        from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+        return DataHubFileKeyClass
+
+    def to_key_aspect(self) -> "DataHubFileKeyClass":
+        from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+        return DataHubFileKeyClass(id=self.id)
+
+    @classmethod
+    def from_key_aspect(cls, key_aspect: "DataHubFileKeyClass") -> "DataHubFileUrn":
+        return cls(id=key_aspect.id)
+
+    @property
+    def id(self) -> str:
+        return self._entity_ids[0]
+
 if TYPE_CHECKING:
     from datahub.metadata.schema_classes import TestKeyClass

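A short usage sketch of the new urn class, based only on the methods visible in this hunk. The file id abc123 is invented, and the import assumes DataHubFileUrn is re-exported from datahub.metadata.urns alongside the existing urn classes:

from datahub.metadata.urns import DataHubFileUrn

urn = DataHubFileUrn("abc123")
assert str(urn) == "urn:li:dataHubFile:abc123"

# Round-trip through the string form and the key aspect.
parsed = DataHubFileUrn.from_string("urn:li:dataHubFile:abc123")
key = urn.to_key_aspect()            # DataHubFileKeyClass(id="abc123")
again = DataHubFileUrn.from_key_aspect(key)
assert urn == parsed == again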
@@ -0,0 +1,19 @@
+# mypy: ignore-errors
+# flake8: noqa
+
+# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
+# Do not modify manually!
+
+# pylint: skip-file
+# fmt: off
+# isort: skip_file
+from .....schema_classes import BucketStorageLocationClass
+from .....schema_classes import DataHubFileInfoClass
+from .....schema_classes import FileUploadScenarioClass
+
+
+BucketStorageLocation = BucketStorageLocationClass
+DataHubFileInfo = DataHubFileInfoClass
+FileUploadScenario = FileUploadScenarioClass
+
+# fmt: on
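
This new module re-exports the generated classes under the pegasus2avro package path, so both import styles should resolve to the same objects:

from datahub.metadata.com.linkedin.pegasus2avro.file import DataHubFileInfo
from datahub.metadata.schema_classes import DataHubFileInfoClass

# The alias in the generated __init__.py points at the same class object.
assert DataHubFileInfo is DataHubFileInfoClass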
@@ -18,6 +18,7 @@ from ......schema_classes import DataFlowKeyClass
 from ......schema_classes import DataHubAccessTokenKeyClass
 from ......schema_classes import DataHubActionKeyClass
 from ......schema_classes import DataHubConnectionKeyClass
+from ......schema_classes import DataHubFileKeyClass
 from ......schema_classes import DataHubIngestionSourceKeyClass
 from ......schema_classes import DataHubOpenAPISchemaKeyClass
 from ......schema_classes import DataHubPageModuleKeyClass
@@ -74,6 +75,7 @@ DataFlowKey = DataFlowKeyClass
 DataHubAccessTokenKey = DataHubAccessTokenKeyClass
 DataHubActionKey = DataHubActionKeyClass
 DataHubConnectionKey = DataHubConnectionKeyClass
+DataHubFileKey = DataHubFileKeyClass
 DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
 DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
 DataHubPageModuleKey = DataHubPageModuleKeyClass
@@ -1903,6 +1903,191 @@
     ],
     "doc": "SchemaMetadata to describe metadata related to store schema"
   },
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "dataHubFileInfo"
+    },
+    "name": "DataHubFileInfo",
+    "namespace": "com.linkedin.pegasus2avro.file",
+    "fields": [
+      {
+        "type": {
+          "type": "record",
+          "name": "BucketStorageLocation",
+          "namespace": "com.linkedin.pegasus2avro.file",
+          "fields": [
+            {
+              "Searchable": {
+                "fieldType": "KEYWORD"
+              },
+              "type": "string",
+              "name": "storageBucket",
+              "doc": "The storage bucket this file is stored in"
+            },
+            {
+              "Searchable": {
+                "fieldType": "KEYWORD"
+              },
+              "type": "string",
+              "name": "storageKey",
+              "doc": "The key for where this file is stored inside of the given bucket"
+            }
+          ],
+          "doc": "Information where a file is stored"
+        },
+        "name": "bucketStorageLocation",
+        "doc": "Info about where a file is stored"
+      },
+      {
+        "Searchable": {
+          "fieldType": "TEXT_PARTIAL"
+        },
+        "type": "string",
+        "name": "originalFileName",
+        "doc": "The original filename as uploaded by the user"
+      },
+      {
+        "Searchable": {
+          "fieldType": "KEYWORD"
+        },
+        "type": "string",
+        "name": "mimeType",
+        "doc": "MIME type of the file (e.g., image/png, application/pdf)"
+      },
+      {
+        "type": "long",
+        "name": "sizeInBytes",
+        "doc": "Size of the file in bytes"
+      },
+      {
+        "Searchable": {
+          "fieldType": "KEYWORD"
+        },
+        "type": {
+          "type": "enum",
+          "symbolDocs": {
+            "ASSET_DOCUMENTATION": "File uploaded for entity documentation"
+          },
+          "name": "FileUploadScenario",
+          "namespace": "com.linkedin.pegasus2avro.file",
+          "symbols": [
+            "ASSET_DOCUMENTATION"
+          ]
+        },
+        "name": "scenario",
+        "doc": "The scenario/context in which this file was uploaded"
+      },
+      {
+        "Relationship": {
+          "entityTypes": [
+            "dataset",
+            "chart",
+            "container",
+            "dashboard",
+            "dataFlow",
+            "dataJob",
+            "glossaryTerm",
+            "glossaryNode",
+            "mlModel",
+            "mlFeature",
+            "notebook",
+            "mlFeatureTable",
+            "mlPrimaryKey",
+            "mlModelGroup",
+            "domain",
+            "dataProduct"
+          ],
+          "name": "ReferencedBy"
+        },
+        "Searchable": {
+          "fieldType": "URN"
+        },
+        "java": {
+          "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+        },
+        "Urn": "Urn",
+        "entityTypes": [
+          "dataset",
+          "chart",
+          "container",
+          "dashboard",
+          "dataFlow",
+          "dataJob",
+          "glossaryTerm",
+          "glossaryNode",
+          "mlModel",
+          "mlFeature",
+          "notebook",
+          "mlFeatureTable",
+          "mlPrimaryKey",
+          "mlModelGroup",
+          "domain",
+          "dataProduct"
+        ],
+        "type": [
+          "null",
+          "string"
+        ],
+        "name": "referencedByAsset",
+        "default": null,
+        "doc": "Optional URN of the entity this file is associated with (e.g., the dataset whose docs contain this file)"
+      },
+      {
+        "Relationship": {
+          "entityTypes": [
+            "schemaField"
+          ],
+          "name": "ReferencedBy"
+        },
+        "Searchable": {
+          "fieldType": "URN"
+        },
+        "java": {
+          "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+        },
+        "Urn": "Urn",
+        "entityTypes": [
+          "schemaField"
+        ],
+        "type": [
+          "null",
+          "string"
+        ],
+        "name": "schemaField",
+        "default": null,
+        "doc": "The dataset schema field urn this file is referenced by"
+      },
+      {
+        "Searchable": {
+          "/actor": {
+            "fieldName": "createdBy",
+            "fieldType": "URN"
+          },
+          "/time": {
+            "fieldName": "createdAt",
+            "fieldType": "DATETIME"
+          }
+        },
+        "type": "com.linkedin.pegasus2avro.common.AuditStamp",
+        "name": "created",
+        "doc": "Timestamp when this file was created and by whom"
+      },
+      {
+        "Searchable": {
+          "fieldType": "KEYWORD"
+        },
+        "type": [
+          "null",
+          "string"
+        ],
+        "name": "contentHash",
+        "default": null,
+        "doc": "SHA-256 hash of file contents"
+      }
+    ],
+    "doc": "Information about a DataHub file - a file stored in S3 for use within DataHub platform features like documentation, home pages, and announcements."
+  },
   {
     "type": "record",
     "Aspect": {
@@ -15005,6 +15190,27 @@
     "doc": "Key for a DataHub Action Pipeline"
   },
   "com.linkedin.pegasus2avro.metadata.key.MLPrimaryKeyKey",
+  {
+    "type": "record",
+    "Aspect": {
+      "name": "dataHubFileKey",
+      "keyForEntity": "dataHubFile",
+      "entityCategory": "core",
+      "entityAspects": [
+        "dataHubFileInfo"
+      ]
+    },
+    "name": "DataHubFileKey",
+    "namespace": "com.linkedin.pegasus2avro.metadata.key",
+    "fields": [
+      {
+        "type": "string",
+        "name": "id",
+        "doc": "Unique id for the file."
+      }
+    ],
+    "doc": "Key for a DataHubFile"
+  },
   {
     "type": "record",
     "Aspect": {