acryl-datahub-cloud 0.3.14.1rc4__py3-none-any.whl → 0.3.15rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic by the registry.

Files changed (40):
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/datahub_forms_notifications/forms_notifications_source.py +1 -1
  3. acryl_datahub_cloud/datahub_reporting/datahub_dataset.py +30 -7
  4. acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +1 -1
  5. acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +22 -18
  6. acryl_datahub_cloud/elasticsearch/graph_service.py +23 -9
  7. acryl_datahub_cloud/lineage_features/source.py +77 -6
  8. acryl_datahub_cloud/metadata/_urns/urn_defs.py +60 -0
  9. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/execution/__init__.py +2 -0
  10. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  11. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  12. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  13. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  14. acryl_datahub_cloud/metadata/schema.avsc +420 -21
  15. acryl_datahub_cloud/metadata/schema_classes.py +521 -8
  16. acryl_datahub_cloud/metadata/schemas/Actors.avsc +38 -1
  17. acryl_datahub_cloud/metadata/schemas/AssertionAnalyticsRunEvent.avsc +37 -15
  18. acryl_datahub_cloud/metadata/schemas/AssertionInfo.avsc +18 -15
  19. acryl_datahub_cloud/metadata/schemas/AssertionRunEvent.avsc +19 -15
  20. acryl_datahub_cloud/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  21. acryl_datahub_cloud/metadata/schemas/DataHubFileInfo.avsc +230 -0
  22. acryl_datahub_cloud/metadata/schemas/DataHubFileKey.avsc +21 -0
  23. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +3 -1
  24. acryl_datahub_cloud/metadata/schemas/ExecutionRequestArtifactsLocation.avsc +16 -0
  25. acryl_datahub_cloud/metadata/schemas/ExecutionRequestKey.avsc +2 -1
  26. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
  27. acryl_datahub_cloud/metadata/schemas/LineageFeatures.avsc +67 -42
  28. acryl_datahub_cloud/metadata/schemas/LogicalParent.avsc +2 -1
  29. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +1 -1
  30. acryl_datahub_cloud/metadata/schemas/MonitorInfo.avsc +24 -15
  31. acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  32. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +22 -6
  33. acryl_datahub_cloud/sdk/assertions_client.py +35 -7
  34. acryl_datahub_cloud/sdk/entities/subscription.py +22 -6
  35. acryl_datahub_cloud/sdk/subscription_client.py +8 -2
  36. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/METADATA +39 -42
  37. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/RECORD +40 -36
  38. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/WHEEL +0 -0
  39. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/entry_points.txt +0 -0
  40. {acryl_datahub_cloud-0.3.14.1rc4.dist-info → acryl_datahub_cloud-0.3.15rc0.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
 {
   "name": "acryl-datahub-cloud",
-  "version": "0.3.14.1rc4",
+  "version": "0.3.15rc0",
   "install_requires": [
     "avro-gen3==0.7.16",
     "acryl-datahub"
@@ -391,7 +391,7 @@ class DataHubFormsNotificationsSource(Source):
         user_urns = []
         group_urns = []

-        extra_fields = [f for f in DataHubDatasetSearchRow.__fields__]
+        extra_fields = [f for f in DataHubDatasetSearchRow.model_fields]
         results = self.graph.get_results_by_filter(
             extra_or_filters=self._get_incomplete_assets_for_form(form_urn, form.type),
             extra_source_fields=extra_fields,
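
The `__fields__` to `model_fields` changes here (and in the reporting and row-model hunks further down) track Pydantic v2, where `model_fields` is a plain dict keyed by field name. A minimal sketch with a made-up stand-in model:

    from pydantic import BaseModel

    class DataHubDatasetSearchRow(BaseModel):  # hypothetical stand-in for the real search-row model
        urn: str
        platform: str

    # Pydantic v1 iterated __fields__; v2 exposes model_fields, a dict of name -> FieldInfo,
    # so iterating it yields the field names that get passed as extra source fields.
    extra_fields = [f for f in DataHubDatasetSearchRow.model_fields]
    print(extra_fields)  # ['urn', 'platform']
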
@@ -5,7 +5,7 @@ import pathlib
 import tempfile
 import time
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple, Union, cast

 import boto3
 import duckdb
@@ -73,7 +73,9 @@ class FileStoreBackedDatasetConfig(ConfigModel):
     store_platform: str = "s3"
     file_name: str = "data"
     file_extension: str = "parquet"
-    file_compression: str = "snappy"
+    file_compression: Literal[
+        "gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"
+    ] = "snappy"
     file_overwrite_existing: bool = True
     snapshot_partitioning_strategy: str = PartitioningStrategy.DATE
     generate_presigned_url: bool = True
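
Narrowing file_compression from str to a Literal means unsupported codec names are rejected when the config is parsed instead of surfacing later in the Parquet writer. A reduced sketch (a trimmed stand-in model, not the full FileStoreBackedDatasetConfig):

    from typing import Literal

    from pydantic import BaseModel, ValidationError

    class CompressionConfig(BaseModel):  # hypothetical, trimmed to the one field shown above
        file_compression: Literal[
            "gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"
        ] = "snappy"

    CompressionConfig(file_compression="zstd")     # accepted
    try:
        CompressionConfig(file_compression="lzo")  # rejected at validation time
    except ValidationError as e:
        print(e)
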
@@ -119,9 +121,14 @@ class DataHubBasedS3Dataset:
         self.local_file_path: str = (
             config.file if config.file else self._initialize_local_file()
         )
-        self.file_writer = None
+        self.file_writer: Optional[pq.ParquetWriter] = None
         self.schema = (
-            pa.schema([(x.name, x.type) for x in self.dataset_metadata.schemaFields])
+            pa.schema(
+                [
+                    pa.field(x.name, BaseModelRow.string_to_pyarrow_type(x.type))
+                    for x in self.dataset_metadata.schemaFields
+                ]
+            )
             if self.dataset_metadata.schemaFields
             else None
         )
@@ -163,14 +170,28 @@ class DataHubBasedS3Dataset:
             self.schema = row.arrow_schema()
         else:
             # hail mary: infer schema from the first row and cast everything to string
-            self.schema = pa.schema([(key, pa.string()) for key in row])
+            self.schema = pa.schema([pa.field(key, pa.string()) for key in row])
             self.stringify_row = True

         self._initialize_local_file()
+        # Map compression names to PyArrow format (most are direct mappings)
+        compression_map = {
+            "gzip": "gzip",
+            "bz2": "brotli",  # PyArrow doesn't support bz2, use brotli
+            "brotli": "brotli",
+            "lz4": "lz4",
+            "zstd": "zstd",
+            "snappy": "snappy",
+            "none": "none",
+        }
+        compression = cast(
+            Literal["gzip", "bz2", "brotli", "lz4", "zstd", "snappy", "none"],
+            compression_map.get(self.config.file_compression, "snappy"),
+        )
         self.file_writer = pq.ParquetWriter(
             self.local_file_path,
             self.schema,
-            compression=self.config.file_compression,
+            compression=compression,
         )
         if isinstance(row, (BaseModel, BaseModelRow)):
             # for anything extending BaseModel, we want to use the dict representation
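
The mapping above resolves the configured codec name to one that PyArrow's ParquetWriter accepts (bz2 is remapped to brotli, anything unknown falls back to snappy). A standalone sketch of the same resolution; the path and schema here are made up:

    import pyarrow as pa
    import pyarrow.parquet as pq

    compression_map = {
        "gzip": "gzip",
        "bz2": "brotli",  # PyArrow has no bz2 Parquet codec, so brotli is substituted
        "brotli": "brotli",
        "lz4": "lz4",
        "zstd": "zstd",
        "snappy": "snappy",
        "none": "none",
    }

    schema = pa.schema([pa.field("urn", pa.string())])
    compression = compression_map.get("bz2", "snappy")  # -> "brotli"
    writer = pq.ParquetWriter("/tmp/data.parquet", schema, compression=compression)
    writer.write_table(pa.table({"urn": ["urn:li:dataset:example"]}, schema=schema))
    writer.close()
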
@@ -396,7 +417,9 @@ class DataHubBasedS3Dataset:
         assert dataset_profiles.fieldProfiles is not None
         dataset_profiles.fieldProfiles.append(field_profile)
         logger.info("Generated dataset profile")
-        schema_metadata = self._generate_schema_metadata(columns)
+        schema_metadata = self._generate_schema_metadata(
+            [(col[0], col[1]) for col in columns]
+        )
         return dataset_profiles, schema_metadata

     def register_dataset(
@@ -306,7 +306,7 @@ class DataHubFormReportingData(FormData):
         on_asset_scanned: Optional[Callable[[str], Any]] = None,
         on_form_scanned: Optional[Callable[[str], Any]] = None,
     ) -> Iterable[FormReportingRow]:
-        extra_fields = [f for f in self.DataHubDatasetSearchRow.__fields__]
+        extra_fields = [f for f in self.DataHubDatasetSearchRow.model_fields]
         # TODO: Replace with the new search/filter SDK.
         result = self.graph.get_results_by_filter(
             extra_or_filters=self.get_form_existence_or_filters(),
@@ -42,7 +42,7 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor
 from datahub.ingestion.api.source_helpers import auto_workunit_reporter
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.graph.client import DatahubClientConfig
@@ -239,7 +239,7 @@ def exp_cdf(series: polars.Series) -> polars.Series:


 @dataclass
-class DatahubUsageFeatureReport(IngestionStageReport, StatefulIngestionReport):
+class DatahubUsageFeatureReport(StatefulIngestionReport, IngestionStageReport):
     dataset_platforms_count: Dict[str, int] = field(
         default_factory=lambda: defaultdict(lambda: 0)
     )
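
Swapping the order of the two report base classes changes the MRO, which decides whose attributes and methods take precedence when both mixins define the same name. A generic illustration of the mechanics (these Report classes are placeholders, not DataHub's):

    from dataclasses import dataclass

    @dataclass
    class IngestionStageReport:
        stage: str = "none"

    @dataclass
    class StatefulIngestionReport:
        checkpoints: int = 0

    @dataclass
    class DatahubUsageFeatureReport(StatefulIngestionReport, IngestionStageReport):
        pass

    print([c.__name__ for c in DatahubUsageFeatureReport.__mro__])
    # ['DatahubUsageFeatureReport', 'StatefulIngestionReport', 'IngestionStageReport', 'object']
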
@@ -738,17 +738,20 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
                 return pa.dictionary(index_type=pa.int32(), value_type=pa.string())
             elif isinstance(polars_dtype, polars.Struct):
                 return pa.struct(
-                    {
-                        field.name: convert_dtype(field.dtype)
+                    [
+                        pa.field(field.name, convert_dtype(field.dtype))
                         for field in polars_dtype.fields
-                    }
+                    ]
                 )
             elif isinstance(polars_dtype, polars.List):
                 return pa.list_(convert_dtype(polars_dtype.inner))
             else:
                 raise ValueError(f"Unsupported Polars dtype: {polars_dtype}")

-        fields = [(name, convert_dtype(dtype)) for name, dtype in polars_schema.items()]
+        fields = [
+            pa.field(name, convert_dtype(dtype))
+            for name, dtype in polars_schema.items()
+        ]
         return pa.schema(fields)

     def batch_write_parquet(
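
Both changes in this hunk replace dict/tuple construction with explicit pa.field objects, which also leaves room to attach per-field nullability or metadata. Minimal sketch of the resulting pattern:

    import pyarrow as pa

    struct_type = pa.struct(
        [pa.field("name", pa.string()), pa.field("count", pa.int64())]
    )
    schema = pa.schema(
        [pa.field("payload", struct_type), pa.field("ts", pa.timestamp("ns"))]
    )
    print(schema)
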
@@ -971,26 +974,27 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):

     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         if self.config.user_usage_enabled:
-            self.report.new_stage("generate user usage")
-            yield from self.generate_user_usage_mcps()
+            with self.report.new_stage("generate user usage"):
+                yield from self.generate_user_usage_mcps()

         if self.config.dataset_usage_enabled:
-            self.report.new_stage("generate dataset usage")
-            yield from self.generate_dataset_usage_mcps()
+            with self.report.new_stage("generate dataset usage"):
+                yield from self.generate_dataset_usage_mcps()

         if self.config.dashboard_usage_enabled:
-            self.report.new_stage("generate dashboard usage")
-            yield from self.generate_dashboard_usage_mcps()
+            with self.report.new_stage("generate dashboard usage"):
+                yield from self.generate_dashboard_usage_mcps()

         if self.config.chart_usage_enabled:
-            self.report.new_stage("generate chart usage")
-            yield from self.generate_chart_usage_mcps()
+            with self.report.new_stage("generate chart usage"):
+                yield from self.generate_chart_usage_mcps()

         if self.config.query_usage_enabled:
-            self.report.new_stage("generate query usage")
-            yield from self.generate_query_usage_mcps()
+            with self.report.new_stage("generate query usage"):
+                yield from self.generate_query_usage_mcps()

-        self.report.new_stage("end so time is calculated for last stage")
+        with self.report.new_stage("end so time is calculated for last stage"):
+            pass

     def generate_mcp_from_lazyframe(
         self, lazy_frame: polars.LazyFrame
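
new_stage is now used as a context manager, so each stage is closed out (and its duration recorded) when the with-block exits instead of only when the next stage begins; that is also why the trailing "end so time is calculated for last stage" stage can simply pass. A generic sketch of the pattern, not DataHub's actual IngestionStageReport implementation:

    import time
    from contextlib import contextmanager

    @contextmanager
    def new_stage(name: str):
        start = time.time()
        try:
            yield
        finally:
            # On exit the stage duration is final, even if the body raised.
            print(f"stage {name!r} took {time.time() - start:.2f}s")

    with new_stage("generate user usage"):
        time.sleep(0.1)  # stand-in for `yield from self.generate_user_usage_mcps()`
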
@@ -2091,5 +2095,5 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
             )
             time.sleep(delay)

-    def get_report(self) -> SourceReport:
+    def get_report(self) -> "DatahubUsageFeatureReport":
         return self.report
@@ -32,15 +32,31 @@ class BaseModelRow(BaseModel):
         else:
             raise ValueError(f"No mapping for type {type_}")

+    @staticmethod
+    def string_to_pyarrow_type(type_string: str) -> pa.DataType:
+        """Convert string representation back to pyarrow type by converting to Python type first."""
+        # Mapping of pyarrow string representations to Python types
+        type_mapping = {
+            "string": str,
+            "int64": int,
+            "float64": float,
+            "bool": bool,
+            "timestamp[ns]": datetime.datetime,
+            "date32[day]": datetime.date,
+        }
+
+        python_type = type_mapping.get(
+            type_string, str
+        )  # Default to str for unknown types
+        return BaseModelRow.pydantic_type_to_pyarrow(python_type)
+
     @classmethod
     def arrow_schema(cls) -> pa.Schema:
         fields = []
-        for field_name, field_model in cls.__fields__.items():
-            pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(
-                field_model.outer_type_
-            )
+        for field_name, field_model in cls.model_fields.items():
+            pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(field_model.annotation)
             pyarrow_field = pa.field(field_name, pyarrow_type)
-            if not field_model.required:
+            if not field_model.is_required():
                 pyarrow_field = pyarrow_field.with_nullable(True)
             else:
                 pyarrow_field = pyarrow_field.with_nullable(False)
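
string_to_pyarrow_type closes the loop with datahub_schema(), which stores each column's type as str(pyarrow_type): those stored strings can now be mapped back to PyArrow types when the Parquet schema is rebuilt from dataset metadata (see the pa.field(x.name, BaseModelRow.string_to_pyarrow_type(x.type)) call earlier in this diff). A simplified sketch of that round trip, not the full mapping:

    import pyarrow as pa

    # Reduced mapping; the real method goes through Python types and
    # pydantic_type_to_pyarrow, and defaults unknown strings to str.
    type_mapping = {"string": pa.string(), "int64": pa.int64(), "bool": pa.bool_()}

    def string_to_pyarrow_type(type_string: str) -> pa.DataType:
        return type_mapping.get(type_string, pa.string())

    stored_fields = [("urn", "string"), ("view_count", "int64")]  # e.g. from SchemaField.type
    schema = pa.schema(
        [pa.field(name, string_to_pyarrow_type(t)) for name, t in stored_fields]
    )
    print(schema)
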
@@ -50,10 +66,8 @@ class BaseModelRow(BaseModel):
     @classmethod
     def datahub_schema(cls) -> List[SchemaField]:
         fields = []
-        for field_name, field_model in cls.__fields__.items():
-            pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(
-                field_model.outer_type_
-            )
+        for field_name, field_model in cls.model_fields.items():
+            pyarrow_type = BaseModelRow.pydantic_type_to_pyarrow(field_model.annotation)
             fields.append(SchemaField(name=field_name, type=str(pyarrow_type)))
         return fields

@@ -3,7 +3,6 @@ import os
 import time
 from collections import defaultdict
 from dataclasses import dataclass
-from datetime import datetime, timezone
 from typing import Any, Callable, Dict, Iterable, List, Set

 from opensearchpy import OpenSearch
@@ -53,6 +52,12 @@ class LineageFeaturesSourceConfig(ConfigModel):
     retry_delay_seconds: int = 5
     retry_backoff_multiplier: float = 2.0

+    # Cleanup old features when they have not been updated for this many days
+    # This is required because we only emit this feature for cases where we find a lineage
+    # in the graph index
+    cleanup_batch_size: int = 100
+    cleanup_old_features_days: int = 2
+
     @validator("max_retries")
     def validate_max_retries(cls, v: int) -> int:
         if v < 1:
@@ -79,6 +84,12 @@ class LineageExtractGraphSourceReport(SourceReport, IngestionStageReport):
     downstream_count: int = 0
     edges_scanned: int = 0
     skipped_materialized_urns_count: int = 0
+    zero_upstream_count: int = 0
+    zero_downstream_count: int = 0
+    has_asset_level_lineage_count: int = 0
+    zero_asset_level_lineage_count: int = 0
+    cleanup_old_features_time: int = 0
+    cleanup_old_features_count: int = 0


 @platform_name(id="datahub", platform_name="DataHub")
@@ -255,7 +266,6 @@ class DataHubLineageFeaturesSource(Source):
         with self.report.new_stage("Load valid URNs"):
             self.populate_valid_urns()

-        timestamp = datetime.now(tz=timezone.utc)
         server = self._create_opensearch_client_with_retry()

         query = {
@@ -326,7 +336,58 @@ class DataHubLineageFeaturesSource(Source):
                 self._update_report()
         self._delete_pit_with_retry(server, pit)

-        self.report.new_stage("start emission of lineage features")
+        with self.report.new_stage("emission of lineage features"):
+            yield from self._emit_lineage_features()
+
+        with self.report.new_stage("cleanup old lineage features"):
+            yield from self._cleanup_old_features()
+
+    def _cleanup_old_features(self) -> Iterable[MetadataWorkUnit]:
+        """
+        This is required because we only emit this feature for cases where we find a lineage
+        in the graph index
+        """
+        cutoff_time = int(
+            (time.time() - (self.config.cleanup_old_features_days * 24 * 60 * 60))
+            * 1000
+        )
+        self.report.cleanup_old_features_time = cutoff_time
+
+        for urn in self.ctx.require_graph("Cleanup old features").get_urns_by_filter(
+            extraFilters=[
+                {
+                    "field": "hasAssetLevelLineageFeature",
+                    "negated": False,
+                    "condition": "EQUAL",
+                    "values": ["true"],
+                },
+                {
+                    "field": "lineageFeaturesComputedAt",
+                    "negated": False,
+                    "condition": "LESS_THAN",
+                    "values": [str(cutoff_time)],
+                },
+            ],
+            batch_size=self.config.cleanup_batch_size,
+        ):
+            # Emit lineage features with zero upstreams and downstreams for cleanup
+            wu = MetadataChangeProposalWrapper(
+                entityUrn=urn,
+                aspect=LineageFeaturesClass(
+                    upstreamCount=0,
+                    downstreamCount=0,
+                    hasAssetLevelLineage=False,
+                    computedAt=AuditStampClass(
+                        time=int(time.time() * 1000),
+                        actor=SYSTEM_ACTOR,
+                    ),
+                ),
+            ).as_workunit()
+            self.report.cleanup_old_features_count += 1
+            self.report.report_workunit(wu)
+            yield wu
+
+    def _emit_lineage_features(self) -> Iterable[MetadataWorkUnit]:
         # In Python 3.9, can be replaced by `self.self.upstream_counts.keys() | self.downstream_counts.keys()`
         for urn in set(self.upstream_counts.keys()).union(
             self.downstream_counts.keys()
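
The cleanup cutoff is plain epoch-millisecond arithmetic: cleanup_old_features_days back from now, compared against the lineageFeaturesComputedAt search field. For the default of 2 days:

    import time

    cleanup_old_features_days = 2  # config default shown above
    cutoff_time = int((time.time() - cleanup_old_features_days * 24 * 60 * 60) * 1000)
    # Any entity whose lineageFeaturesComputedAt is below cutoff_time gets its
    # LineageFeatures reset to zero counts by _cleanup_old_features.
    print(cutoff_time)
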
@@ -337,21 +398,31 @@ class DataHubLineageFeaturesSource(Source):
             logger.debug(
                 f"{urn}: {self.upstream_counts[urn]}, {self.downstream_counts[urn]}"
             )
+            if self.upstream_counts[urn] == 0:
+                self.report.zero_upstream_count += 1
+            if self.downstream_counts[urn] == 0:
+                self.report.zero_downstream_count += 1
+            has_asset_level_lineage = (
+                self.upstream_counts[urn] > 0 or self.downstream_counts[urn] > 0
+            )
+            if has_asset_level_lineage:
+                self.report.has_asset_level_lineage_count += 1
+            else:
+                self.report.zero_asset_level_lineage_count += 1
             wu = MetadataChangeProposalWrapper(
                 entityUrn=urn,
                 aspect=LineageFeaturesClass(
                     upstreamCount=self.upstream_counts[urn],
                     downstreamCount=self.downstream_counts[urn],
+                    hasAssetLevelLineage=has_asset_level_lineage,
                     computedAt=AuditStampClass(
-                        time=int(timestamp.timestamp() * 1000),
+                        time=int(time.time() * 1000),
                         actor=SYSTEM_ACTOR,
                     ),
                 ),
             ).as_workunit()
             self.report.report_workunit(wu)
             yield wu
-        # So previous stage's calculations are done
-        self.report.new_stage("end emission of lineage features")

     def get_report(self) -> SourceReport:
         return self.report
@@ -2391,6 +2391,62 @@ class ActionRequestUrn(_SpecificUrn):
     def id(self) -> str:
         return self._entity_ids[0]

+if TYPE_CHECKING:
+    from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+class DataHubFileUrn(_SpecificUrn):
+    ENTITY_TYPE: ClassVar[Literal["dataHubFile"]] = "dataHubFile"
+    _URN_PARTS: ClassVar[int] = 1
+
+    def __init__(self, id: Union["DataHubFileUrn", str], *, _allow_coercion: bool = True) -> None:
+        if _allow_coercion:
+            # Field coercion logic (if any is required).
+            if isinstance(id, str):
+                if id.startswith('urn:li:'):
+                    try:
+                        id = DataHubFileUrn.from_string(id)
+                    except InvalidUrnError:
+                        raise InvalidUrnError(f'Expecting a DataHubFileUrn but got {id}')
+                else:
+                    id = UrnEncoder.encode_string(id)
+
+        # Validation logic.
+        if not id:
+            raise InvalidUrnError("DataHubFileUrn id cannot be empty")
+        if isinstance(id, DataHubFileUrn):
+            id = id.id
+        elif isinstance(id, Urn):
+            raise InvalidUrnError(f'Expecting a DataHubFileUrn but got {id}')
+        if UrnEncoder.contains_reserved_char(id):
+            raise InvalidUrnError(f'DataHubFileUrn id contains reserved characters')
+
+        super().__init__(self.ENTITY_TYPE, [id])
+
+    @classmethod
+    def _parse_ids(cls, entity_ids: List[str]) -> "DataHubFileUrn":
+        if len(entity_ids) != cls._URN_PARTS:
+            raise InvalidUrnError(f"DataHubFileUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
+        return cls(id=entity_ids[0], _allow_coercion=False)
+
+    @classmethod
+    def underlying_key_aspect_type(cls) -> Type["DataHubFileKeyClass"]:
+        from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+        return DataHubFileKeyClass
+
+    def to_key_aspect(self) -> "DataHubFileKeyClass":
+        from datahub.metadata.schema_classes import DataHubFileKeyClass
+
+        return DataHubFileKeyClass(id=self.id)
+
+    @classmethod
+    def from_key_aspect(cls, key_aspect: "DataHubFileKeyClass") -> "DataHubFileUrn":
+        return cls(id=key_aspect.id)
+
+    @property
+    def id(self) -> str:
+        return self._entity_ids[0]
+
 if TYPE_CHECKING:
     from datahub.metadata.schema_classes import DataProcessInstanceKeyClass

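Going by the generated class above, the new URN should be usable like the other typed URNs; a usage sketch (the id value is made up, and the import path assumes the urn_defs module shown in this diff):

    from acryl_datahub_cloud.metadata._urns.urn_defs import DataHubFileUrn

    file_urn = DataHubFileUrn("my-upload-123")
    print(file_urn.id)              # "my-upload-123"
    print(str(file_urn))            # expected to render as urn:li:dataHubFile:my-upload-123

    key = file_urn.to_key_aspect()  # DataHubFileKeyClass(id="my-upload-123")
    same = DataHubFileUrn.from_string(str(file_urn))
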
@@ -3537,6 +3593,10 @@ class DataJobUrn(_SpecificUrn):
     def get_data_flow_urn(self) -> "DataFlowUrn":
         return DataFlowUrn.from_string(self.flow)

+    @property
+    def orchestrator(self) -> str:
+        return self.get_data_flow_urn().orchestrator
+
     @deprecated(reason="Use .job_id instead")
     def get_job_id(self) -> str:
         return self.job_id
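
The new orchestrator property is a convenience that delegates to the parent DataFlowUrn; a usage sketch with illustrative URN values:

    from acryl_datahub_cloud.metadata._urns.urn_defs import DataJobUrn

    job = DataJobUrn.from_string(
        "urn:li:dataJob:(urn:li:dataFlow:(airflow,my_dag,PROD),my_task)"
    )
    assert job.orchestrator == job.get_data_flow_urn().orchestrator
    print(job.orchestrator)  # "airflow"
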
@@ -7,6 +7,7 @@
 # pylint: skip-file
 # fmt: off
 # isort: skip_file
+from .....schema_classes import ExecutionRequestArtifactsLocationClass
 from .....schema_classes import ExecutionRequestInputClass
 from .....schema_classes import ExecutionRequestResultClass
 from .....schema_classes import ExecutionRequestSignalClass
@@ -14,6 +15,7 @@ from .....schema_classes import ExecutionRequestSourceClass
 from .....schema_classes import StructuredExecutionReportClass


+ExecutionRequestArtifactsLocation = ExecutionRequestArtifactsLocationClass
 ExecutionRequestInput = ExecutionRequestInputClass
 ExecutionRequestResult = ExecutionRequestResultClass
 ExecutionRequestSignal = ExecutionRequestSignalClass
@@ -0,0 +1,19 @@
+# mypy: ignore-errors
+# flake8: noqa
+
+# This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
+# Do not modify manually!
+
+# pylint: skip-file
+# fmt: off
+# isort: skip_file
+from .....schema_classes import BucketStorageLocationClass
+from .....schema_classes import DataHubFileInfoClass
+from .....schema_classes import FileUploadScenarioClass
+
+
+BucketStorageLocation = BucketStorageLocationClass
+DataHubFileInfo = DataHubFileInfoClass
+FileUploadScenario = FileUploadScenarioClass
+
+# fmt: on
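
This new module only re-exports generated classes under shorter aliases, matching the other pegasus2avro packages, so they can presumably be imported directly:

    from acryl_datahub_cloud.metadata.com.linkedin.pegasus2avro.file import (
        BucketStorageLocation,
        DataHubFileInfo,
        FileUploadScenario,
    )
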
@@ -22,6 +22,7 @@ from ......schema_classes import DataFlowKeyClass
 from ......schema_classes import DataHubAccessTokenKeyClass
 from ......schema_classes import DataHubActionKeyClass
 from ......schema_classes import DataHubConnectionKeyClass
+from ......schema_classes import DataHubFileKeyClass
 from ......schema_classes import DataHubIngestionSourceKeyClass
 from ......schema_classes import DataHubMetricCubeKeyClass
 from ......schema_classes import DataHubOpenAPISchemaKeyClass
@@ -92,6 +93,7 @@ DataFlowKey = DataFlowKeyClass
 DataHubAccessTokenKey = DataHubAccessTokenKeyClass
 DataHubActionKey = DataHubActionKeyClass
 DataHubConnectionKey = DataHubConnectionKeyClass
+DataHubFileKey = DataHubFileKeyClass
 DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
 DataHubMetricCubeKey = DataHubMetricCubeKeyClass
 DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
@@ -8,11 +8,13 @@
 # fmt: off
 # isort: skip_file
 from .....schema_classes import ActorsClass
+from .....schema_classes import RoleGroupClass
 from .....schema_classes import RolePropertiesClass
 from .....schema_classes import RoleUserClass


 Actors = ActorsClass
+RoleGroup = RoleGroupClass
 RoleProperties = RolePropertiesClass
 RoleUser = RoleUserClass

@@ -23,6 +23,8 @@ from ......schema_classes import GlobalSettingsInfoClass
 from ......schema_classes import GlobalViewsSettingsClass
 from ......schema_classes import GlobalVisualSettingsClass
 from ......schema_classes import HelpLinkClass
+from ......schema_classes import OAuthProviderClass
+from ......schema_classes import OAuthSettingsClass
 from ......schema_classes import OidcSettingsClass
 from ......schema_classes import SlackIntegrationSettingsClass
 from ......schema_classes import SsoSettingsClass
@@ -47,6 +49,8 @@ GlobalSettingsInfo = GlobalSettingsInfoClass
 GlobalViewsSettings = GlobalViewsSettingsClass
 GlobalVisualSettings = GlobalVisualSettingsClass
 HelpLink = HelpLinkClass
+OAuthProvider = OAuthProviderClass
+OAuthSettings = OAuthSettingsClass
 OidcSettings = OidcSettingsClass
 SlackIntegrationSettings = SlackIntegrationSettingsClass
 SsoSettings = SsoSettingsClass