acryl-datahub-cloud 0.3.12.4rc3__py3-none-any.whl → 0.3.13rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub-cloud might be problematic (more details are linked from the original registry page).

Files changed (35)
  1. acryl_datahub_cloud/_codegen_config.json +1 -1
  2. acryl_datahub_cloud/lineage_features/source.py +8 -2
  3. acryl_datahub_cloud/metadata/_urns/urn_defs.py +112 -0
  4. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  5. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  6. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
  7. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  8. acryl_datahub_cloud/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  9. acryl_datahub_cloud/metadata/schema.avsc +443 -0
  10. acryl_datahub_cloud/metadata/schema_classes.py +682 -1
  11. acryl_datahub_cloud/metadata/schemas/CorpUserSettings.avsc +41 -0
  12. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  13. acryl_datahub_cloud/metadata/schemas/DataHubPageModuleProperties.avsc +200 -0
  14. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  15. acryl_datahub_cloud/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  16. acryl_datahub_cloud/metadata/schemas/DataJobInputOutput.avsc +8 -0
  17. acryl_datahub_cloud/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  18. acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +9 -0
  19. acryl_datahub_cloud/metadata/schemas/UpstreamLineage.avsc +9 -0
  20. acryl_datahub_cloud/sdk/assertion/__init__.py +49 -0
  21. acryl_datahub_cloud/sdk/assertion/assertion_base.py +65 -806
  22. acryl_datahub_cloud/sdk/assertion/freshness_assertion.py +201 -0
  23. acryl_datahub_cloud/sdk/assertion/smart_freshness_assertion.py +165 -0
  24. acryl_datahub_cloud/sdk/assertion/smart_volume_assertion.py +162 -0
  25. acryl_datahub_cloud/sdk/assertion/sql_assertion.py +256 -0
  26. acryl_datahub_cloud/sdk/assertion/volume_assertion.py +156 -0
  27. acryl_datahub_cloud/sdk/assertion_input/assertion_input.py +0 -344
  28. acryl_datahub_cloud/sdk/assertion_input/smart_freshness_assertion_input.py +220 -0
  29. acryl_datahub_cloud/sdk/assertion_input/smart_volume_assertion_input.py +191 -0
  30. acryl_datahub_cloud/sdk/assertions_client.py +6 -2
  31. {acryl_datahub_cloud-0.3.12.4rc3.dist-info → acryl_datahub_cloud-0.3.13rc1.dist-info}/METADATA +50 -48
  32. {acryl_datahub_cloud-0.3.12.4rc3.dist-info → acryl_datahub_cloud-0.3.13rc1.dist-info}/RECORD +35 -22
  33. {acryl_datahub_cloud-0.3.12.4rc3.dist-info → acryl_datahub_cloud-0.3.13rc1.dist-info}/WHEEL +0 -0
  34. {acryl_datahub_cloud-0.3.12.4rc3.dist-info → acryl_datahub_cloud-0.3.13rc1.dist-info}/entry_points.txt +0 -0
  35. {acryl_datahub_cloud-0.3.12.4rc3.dist-info → acryl_datahub_cloud-0.3.13rc1.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "acryl-datahub-cloud",
3
- "version": "0.3.12.4rc3",
3
+ "version": "0.3.13rc1",
4
4
  "install_requires": [
5
5
  "avro-gen3==0.7.16",
6
6
  "acryl-datahub"
@@ -45,6 +45,7 @@ SYSTEM_ACTOR = "urn:li:corpuser:__datahub_system"
45
45
 
46
46
  class LineageFeaturesSourceConfig(ConfigModel):
47
47
  enabled: bool = True
48
+ materialize_entities: bool = False
48
49
  search_index: ElasticSearchClientConfig = ElasticSearchClientConfig()
49
50
  query_timeout: int = 30
50
51
  extract_batch_size: int = 3000
@@ -77,6 +78,7 @@ class LineageExtractGraphSourceReport(SourceReport, IngestionStageReport):
77
78
  upstream_count: int = 0
78
79
  downstream_count: int = 0
79
80
  edges_scanned: int = 0
81
+ skipped_materialized_urns_count: int = 0
80
82
 
81
83
 
82
84
  @platform_name(id="datahub", platform_name="DataHub")
@@ -321,15 +323,17 @@ class DataHubLineageFeaturesSource(Source):
321
323
  f"Failed to cleanup PIT after error: {cleanup_error}"
322
324
  )
323
325
  raise
324
- # So previous stage's calculations are done
325
- self.report.new_stage("Extract lineage features End")
326
326
  self._update_report()
327
327
  self._delete_pit_with_retry(server, pit)
328
328
 
329
+ self.report.new_stage("start emission of lineage features")
329
330
  # In Python 3.9, can be replaced by `self.self.upstream_counts.keys() | self.downstream_counts.keys()`
330
331
  for urn in set(self.upstream_counts.keys()).union(
331
332
  self.downstream_counts.keys()
332
333
  ):
334
+ if (not self.config.materialize_entities) and urn not in self.valid_urns:
335
+ self.report.skipped_materialized_urns_count += 1
336
+ continue
333
337
  logger.debug(
334
338
  f"{urn}: {self.upstream_counts[urn]}, {self.downstream_counts[urn]}"
335
339
  )
@@ -346,6 +350,8 @@ class DataHubLineageFeaturesSource(Source):
346
350
  ).as_workunit()
347
351
  self.report.report_workunit(wu)
348
352
  yield wu
353
+ # So previous stage's calculations are done
354
+ self.report.new_stage("end emission of lineage features")
349
355
 
350
356
  def get_report(self) -> SourceReport:
351
357
  return self.report
@@ -1882,6 +1882,62 @@ class ChartUrn(_SpecificUrn):
1882
1882
  def chart_id(self) -> str:
1883
1883
  return self._entity_ids[1]
1884
1884
 
1885
+ if TYPE_CHECKING:
1886
+ from datahub.metadata.schema_classes import DataHubPageTemplateKeyClass
1887
+
1888
+ class DataHubPageTemplateUrn(_SpecificUrn):
1889
+ ENTITY_TYPE: ClassVar[Literal["dataHubPageTemplate"]] = "dataHubPageTemplate"
1890
+ _URN_PARTS: ClassVar[int] = 1
1891
+
1892
+ def __init__(self, id: Union["DataHubPageTemplateUrn", str], *, _allow_coercion: bool = True) -> None:
1893
+ if _allow_coercion:
1894
+ # Field coercion logic (if any is required).
1895
+ if isinstance(id, str):
1896
+ if id.startswith('urn:li:'):
1897
+ try:
1898
+ id = DataHubPageTemplateUrn.from_string(id)
1899
+ except InvalidUrnError:
1900
+ raise InvalidUrnError(f'Expecting a DataHubPageTemplateUrn but got {id}')
1901
+ else:
1902
+ id = UrnEncoder.encode_string(id)
1903
+
1904
+ # Validation logic.
1905
+ if not id:
1906
+ raise InvalidUrnError("DataHubPageTemplateUrn id cannot be empty")
1907
+ if isinstance(id, DataHubPageTemplateUrn):
1908
+ id = id.id
1909
+ elif isinstance(id, Urn):
1910
+ raise InvalidUrnError(f'Expecting a DataHubPageTemplateUrn but got {id}')
1911
+ if UrnEncoder.contains_reserved_char(id):
1912
+ raise InvalidUrnError(f'DataHubPageTemplateUrn id contains reserved characters')
1913
+
1914
+ super().__init__(self.ENTITY_TYPE, [id])
1915
+
1916
+ @classmethod
1917
+ def _parse_ids(cls, entity_ids: List[str]) -> "DataHubPageTemplateUrn":
1918
+ if len(entity_ids) != cls._URN_PARTS:
1919
+ raise InvalidUrnError(f"DataHubPageTemplateUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
1920
+ return cls(id=entity_ids[0], _allow_coercion=False)
1921
+
1922
+ @classmethod
1923
+ def underlying_key_aspect_type(cls) -> Type["DataHubPageTemplateKeyClass"]:
1924
+ from datahub.metadata.schema_classes import DataHubPageTemplateKeyClass
1925
+
1926
+ return DataHubPageTemplateKeyClass
1927
+
1928
+ def to_key_aspect(self) -> "DataHubPageTemplateKeyClass":
1929
+ from datahub.metadata.schema_classes import DataHubPageTemplateKeyClass
1930
+
1931
+ return DataHubPageTemplateKeyClass(id=self.id)
1932
+
1933
+ @classmethod
1934
+ def from_key_aspect(cls, key_aspect: "DataHubPageTemplateKeyClass") -> "DataHubPageTemplateUrn":
1935
+ return cls(id=key_aspect.id)
1936
+
1937
+ @property
1938
+ def id(self) -> str:
1939
+ return self._entity_ids[0]
1940
+
1885
1941
  if TYPE_CHECKING:
1886
1942
  from datahub.metadata.schema_classes import OwnershipTypeKeyClass
1887
1943
 
@@ -3830,6 +3886,62 @@ class ContainerUrn(_SpecificUrn):
3830
3886
  def guid(self) -> str:
3831
3887
  return self._entity_ids[0]
3832
3888
 
3889
+ if TYPE_CHECKING:
3890
+ from datahub.metadata.schema_classes import DataHubPageModuleKeyClass
3891
+
3892
+ class DataHubPageModuleUrn(_SpecificUrn):
3893
+ ENTITY_TYPE: ClassVar[Literal["dataHubPageModule"]] = "dataHubPageModule"
3894
+ _URN_PARTS: ClassVar[int] = 1
3895
+
3896
+ def __init__(self, id: Union["DataHubPageModuleUrn", str], *, _allow_coercion: bool = True) -> None:
3897
+ if _allow_coercion:
3898
+ # Field coercion logic (if any is required).
3899
+ if isinstance(id, str):
3900
+ if id.startswith('urn:li:'):
3901
+ try:
3902
+ id = DataHubPageModuleUrn.from_string(id)
3903
+ except InvalidUrnError:
3904
+ raise InvalidUrnError(f'Expecting a DataHubPageModuleUrn but got {id}')
3905
+ else:
3906
+ id = UrnEncoder.encode_string(id)
3907
+
3908
+ # Validation logic.
3909
+ if not id:
3910
+ raise InvalidUrnError("DataHubPageModuleUrn id cannot be empty")
3911
+ if isinstance(id, DataHubPageModuleUrn):
3912
+ id = id.id
3913
+ elif isinstance(id, Urn):
3914
+ raise InvalidUrnError(f'Expecting a DataHubPageModuleUrn but got {id}')
3915
+ if UrnEncoder.contains_reserved_char(id):
3916
+ raise InvalidUrnError(f'DataHubPageModuleUrn id contains reserved characters')
3917
+
3918
+ super().__init__(self.ENTITY_TYPE, [id])
3919
+
3920
+ @classmethod
3921
+ def _parse_ids(cls, entity_ids: List[str]) -> "DataHubPageModuleUrn":
3922
+ if len(entity_ids) != cls._URN_PARTS:
3923
+ raise InvalidUrnError(f"DataHubPageModuleUrn should have {cls._URN_PARTS} parts, got {len(entity_ids)}: {entity_ids}")
3924
+ return cls(id=entity_ids[0], _allow_coercion=False)
3925
+
3926
+ @classmethod
3927
+ def underlying_key_aspect_type(cls) -> Type["DataHubPageModuleKeyClass"]:
3928
+ from datahub.metadata.schema_classes import DataHubPageModuleKeyClass
3929
+
3930
+ return DataHubPageModuleKeyClass
3931
+
3932
+ def to_key_aspect(self) -> "DataHubPageModuleKeyClass":
3933
+ from datahub.metadata.schema_classes import DataHubPageModuleKeyClass
3934
+
3935
+ return DataHubPageModuleKeyClass(id=self.id)
3936
+
3937
+ @classmethod
3938
+ def from_key_aspect(cls, key_aspect: "DataHubPageModuleKeyClass") -> "DataHubPageModuleUrn":
3939
+ return cls(id=key_aspect.id)
3940
+
3941
+ @property
3942
+ def id(self) -> str:
3943
+ return self._entity_ids[0]
3944
+
3833
3945
  if TYPE_CHECKING:
3834
3946
  from datahub.metadata.schema_classes import EntityTypeKeyClass
3835
3947
 
@@ -13,6 +13,7 @@ from .....schema_classes import CorpGroupSettingsClass
13
13
  from .....schema_classes import CorpUserAppearanceSettingsClass
14
14
  from .....schema_classes import CorpUserCredentialsClass
15
15
  from .....schema_classes import CorpUserEditableInfoClass
16
+ from .....schema_classes import CorpUserHomePageSettingsClass
16
17
  from .....schema_classes import CorpUserInfoClass
17
18
  from .....schema_classes import CorpUserSettingsClass
18
19
  from .....schema_classes import CorpUserStatusClass
@@ -29,6 +30,7 @@ CorpGroupSettings = CorpGroupSettingsClass
29
30
  CorpUserAppearanceSettings = CorpUserAppearanceSettingsClass
30
31
  CorpUserCredentials = CorpUserCredentialsClass
31
32
  CorpUserEditableInfo = CorpUserEditableInfoClass
33
+ CorpUserHomePageSettings = CorpUserHomePageSettingsClass
32
34
  CorpUserInfo = CorpUserInfoClass
33
35
  CorpUserSettings = CorpUserSettingsClass
34
36
  CorpUserStatus = CorpUserStatusClass
@@ -24,6 +24,8 @@ from ......schema_classes import DataHubConnectionKeyClass
24
24
  from ......schema_classes import DataHubIngestionSourceKeyClass
25
25
  from ......schema_classes import DataHubMetricCubeKeyClass
26
26
  from ......schema_classes import DataHubOpenAPISchemaKeyClass
27
+ from ......schema_classes import DataHubPageModuleKeyClass
28
+ from ......schema_classes import DataHubPageTemplateKeyClass
27
29
  from ......schema_classes import DataHubPersonaKeyClass
28
30
  from ......schema_classes import DataHubPolicyKeyClass
29
31
  from ......schema_classes import DataHubRetentionKeyClass
@@ -91,6 +93,8 @@ DataHubConnectionKey = DataHubConnectionKeyClass
91
93
  DataHubIngestionSourceKey = DataHubIngestionSourceKeyClass
92
94
  DataHubMetricCubeKey = DataHubMetricCubeKeyClass
93
95
  DataHubOpenAPISchemaKey = DataHubOpenAPISchemaKeyClass
96
+ DataHubPageModuleKey = DataHubPageModuleKeyClass
97
+ DataHubPageTemplateKey = DataHubPageTemplateKeyClass
94
98
  DataHubPersonaKey = DataHubPersonaKeyClass
95
99
  DataHubPolicyKey = DataHubPolicyKeyClass
96
100
  DataHubRetentionKey = DataHubRetentionKeyClass
@@ -0,0 +1,27 @@
1
+ # mypy: ignore-errors
2
+ # flake8: noqa
3
+
4
+ # This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
5
+ # Do not modify manually!
6
+
7
+ # pylint: skip-file
8
+ # fmt: off
9
+ # isort: skip_file
10
+ from .....schema_classes import DataHubPageModuleParamsClass
11
+ from .....schema_classes import DataHubPageModulePropertiesClass
12
+ from .....schema_classes import DataHubPageModuleTypeClass
13
+ from .....schema_classes import DataHubPageModuleVisibilityClass
14
+ from .....schema_classes import LinkModuleParamsClass
15
+ from .....schema_classes import PageModuleScopeClass
16
+ from .....schema_classes import RichTextModuleParamsClass
17
+
18
+
19
+ DataHubPageModuleParams = DataHubPageModuleParamsClass
20
+ DataHubPageModuleProperties = DataHubPageModulePropertiesClass
21
+ DataHubPageModuleType = DataHubPageModuleTypeClass
22
+ DataHubPageModuleVisibility = DataHubPageModuleVisibilityClass
23
+ LinkModuleParams = LinkModuleParamsClass
24
+ PageModuleScope = PageModuleScopeClass
25
+ RichTextModuleParams = RichTextModuleParamsClass
26
+
27
+ # fmt: on
@@ -7,9 +7,11 @@
7
7
  # pylint: skip-file
8
8
  # fmt: off
9
9
  # isort: skip_file
10
+ from ......schema_classes import ApplicationsSettingsClass
10
11
  from ......schema_classes import DocPropagationFeatureSettingsClass
11
12
  from ......schema_classes import DocumentationAiSettingsClass
12
13
  from ......schema_classes import EmailIntegrationSettingsClass
14
+ from ......schema_classes import GlobalHomePageSettingsClass
13
15
  from ......schema_classes import GlobalIncidentsSettingsClass
14
16
  from ......schema_classes import GlobalIntegrationSettingsClass
15
17
  from ......schema_classes import GlobalNotificationSettingsClass
@@ -22,9 +24,11 @@ from ......schema_classes import SlackIntegrationSettingsClass
22
24
  from ......schema_classes import SsoSettingsClass
23
25
 
24
26
 
27
+ ApplicationsSettings = ApplicationsSettingsClass
25
28
  DocPropagationFeatureSettings = DocPropagationFeatureSettingsClass
26
29
  DocumentationAiSettings = DocumentationAiSettingsClass
27
30
  EmailIntegrationSettings = EmailIntegrationSettingsClass
31
+ GlobalHomePageSettings = GlobalHomePageSettingsClass
28
32
  GlobalIncidentsSettings = GlobalIncidentsSettingsClass
29
33
  GlobalIntegrationSettings = GlobalIntegrationSettingsClass
30
34
  GlobalNotificationSettings = GlobalNotificationSettingsClass
@@ -0,0 +1,25 @@
1
+ # mypy: ignore-errors
2
+ # flake8: noqa
3
+
4
+ # This file is autogenerated by /metadata-ingestion/scripts/avro_codegen.py
5
+ # Do not modify manually!
6
+
7
+ # pylint: skip-file
8
+ # fmt: off
9
+ # isort: skip_file
10
+ from .....schema_classes import DataHubPageTemplatePropertiesClass
11
+ from .....schema_classes import DataHubPageTemplateRowClass
12
+ from .....schema_classes import DataHubPageTemplateSurfaceClass
13
+ from .....schema_classes import DataHubPageTemplateVisibilityClass
14
+ from .....schema_classes import PageTemplateScopeClass
15
+ from .....schema_classes import PageTemplateSurfaceTypeClass
16
+
17
+
18
+ DataHubPageTemplateProperties = DataHubPageTemplatePropertiesClass
19
+ DataHubPageTemplateRow = DataHubPageTemplateRowClass
20
+ DataHubPageTemplateSurface = DataHubPageTemplateSurfaceClass
21
+ DataHubPageTemplateVisibility = DataHubPageTemplateVisibilityClass
22
+ PageTemplateScope = PageTemplateScopeClass
23
+ PageTemplateSurfaceType = PageTemplateSurfaceTypeClass
24
+
25
+ # fmt: on