acryl-datahub 1.1.0rc3__py3-none-any.whl → 1.1.0.1rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic.

Files changed (87)
  1. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/METADATA +2515 -2513
  2. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/RECORD +87 -70
  3. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/WHEEL +1 -1
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +9 -8
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/restricted_text.py +247 -0
  10. datahub/api/entities/external/unity_catalog_external_entites.py +170 -0
  11. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  12. datahub/cli/delete_cli.py +4 -4
  13. datahub/cli/ingest_cli.py +9 -1
  14. datahub/emitter/mce_builder.py +3 -1
  15. datahub/emitter/response_helper.py +86 -1
  16. datahub/emitter/rest_emitter.py +1 -1
  17. datahub/ingestion/graph/client.py +3 -3
  18. datahub/ingestion/source/apply/datahub_apply.py +4 -4
  19. datahub/ingestion/source/data_lake_common/data_lake_utils.py +22 -10
  20. datahub/ingestion/source/data_lake_common/object_store.py +644 -0
  21. datahub/ingestion/source/datahub/config.py +11 -0
  22. datahub/ingestion/source/datahub/datahub_database_reader.py +186 -33
  23. datahub/ingestion/source/datahub/datahub_source.py +1 -1
  24. datahub/ingestion/source/dbt/dbt_common.py +30 -11
  25. datahub/ingestion/source/gcs/gcs_source.py +22 -7
  26. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  27. datahub/ingestion/source/hex/query_fetcher.py +9 -3
  28. datahub/ingestion/source/openapi.py +12 -0
  29. datahub/ingestion/source/openapi_parser.py +56 -37
  30. datahub/ingestion/source/s3/source.py +65 -6
  31. datahub/ingestion/source/snowflake/snowflake_config.py +13 -0
  32. datahub/ingestion/source/snowflake/snowflake_queries.py +44 -21
  33. datahub/ingestion/source/snowflake/snowflake_query.py +0 -7
  34. datahub/ingestion/source/snowflake/snowflake_v2.py +17 -6
  35. datahub/ingestion/source/sql/athena.py +1 -0
  36. datahub/ingestion/source/sql/hive.py +2 -3
  37. datahub/ingestion/source/sql/sql_common.py +98 -34
  38. datahub/ingestion/source/sql/sql_types.py +5 -2
  39. datahub/ingestion/source/unity/config.py +5 -0
  40. datahub/ingestion/source/unity/proxy.py +117 -0
  41. datahub/ingestion/source/unity/source.py +167 -15
  42. datahub/ingestion/source/unity/tag_entities.py +295 -0
  43. datahub/metadata/_internal_schema_classes.py +667 -522
  44. datahub/metadata/_urns/urn_defs.py +1804 -1748
  45. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  46. datahub/metadata/schema.avsc +17358 -17584
  47. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  48. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  49. datahub/metadata/schemas/Applications.avsc +38 -0
  50. datahub/metadata/schemas/ChartKey.avsc +1 -0
  51. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  52. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  53. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  54. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  55. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  56. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  57. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  58. datahub/metadata/schemas/DatasetKey.avsc +1 -0
  59. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  60. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  61. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  62. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  63. datahub/metadata/schemas/MLModelGroupKey.avsc +1 -0
  64. datahub/metadata/schemas/MLModelKey.avsc +1 -0
  65. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  66. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  67. datahub/metadata/schemas/__init__.py +3 -3
  68. datahub/sdk/__init__.py +6 -0
  69. datahub/sdk/_all_entities.py +11 -0
  70. datahub/sdk/_shared.py +118 -1
  71. datahub/sdk/chart.py +315 -0
  72. datahub/sdk/container.py +7 -0
  73. datahub/sdk/dashboard.py +432 -0
  74. datahub/sdk/dataflow.py +309 -0
  75. datahub/sdk/datajob.py +342 -0
  76. datahub/sdk/dataset.py +8 -2
  77. datahub/sdk/entity_client.py +90 -2
  78. datahub/sdk/lineage_client.py +681 -82
  79. datahub/sdk/main_client.py +27 -8
  80. datahub/sdk/mlmodel.py +101 -38
  81. datahub/sdk/mlmodelgroup.py +7 -0
  82. datahub/sql_parsing/sql_parsing_aggregator.py +1 -1
  83. datahub/testing/mce_helpers.py +421 -0
  84. datahub/testing/sdk_v2_helpers.py +18 -0
  85. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/entry_points.txt +0 -0
  86. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/licenses/LICENSE +0 -0
  87. {acryl_datahub-1.1.0rc3.dist-info → acryl_datahub-1.1.0.1rc6.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,31 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "applicationKey",
5
+ "keyForEntity": "application",
6
+ "entityCategory": "core",
7
+ "entityAspects": [
8
+ "applicationProperties",
9
+ "ownership",
10
+ "glossaryTerms",
11
+ "globalTags",
12
+ "domains",
13
+ "institutionalMemory",
14
+ "status",
15
+ "structuredProperties",
16
+ "forms",
17
+ "testResults",
18
+ "subTypes"
19
+ ]
20
+ },
21
+ "name": "ApplicationKey",
22
+ "namespace": "com.linkedin.pegasus2avro.application",
23
+ "fields": [
24
+ {
25
+ "type": "string",
26
+ "name": "id",
27
+ "doc": "A unique id for the Application."
28
+ }
29
+ ],
30
+ "doc": "Key for a Query"
31
+ }
@@ -0,0 +1,72 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "applicationProperties"
5
+ },
6
+ "name": "ApplicationProperties",
7
+ "namespace": "com.linkedin.pegasus2avro.application",
8
+ "fields": [
9
+ {
10
+ "Searchable": {
11
+ "/*": {
12
+ "fieldType": "TEXT",
13
+ "queryByDefault": true
14
+ }
15
+ },
16
+ "type": {
17
+ "type": "map",
18
+ "values": "string"
19
+ },
20
+ "name": "customProperties",
21
+ "default": {},
22
+ "doc": "Custom property bag."
23
+ },
24
+ {
25
+ "Searchable": {
26
+ "fieldType": "KEYWORD"
27
+ },
28
+ "java": {
29
+ "class": "com.linkedin.pegasus2avro.common.url.Url",
30
+ "coercerClass": "com.linkedin.pegasus2avro.common.url.UrlCoercer"
31
+ },
32
+ "type": [
33
+ "null",
34
+ "string"
35
+ ],
36
+ "name": "externalUrl",
37
+ "default": null,
38
+ "doc": "URL where the reference exist"
39
+ },
40
+ {
41
+ "Searchable": {
42
+ "boostScore": 10.0,
43
+ "enableAutocomplete": true,
44
+ "fieldNameAliases": [
45
+ "_entityName"
46
+ ],
47
+ "fieldType": "WORD_GRAM"
48
+ },
49
+ "type": [
50
+ "null",
51
+ "string"
52
+ ],
53
+ "name": "name",
54
+ "default": null,
55
+ "doc": "Display name of the Application"
56
+ },
57
+ {
58
+ "Searchable": {
59
+ "fieldType": "TEXT",
60
+ "hasValuesFieldName": "hasDescription"
61
+ },
62
+ "type": [
63
+ "null",
64
+ "string"
65
+ ],
66
+ "name": "description",
67
+ "default": null,
68
+ "doc": "Documentation of the application"
69
+ }
70
+ ],
71
+ "doc": "The main properties of an Application"
72
+ }
@@ -0,0 +1,38 @@
1
+ {
2
+ "type": "record",
3
+ "Aspect": {
4
+ "name": "applications"
5
+ },
6
+ "name": "Applications",
7
+ "namespace": "com.linkedin.pegasus2avro.application",
8
+ "fields": [
9
+ {
10
+ "Relationship": {
11
+ "/*": {
12
+ "entityTypes": [
13
+ "application"
14
+ ],
15
+ "name": "AssociatedWith"
16
+ }
17
+ },
18
+ "Searchable": {
19
+ "/*": {
20
+ "addToFilters": true,
21
+ "fieldName": "applications",
22
+ "fieldType": "URN",
23
+ "filterNameOverride": "Application",
24
+ "hasValuesFieldName": "hasApplication"
25
+ }
26
+ },
27
+ "type": {
28
+ "type": "array",
29
+ "items": "string"
30
+ },
31
+ "name": "applications",
32
+ "doc": "The Applications attached to an Asset",
33
+ "Urn": "Urn",
34
+ "urn_is_array": true
35
+ }
36
+ ],
37
+ "doc": "Links from an Asset to its Applications"
38
+ }
@@ -13,6 +13,7 @@
13
13
  "embed",
14
14
  "browsePaths",
15
15
  "domains",
16
+ "applications",
16
17
  "container",
17
18
  "deprecation",
18
19
  "ownership",
@@ -18,6 +18,7 @@
18
18
  "browsePaths",
19
19
  "status",
20
20
  "domains",
21
+ "applications",
21
22
  "browsePathsV2",
22
23
  "structuredProperties",
23
24
  "forms",
@@ -6,6 +6,7 @@
6
6
  "entityCategory": "_unset_",
7
7
  "entityAspects": [
8
8
  "domains",
9
+ "applications",
9
10
  "container",
10
11
  "deprecation",
11
12
  "dashboardUsageStatistics",
@@ -6,6 +6,7 @@
6
6
  "entityCategory": "core",
7
7
  "entityAspects": [
8
8
  "domains",
9
+ "applications",
9
10
  "deprecation",
10
11
  "versionInfo",
11
12
  "dataFlowInfo",
@@ -5,7 +5,8 @@
5
5
  "keyForEntity": "dataHubIngestionSource",
6
6
  "entityCategory": "internal",
7
7
  "entityAspects": [
8
- "dataHubIngestionSourceInfo"
8
+ "dataHubIngestionSourceInfo",
9
+ "ownership"
9
10
  ]
10
11
  },
11
12
  "name": "DataHubIngestionSourceKey",
@@ -8,6 +8,7 @@
8
8
  "datahubIngestionRunSummary",
9
9
  "datahubIngestionCheckpoint",
10
10
  "domains",
11
+ "applications",
11
12
  "deprecation",
12
13
  "versionInfo",
13
14
  "dataJobInfo",
@@ -9,6 +9,7 @@
9
9
  "glossaryTerms",
10
10
  "globalTags",
11
11
  "domains",
12
+ "applications",
12
13
  "dataProductProperties",
13
14
  "institutionalMemory",
14
15
  "status",
@@ -65,7 +65,7 @@
65
65
  ],
66
66
  "name": "description",
67
67
  "default": null,
68
- "doc": "Documentation of the dataset"
68
+ "doc": "Documentation of the data product"
69
69
  },
70
70
  {
71
71
  "Relationship": {
@@ -11,6 +11,7 @@
11
11
  "datasetUsageStatistics",
12
12
  "operation",
13
13
  "domains",
14
+ "applications",
14
15
  "schemaMetadata",
15
16
  "status",
16
17
  "container",
@@ -20,6 +20,11 @@
20
20
  "doc": "Arguments provided to the task"
21
21
  },
22
22
  {
23
+ "Searchable": {
24
+ "fieldName": "executorId",
25
+ "fieldType": "KEYWORD",
26
+ "queryByDefault": false
27
+ },
23
28
  "type": "string",
24
29
  "name": "executorId",
25
30
  "doc": "Advanced: specify a specific executor to route the request to. If none is provided, a \"default\" executor is used."
@@ -12,6 +12,7 @@
12
12
  "ownership",
13
13
  "deprecation",
14
14
  "domains",
15
+ "applications",
15
16
  "status",
16
17
  "browsePaths",
17
18
  "structuredProperties",
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlFeatureProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "mlFeatureProperties",
12
13
  "ownership",
13
14
  "institutionalMemory",
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlFeatureTableProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "mlFeatureTableProperties",
12
13
  "ownership",
13
14
  "institutionalMemory",
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlModelGroupProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "mlModelGroupProperties",
12
13
  "ownership",
13
14
  "status",
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlModelProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "ownership",
12
13
  "mlModelProperties",
13
14
  "intendedUse",
@@ -8,6 +8,7 @@
8
8
  "glossaryTerms",
9
9
  "editableMlPrimaryKeyProperties",
10
10
  "domains",
11
+ "applications",
11
12
  "mlPrimaryKeyProperties",
12
13
  "ownership",
13
14
  "institutionalMemory",
@@ -15,6 +15,7 @@
15
15
  "browsePaths",
16
16
  "institutionalMemory",
17
17
  "domains",
18
+ "applications",
18
19
  "subTypes",
19
20
  "dataPlatformInstance",
20
21
  "browsePathsV2",
@@ -15,10 +15,10 @@ import pathlib
15
15
  def _load_schema(schema_name: str) -> str:
16
16
  return (pathlib.Path(__file__).parent / f"{schema_name}.avsc").read_text()
17
17
 
18
- def getMetadataChangeProposalSchema() -> str:
19
- return _load_schema("MetadataChangeProposal")
20
-
21
18
  def getMetadataChangeEventSchema() -> str:
22
19
  return _load_schema("MetadataChangeEvent")
23
20
 
21
+ def getMetadataChangeProposalSchema() -> str:
22
+ return _load_schema("MetadataChangeProposal")
23
+
24
24
  # fmt: on
datahub/sdk/__init__.py CHANGED
@@ -18,9 +18,15 @@ from datahub.metadata.urns import (
18
18
  SchemaFieldUrn,
19
19
  TagUrn,
20
20
  )
21
+ from datahub.sdk.chart import Chart
21
22
  from datahub.sdk.container import Container
23
+ from datahub.sdk.dashboard import Dashboard
24
+ from datahub.sdk.dataflow import DataFlow
25
+ from datahub.sdk.datajob import DataJob
22
26
  from datahub.sdk.dataset import Dataset
23
27
  from datahub.sdk.main_client import DataHubClient
28
+ from datahub.sdk.mlmodel import MLModel
29
+ from datahub.sdk.mlmodelgroup import MLModelGroup
24
30
  from datahub.sdk.search_filters import Filter, FilterDsl
25
31
 
26
32
  # We want to print out the warning if people do `from datahub.sdk import X`.
@@ -1,19 +1,30 @@
1
1
  from typing import Dict, List, Type
2
2
 
3
+ from datahub.sdk.chart import Chart
3
4
  from datahub.sdk.container import Container
5
+ from datahub.sdk.dashboard import Dashboard
6
+ from datahub.sdk.dataflow import DataFlow
7
+ from datahub.sdk.datajob import DataJob
4
8
  from datahub.sdk.dataset import Dataset
5
9
  from datahub.sdk.entity import Entity
6
10
  from datahub.sdk.mlmodel import MLModel
7
11
  from datahub.sdk.mlmodelgroup import MLModelGroup
8
12
 
13
+ # Base entity classes that don't have circular dependencies
14
+ # Those that do are imported in the EntityClient where needed
9
15
  # TODO: Is there a better way to declare this?
10
16
  ENTITY_CLASSES_LIST: List[Type[Entity]] = [
11
17
  Container,
12
18
  Dataset,
13
19
  MLModel,
14
20
  MLModelGroup,
21
+ DataFlow,
22
+ DataJob,
23
+ Dashboard,
24
+ Chart,
15
25
  ]
16
26
 
27
+ # Create the mapping of entity types to classes
17
28
  ENTITY_CLASSES: Dict[str, Type[Entity]] = {
18
29
  cls.get_urn_type().ENTITY_TYPE: cls for cls in ENTITY_CLASSES_LIST
19
30
  }
datahub/sdk/_shared.py CHANGED
@@ -26,9 +26,12 @@ from datahub.emitter.mce_builder import (
26
26
  from datahub.emitter.mcp_builder import ContainerKey
27
27
  from datahub.errors import MultipleSubtypesWarning, SdkUsageError
28
28
  from datahub.metadata.urns import (
29
+ ChartUrn,
29
30
  ContainerUrn,
30
31
  CorpGroupUrn,
31
32
  CorpUserUrn,
33
+ DashboardUrn,
34
+ DataFlowUrn,
32
35
  DataJobUrn,
33
36
  DataPlatformInstanceUrn,
34
37
  DataPlatformUrn,
@@ -37,6 +40,7 @@ from datahub.metadata.urns import (
37
40
  DomainUrn,
38
41
  GlossaryTermUrn,
39
42
  OwnershipTypeUrn,
43
+ StructuredPropertyUrn,
40
44
  TagUrn,
41
45
  Urn,
42
46
  VersionSetUrn,
@@ -47,12 +51,21 @@ from datahub.utilities.urns.error import InvalidUrnError
47
51
 
48
52
  if TYPE_CHECKING:
49
53
  from datahub.sdk.container import Container
50
-
51
54
  UrnOrStr: TypeAlias = Union[Urn, str]
55
+ ChartUrnOrStr: TypeAlias = Union[str, ChartUrn]
52
56
  DatasetUrnOrStr: TypeAlias = Union[str, DatasetUrn]
53
57
  DatajobUrnOrStr: TypeAlias = Union[str, DataJobUrn]
58
+ DataflowUrnOrStr: TypeAlias = Union[str, DataFlowUrn]
59
+ DashboardUrnOrStr: TypeAlias = Union[str, DashboardUrn]
60
+ DataPlatformInstanceUrnOrStr: TypeAlias = Union[str, DataPlatformInstanceUrn]
61
+ DataPlatformUrnOrStr: TypeAlias = Union[str, DataPlatformUrn]
54
62
 
55
63
  ActorUrn: TypeAlias = Union[CorpUserUrn, CorpGroupUrn]
64
+ StructuredPropertyUrnOrStr: TypeAlias = Union[str, StructuredPropertyUrn]
65
+ StructuredPropertyValueType: TypeAlias = Union[str, float, int]
66
+ StructuredPropertyInputType: TypeAlias = Dict[
67
+ StructuredPropertyUrnOrStr, Sequence[StructuredPropertyValueType]
68
+ ]
56
69
 
57
70
  TrainingMetricsInputType: TypeAlias = Union[
58
71
  List[models.MLMetricClass], Dict[str, Optional[str]]
@@ -716,3 +729,107 @@ class HasVersion(Entity):
716
729
  a for a in version_props.aliases if a.versionTag != alias
717
730
  ]
718
731
  self._set_aspect(version_props)
732
+
733
+
734
+ class HasStructuredProperties(Entity):
735
+ """
736
+ Mixin for entities that support structured properties
737
+ """
738
+
739
+ __slots__ = ()
740
+
741
+ @property
742
+ def structured_properties(
743
+ self,
744
+ ) -> Optional[List[models.StructuredPropertyValueAssignmentClass]]:
745
+ """
746
+ Retrieve structured properties for the entity
747
+
748
+ Returns:
749
+ Optional list of structured property value assignments
750
+ """
751
+ sp_aspect = self._get_aspect(models.StructuredPropertiesClass)
752
+ return sp_aspect.properties if sp_aspect else None
753
+
754
+ def _ensure_structured_properties(self) -> models.StructuredPropertiesClass:
755
+ """
756
+ Ensure structured properties aspect exists, creating it if necessary
757
+
758
+ Returns:
759
+ StructuredPropertiesClass aspect
760
+ """
761
+ return self._setdefault_aspect(models.StructuredPropertiesClass(properties=[]))
762
+
763
+ def set_structured_property(
764
+ self,
765
+ property_urn: StructuredPropertyUrnOrStr,
766
+ values: Sequence[StructuredPropertyValueType],
767
+ ) -> None:
768
+ """
769
+ Update an existing structured property or add if it doesn't exist
770
+
771
+ Args:
772
+ property_urn: URN of the structured property
773
+ values: List of values for the property
774
+ """
775
+ # validate property_urn is a valid structured property urn
776
+ property_urn = StructuredPropertyUrn.from_string(property_urn)
777
+
778
+ properties = self._ensure_structured_properties()
779
+
780
+ # Find existing property assignment
781
+ existing_prop = next(
782
+ (
783
+ prop
784
+ for prop in properties.properties
785
+ if prop.propertyUrn == str(property_urn)
786
+ ),
787
+ None,
788
+ )
789
+ current_timestamp = make_ts_millis(datetime.now())
790
+
791
+ if existing_prop:
792
+ # Update existing property
793
+ existing_prop.values = list(values)
794
+ existing_prop.lastModified = models.AuditStampClass(
795
+ time=current_timestamp,
796
+ actor=DEFAULT_ACTOR_URN,
797
+ )
798
+ else:
799
+ # Create new property assignment
800
+ new_property = models.StructuredPropertyValueAssignmentClass(
801
+ propertyUrn=str(property_urn),
802
+ values=list(values),
803
+ created=models.AuditStampClass(
804
+ time=current_timestamp,
805
+ actor=DEFAULT_ACTOR_URN,
806
+ ),
807
+ lastModified=models.AuditStampClass(
808
+ time=current_timestamp,
809
+ actor=DEFAULT_ACTOR_URN,
810
+ ),
811
+ )
812
+ add_list_unique(
813
+ properties.properties,
814
+ key=lambda prop: prop.propertyUrn,
815
+ item=new_property,
816
+ )
817
+
818
+ self._set_aspect(properties)
819
+
820
+ def remove_structured_property(
821
+ self, property_urn: StructuredPropertyUrnOrStr
822
+ ) -> None:
823
+ """
824
+ Remove a structured property from the entity
825
+
826
+ Args:
827
+ property_urn: URN of the structured property to remove
828
+ """
829
+ remove_list_unique(
830
+ self._ensure_structured_properties().properties,
831
+ key=lambda prop: prop.propertyUrn,
832
+ item=models.StructuredPropertyValueAssignmentClass(
833
+ propertyUrn=str(property_urn), values=[]
834
+ ),
835
+ )