acryl-datahub 1.0.0rc17__py3-none-any.whl → 1.0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/METADATA +2426 -2427
- {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/RECORD +106 -89
- {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/WHEEL +1 -1
- {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/entry_points.txt +2 -1
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +1 -28
- datahub/cli/specific/dataset_cli.py +26 -10
- datahub/emitter/mce_builder.py +1 -3
- datahub/emitter/mcp_builder.py +8 -0
- datahub/emitter/request_helper.py +19 -14
- datahub/emitter/response_helper.py +25 -18
- datahub/emitter/rest_emitter.py +23 -7
- datahub/errors.py +8 -0
- datahub/ingestion/api/source.py +7 -2
- datahub/ingestion/api/source_helpers.py +14 -2
- datahub/ingestion/extractor/schema_util.py +1 -0
- datahub/ingestion/graph/client.py +26 -20
- datahub/ingestion/graph/filters.py +62 -17
- datahub/ingestion/sink/datahub_rest.py +2 -2
- datahub/ingestion/source/cassandra/cassandra.py +1 -10
- datahub/ingestion/source/common/data_platforms.py +23 -0
- datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
- datahub/ingestion/source/common/subtypes.py +17 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
- datahub/ingestion/source/dbt/dbt_common.py +6 -4
- datahub/ingestion/source/dbt/dbt_core.py +4 -6
- datahub/ingestion/source/dbt/dbt_tests.py +8 -6
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
- datahub/ingestion/source/dremio/dremio_entities.py +6 -5
- datahub/ingestion/source/dremio/dremio_source.py +96 -117
- datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
- datahub/ingestion/source/ge_data_profiler.py +11 -1
- datahub/ingestion/source/hex/__init__.py +0 -0
- datahub/ingestion/source/hex/api.py +394 -0
- datahub/ingestion/source/hex/constants.py +3 -0
- datahub/ingestion/source/hex/hex.py +167 -0
- datahub/ingestion/source/hex/mapper.py +372 -0
- datahub/ingestion/source/hex/model.py +68 -0
- datahub/ingestion/source/iceberg/iceberg.py +193 -140
- datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
- datahub/ingestion/source/mlflow.py +217 -8
- datahub/ingestion/source/mode.py +11 -1
- datahub/ingestion/source/openapi.py +69 -34
- datahub/ingestion/source/powerbi/config.py +31 -4
- datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
- datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
- datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
- datahub/ingestion/source/powerbi/powerbi.py +41 -24
- datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -11
- datahub/ingestion/source/redshift/lineage_v2.py +9 -1
- datahub/ingestion/source/redshift/query.py +1 -1
- datahub/ingestion/source/s3/source.py +11 -0
- datahub/ingestion/source/sigma/config.py +3 -4
- datahub/ingestion/source/sigma/sigma.py +10 -6
- datahub/ingestion/source/slack/slack.py +399 -82
- datahub/ingestion/source/snowflake/constants.py +1 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +14 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +16 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +17 -0
- datahub/ingestion/source/snowflake/snowflake_report.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +29 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -42
- datahub/ingestion/source/snowflake/snowflake_utils.py +25 -1
- datahub/ingestion/source/sql/mssql/job_models.py +15 -1
- datahub/ingestion/source/sql/mssql/source.py +8 -4
- datahub/ingestion/source/sql/oracle.py +51 -4
- datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
- datahub/ingestion/source/sql/stored_procedures/base.py +242 -0
- datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +1 -29
- datahub/ingestion/source/superset.py +291 -35
- datahub/ingestion/source/usage/usage_common.py +0 -65
- datahub/ingestion/source/vertexai/__init__.py +0 -0
- datahub/ingestion/source/vertexai/vertexai.py +1055 -0
- datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
- datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
- datahub/metadata/_schema_classes.py +472 -1
- datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
- datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
- datahub/metadata/schema.avsc +313 -2
- datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
- datahub/metadata/schemas/Deprecation.avsc +2 -0
- datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
- datahub/metadata/schemas/MetadataChangeEvent.avsc +32 -0
- datahub/metadata/schemas/QueryProperties.avsc +20 -0
- datahub/metadata/schemas/Siblings.avsc +2 -0
- datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
- datahub/sdk/__init__.py +1 -0
- datahub/sdk/dataset.py +122 -0
- datahub/sdk/entity.py +99 -3
- datahub/sdk/entity_client.py +27 -3
- datahub/sdk/main_client.py +24 -1
- datahub/sdk/search_client.py +81 -8
- datahub/sdk/search_filters.py +94 -37
- datahub/sql_parsing/split_statements.py +17 -3
- datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
- datahub/sql_parsing/tool_meta_extractor.py +27 -2
- datahub/testing/mcp_diff.py +1 -18
- datahub/utilities/threaded_iterator_executor.py +16 -3
- datahub/ingestion/source/vertexai.py +0 -697
- {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info/licenses}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "slackUserInfo"
|
|
5
|
+
},
|
|
6
|
+
"name": "SlackUserInfo",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.dataplatform.slack",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"Relationship": {
|
|
11
|
+
"entityTypes": [
|
|
12
|
+
"dataPlatformInstance"
|
|
13
|
+
],
|
|
14
|
+
"name": "PartOfSlackWorkspace"
|
|
15
|
+
},
|
|
16
|
+
"java": {
|
|
17
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
18
|
+
},
|
|
19
|
+
"type": "string",
|
|
20
|
+
"name": "slackInstance",
|
|
21
|
+
"doc": "The dataplatform instance that this Slack member belongs to.",
|
|
22
|
+
"Urn": "Urn",
|
|
23
|
+
"entityTypes": [
|
|
24
|
+
"dataPlatformInstance"
|
|
25
|
+
]
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"type": "string",
|
|
29
|
+
"name": "id",
|
|
30
|
+
"doc": "The unique identifier for the Slack member."
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"type": "string",
|
|
34
|
+
"name": "name",
|
|
35
|
+
"doc": "The username of the Slack member."
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"type": "string",
|
|
39
|
+
"name": "realName",
|
|
40
|
+
"doc": "The real name of the Slack member."
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"type": "string",
|
|
44
|
+
"name": "displayName",
|
|
45
|
+
"doc": "The display name of the Slack member."
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"type": [
|
|
49
|
+
"null",
|
|
50
|
+
"string"
|
|
51
|
+
],
|
|
52
|
+
"name": "email",
|
|
53
|
+
"default": null,
|
|
54
|
+
"doc": "The email associated with the Slack member."
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
"type": "string",
|
|
58
|
+
"name": "teamId",
|
|
59
|
+
"doc": "The ID associated with the Slack team."
|
|
60
|
+
},
|
|
61
|
+
{
|
|
62
|
+
"type": "boolean",
|
|
63
|
+
"name": "isDeleted",
|
|
64
|
+
"doc": "Whether the member is deleted or not."
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"type": "boolean",
|
|
68
|
+
"name": "isAdmin",
|
|
69
|
+
"doc": "Whether the member is an admin."
|
|
70
|
+
},
|
|
71
|
+
{
|
|
72
|
+
"type": "boolean",
|
|
73
|
+
"name": "isOwner",
|
|
74
|
+
"doc": "Whether the member is an owner."
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"type": "boolean",
|
|
78
|
+
"name": "isPrimaryOwner",
|
|
79
|
+
"doc": "Whether the member is a primary owner."
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"type": "boolean",
|
|
83
|
+
"name": "isBot",
|
|
84
|
+
"doc": "Whether the member is a bot."
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"type": [
|
|
88
|
+
"null",
|
|
89
|
+
"string"
|
|
90
|
+
],
|
|
91
|
+
"name": "timezone",
|
|
92
|
+
"default": null,
|
|
93
|
+
"doc": "The timezone of the Slack member."
|
|
94
|
+
},
|
|
95
|
+
{
|
|
96
|
+
"type": [
|
|
97
|
+
"null",
|
|
98
|
+
"int"
|
|
99
|
+
],
|
|
100
|
+
"name": "timezoneOffset",
|
|
101
|
+
"default": null,
|
|
102
|
+
"doc": "The timezone offset of the Slack member."
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"type": [
|
|
106
|
+
"null",
|
|
107
|
+
"string"
|
|
108
|
+
],
|
|
109
|
+
"name": "title",
|
|
110
|
+
"default": null,
|
|
111
|
+
"doc": "The title of the Slack member."
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
"type": [
|
|
115
|
+
"null",
|
|
116
|
+
"string"
|
|
117
|
+
],
|
|
118
|
+
"name": "phone",
|
|
119
|
+
"default": null,
|
|
120
|
+
"doc": "The phone number of the Slack member."
|
|
121
|
+
},
|
|
122
|
+
{
|
|
123
|
+
"type": [
|
|
124
|
+
"null",
|
|
125
|
+
"string"
|
|
126
|
+
],
|
|
127
|
+
"name": "profilePictureUrl",
|
|
128
|
+
"default": null,
|
|
129
|
+
"doc": "The URL of the member's profile picture."
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"type": [
|
|
133
|
+
"null",
|
|
134
|
+
"string"
|
|
135
|
+
],
|
|
136
|
+
"name": "statusText",
|
|
137
|
+
"default": null,
|
|
138
|
+
"doc": "The status text of the Slack member."
|
|
139
|
+
},
|
|
140
|
+
{
|
|
141
|
+
"type": [
|
|
142
|
+
"null",
|
|
143
|
+
"string"
|
|
144
|
+
],
|
|
145
|
+
"name": "statusEmoji",
|
|
146
|
+
"default": null,
|
|
147
|
+
"doc": "The status emoji of the Slack member."
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
"type": [
|
|
151
|
+
"null",
|
|
152
|
+
"long"
|
|
153
|
+
],
|
|
154
|
+
"name": "lastUpdatedSeconds",
|
|
155
|
+
"default": null,
|
|
156
|
+
"doc": "The timestamp of when the member was last updated. (in seconds)"
|
|
157
|
+
}
|
|
158
|
+
],
|
|
159
|
+
"doc": "Information about a Slack user."
|
|
160
|
+
}
|
datahub/sdk/__init__.py
CHANGED
|
@@ -3,6 +3,7 @@ import types
|
|
|
3
3
|
import datahub.metadata.schema_classes as models
|
|
4
4
|
from datahub.errors import SdkUsageError
|
|
5
5
|
from datahub.ingestion.graph.config import DatahubClientConfig
|
|
6
|
+
from datahub.ingestion.graph.filters import FilterOperator
|
|
6
7
|
from datahub.metadata.urns import (
|
|
7
8
|
ChartUrn,
|
|
8
9
|
ContainerUrn,
|
datahub/sdk/dataset.py
CHANGED
|
@@ -430,10 +430,22 @@ class Dataset(
|
|
|
430
430
|
HasDomain,
|
|
431
431
|
Entity,
|
|
432
432
|
):
|
|
433
|
+
"""Represents a dataset in DataHub.
|
|
434
|
+
|
|
435
|
+
A dataset represents a collection of data, such as a table, view, or file.
|
|
436
|
+
This class provides methods for managing dataset metadata including schema,
|
|
437
|
+
lineage, and various aspects like ownership, tags, and terms.
|
|
438
|
+
"""
|
|
439
|
+
|
|
433
440
|
__slots__ = ()
|
|
434
441
|
|
|
435
442
|
@classmethod
|
|
436
443
|
def get_urn_type(cls) -> Type[DatasetUrn]:
|
|
444
|
+
"""Get the URN type for datasets.
|
|
445
|
+
|
|
446
|
+
Returns:
|
|
447
|
+
The DatasetUrn class.
|
|
448
|
+
"""
|
|
437
449
|
return DatasetUrn
|
|
438
450
|
|
|
439
451
|
def __init__(
|
|
@@ -466,6 +478,31 @@ class Dataset(
|
|
|
466
478
|
schema: Optional[SchemaFieldsInputType] = None,
|
|
467
479
|
upstreams: Optional[models.UpstreamLineageClass] = None,
|
|
468
480
|
):
|
|
481
|
+
"""Initialize a new Dataset instance.
|
|
482
|
+
|
|
483
|
+
Args:
|
|
484
|
+
platform: The platform this dataset belongs to (e.g. "mysql", "snowflake").
|
|
485
|
+
name: The name of the dataset.
|
|
486
|
+
platform_instance: Optional platform instance identifier.
|
|
487
|
+
env: The environment this dataset belongs to (default: DEFAULT_ENV).
|
|
488
|
+
description: Optional description of the dataset.
|
|
489
|
+
display_name: Optional display name for the dataset.
|
|
490
|
+
qualified_name: Optional qualified name for the dataset.
|
|
491
|
+
external_url: Optional URL to external documentation or source.
|
|
492
|
+
custom_properties: Optional dictionary of custom properties.
|
|
493
|
+
created: Optional creation timestamp.
|
|
494
|
+
last_modified: Optional last modification timestamp.
|
|
495
|
+
parent_container: Optional parent container for this dataset.
|
|
496
|
+
subtype: Optional subtype of the dataset.
|
|
497
|
+
owners: Optional list of owners.
|
|
498
|
+
links: Optional list of links.
|
|
499
|
+
tags: Optional list of tags.
|
|
500
|
+
terms: Optional list of glossary terms.
|
|
501
|
+
domain: Optional domain this dataset belongs to.
|
|
502
|
+
extra_aspects: Optional list of additional aspects.
|
|
503
|
+
schema: Optional schema definition for the dataset.
|
|
504
|
+
upstreams: Optional upstream lineage information.
|
|
505
|
+
"""
|
|
469
506
|
urn = DatasetUrn.create_from_ids(
|
|
470
507
|
platform_id=platform,
|
|
471
508
|
table_name=name,
|
|
@@ -539,6 +576,11 @@ class Dataset(
|
|
|
539
576
|
|
|
540
577
|
@property
|
|
541
578
|
def description(self) -> Optional[str]:
|
|
579
|
+
"""Get the description of the dataset.
|
|
580
|
+
|
|
581
|
+
Returns:
|
|
582
|
+
The description if set, None otherwise.
|
|
583
|
+
"""
|
|
542
584
|
editable_props = self._get_editable_props()
|
|
543
585
|
return first_non_null(
|
|
544
586
|
[
|
|
@@ -548,6 +590,15 @@ class Dataset(
|
|
|
548
590
|
)
|
|
549
591
|
|
|
550
592
|
def set_description(self, description: str) -> None:
|
|
593
|
+
"""Set the description of the dataset.
|
|
594
|
+
|
|
595
|
+
Args:
|
|
596
|
+
description: The description to set.
|
|
597
|
+
|
|
598
|
+
Note:
|
|
599
|
+
If called during ingestion, this will warn if overwriting
|
|
600
|
+
a non-ingestion description.
|
|
601
|
+
"""
|
|
551
602
|
if is_ingestion_attribution():
|
|
552
603
|
editable_props = self._get_editable_props()
|
|
553
604
|
if editable_props is not None and editable_props.description is not None:
|
|
@@ -565,41 +616,96 @@ class Dataset(
|
|
|
565
616
|
|
|
566
617
|
@property
|
|
567
618
|
def display_name(self) -> Optional[str]:
|
|
619
|
+
"""Get the display name of the dataset.
|
|
620
|
+
|
|
621
|
+
Returns:
|
|
622
|
+
The display name if set, None otherwise.
|
|
623
|
+
"""
|
|
568
624
|
return self._ensure_dataset_props().name
|
|
569
625
|
|
|
570
626
|
def set_display_name(self, display_name: str) -> None:
|
|
627
|
+
"""Set the display name of the dataset.
|
|
628
|
+
|
|
629
|
+
Args:
|
|
630
|
+
display_name: The display name to set.
|
|
631
|
+
"""
|
|
571
632
|
self._ensure_dataset_props().name = display_name
|
|
572
633
|
|
|
573
634
|
@property
|
|
574
635
|
def qualified_name(self) -> Optional[str]:
|
|
636
|
+
"""Get the qualified name of the dataset.
|
|
637
|
+
|
|
638
|
+
Returns:
|
|
639
|
+
The qualified name if set, None otherwise.
|
|
640
|
+
"""
|
|
575
641
|
return self._ensure_dataset_props().qualifiedName
|
|
576
642
|
|
|
577
643
|
def set_qualified_name(self, qualified_name: str) -> None:
|
|
644
|
+
"""Set the qualified name of the dataset.
|
|
645
|
+
|
|
646
|
+
Args:
|
|
647
|
+
qualified_name: The qualified name to set.
|
|
648
|
+
"""
|
|
578
649
|
self._ensure_dataset_props().qualifiedName = qualified_name
|
|
579
650
|
|
|
580
651
|
@property
|
|
581
652
|
def external_url(self) -> Optional[str]:
|
|
653
|
+
"""Get the external URL of the dataset.
|
|
654
|
+
|
|
655
|
+
Returns:
|
|
656
|
+
The external URL if set, None otherwise.
|
|
657
|
+
"""
|
|
582
658
|
return self._ensure_dataset_props().externalUrl
|
|
583
659
|
|
|
584
660
|
def set_external_url(self, external_url: str) -> None:
|
|
661
|
+
"""Set the external URL of the dataset.
|
|
662
|
+
|
|
663
|
+
Args:
|
|
664
|
+
external_url: The external URL to set.
|
|
665
|
+
"""
|
|
585
666
|
self._ensure_dataset_props().externalUrl = external_url
|
|
586
667
|
|
|
587
668
|
@property
|
|
588
669
|
def custom_properties(self) -> Dict[str, str]:
|
|
670
|
+
"""Get the custom properties of the dataset.
|
|
671
|
+
|
|
672
|
+
Returns:
|
|
673
|
+
Dictionary of custom properties.
|
|
674
|
+
"""
|
|
589
675
|
return self._ensure_dataset_props().customProperties
|
|
590
676
|
|
|
591
677
|
def set_custom_properties(self, custom_properties: Dict[str, str]) -> None:
|
|
678
|
+
"""Set the custom properties of the dataset.
|
|
679
|
+
|
|
680
|
+
Args:
|
|
681
|
+
custom_properties: Dictionary of custom properties to set.
|
|
682
|
+
"""
|
|
592
683
|
self._ensure_dataset_props().customProperties = custom_properties
|
|
593
684
|
|
|
594
685
|
@property
|
|
595
686
|
def created(self) -> Optional[datetime]:
|
|
687
|
+
"""Get the creation timestamp of the dataset.
|
|
688
|
+
|
|
689
|
+
Returns:
|
|
690
|
+
The creation timestamp if set, None otherwise.
|
|
691
|
+
"""
|
|
596
692
|
return parse_time_stamp(self._ensure_dataset_props().created)
|
|
597
693
|
|
|
598
694
|
def set_created(self, created: datetime) -> None:
|
|
695
|
+
"""Set the creation timestamp of the dataset.
|
|
696
|
+
|
|
697
|
+
Args:
|
|
698
|
+
created: The creation timestamp to set.
|
|
699
|
+
"""
|
|
599
700
|
self._ensure_dataset_props().created = make_time_stamp(created)
|
|
600
701
|
|
|
601
702
|
@property
|
|
602
703
|
def last_modified(self) -> Optional[datetime]:
|
|
704
|
+
"""Get the last modification timestamp of the dataset.
|
|
705
|
+
|
|
706
|
+
Returns:
|
|
707
|
+
The last modification timestamp if set, None otherwise.
|
|
708
|
+
"""
|
|
603
709
|
return parse_time_stamp(self._ensure_dataset_props().lastModified)
|
|
604
710
|
|
|
605
711
|
def set_last_modified(self, last_modified: datetime) -> None:
|
|
@@ -614,6 +720,11 @@ class Dataset(
|
|
|
614
720
|
@property
|
|
615
721
|
def schema(self) -> List[SchemaField]:
|
|
616
722
|
# TODO: Add some caching here to avoid iterating over the schema every time.
|
|
723
|
+
"""Get the schema fields of the dataset.
|
|
724
|
+
|
|
725
|
+
Returns:
|
|
726
|
+
List of SchemaField objects representing the dataset's schema.
|
|
727
|
+
"""
|
|
617
728
|
schema_dict = self._schema_dict()
|
|
618
729
|
return [SchemaField(self, field_path) for field_path in schema_dict]
|
|
619
730
|
|
|
@@ -669,6 +780,17 @@ class Dataset(
|
|
|
669
780
|
|
|
670
781
|
def __getitem__(self, field_path: str) -> SchemaField:
|
|
671
782
|
# TODO: Automatically deal with field path v2?
|
|
783
|
+
"""Get a schema field by its path.
|
|
784
|
+
|
|
785
|
+
Args:
|
|
786
|
+
field_path: The path of the field to retrieve.
|
|
787
|
+
|
|
788
|
+
Returns:
|
|
789
|
+
A SchemaField instance.
|
|
790
|
+
|
|
791
|
+
Raises:
|
|
792
|
+
SchemaFieldKeyError: If the field is not found.
|
|
793
|
+
"""
|
|
672
794
|
schema_dict = self._schema_dict()
|
|
673
795
|
if field_path not in schema_dict:
|
|
674
796
|
raise SchemaFieldKeyError(f"Field {field_path} not found in schema")
|
datahub/sdk/entity.py
CHANGED
|
@@ -20,9 +20,24 @@ ExtraAspectsType = Union[None, List[AspectTypeVar]]
|
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class Entity:
|
|
23
|
+
"""Base class for all DataHub entities.
|
|
24
|
+
|
|
25
|
+
This class provides the core functionality for working with DataHub entities,
|
|
26
|
+
including aspect management and URN handling. It should not be instantiated directly;
|
|
27
|
+
instead, use one of its subclasses like Dataset or Container.
|
|
28
|
+
"""
|
|
29
|
+
|
|
23
30
|
__slots__ = ("_urn", "_prev_aspects", "_aspects")
|
|
24
31
|
|
|
25
32
|
def __init__(self, /, urn: Urn):
|
|
33
|
+
"""Initialize a new Entity instance.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
urn: The URN that uniquely identifies this entity.
|
|
37
|
+
|
|
38
|
+
Raises:
|
|
39
|
+
SdkUsageError: If this base class is instantiated directly.
|
|
40
|
+
"""
|
|
26
41
|
# This method is not meant for direct usage.
|
|
27
42
|
if type(self) is Entity:
|
|
28
43
|
raise SdkUsageError(f"{Entity.__name__} cannot be instantiated directly.")
|
|
@@ -36,6 +51,15 @@ class Entity:
|
|
|
36
51
|
|
|
37
52
|
@classmethod
|
|
38
53
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
54
|
+
"""Create a new entity instance from graph data.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
urn: The URN of the entity.
|
|
58
|
+
current_aspects: The current aspects of the entity from the graph.
|
|
59
|
+
|
|
60
|
+
Returns:
|
|
61
|
+
A new entity instance initialized with the graph data.
|
|
62
|
+
"""
|
|
39
63
|
# If an init method from a subclass adds required fields, it also needs to override this method.
|
|
40
64
|
# An alternative approach would call cls.__new__() to bypass the init method, but it's a bit
|
|
41
65
|
# too hacky for my taste.
|
|
@@ -43,6 +67,14 @@ class Entity:
|
|
|
43
67
|
return entity._init_from_graph(current_aspects)
|
|
44
68
|
|
|
45
69
|
def _init_from_graph(self, current_aspects: models.AspectBag) -> Self:
|
|
70
|
+
"""Initialize the entity with aspects from the graph.
|
|
71
|
+
|
|
72
|
+
Args:
|
|
73
|
+
current_aspects: The current aspects of the entity from the graph.
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
The entity instance with initialized aspects.
|
|
77
|
+
"""
|
|
46
78
|
self._prev_aspects = current_aspects
|
|
47
79
|
|
|
48
80
|
self._aspects = {}
|
|
@@ -54,14 +86,30 @@ class Entity:
|
|
|
54
86
|
|
|
55
87
|
@classmethod
|
|
56
88
|
@abc.abstractmethod
|
|
57
|
-
def get_urn_type(cls) -> Type[_SpecificUrn]: ...
|
|
89
|
+
def get_urn_type(cls) -> Type[_SpecificUrn]:
|
|
90
|
+
"""Get the URN type for this entity class.
|
|
91
|
+
|
|
92
|
+
Returns:
|
|
93
|
+
The URN type class that corresponds to this entity type.
|
|
94
|
+
"""
|
|
95
|
+
...
|
|
58
96
|
|
|
59
97
|
@classmethod
|
|
60
98
|
def entity_type_name(cls) -> str:
|
|
99
|
+
"""Get the entity type name.
|
|
100
|
+
|
|
101
|
+
Returns:
|
|
102
|
+
The string name of this entity type.
|
|
103
|
+
"""
|
|
61
104
|
return cls.get_urn_type().ENTITY_TYPE
|
|
62
105
|
|
|
63
106
|
@property
|
|
64
107
|
def urn(self) -> _SpecificUrn:
|
|
108
|
+
"""Get the entity's URN.
|
|
109
|
+
|
|
110
|
+
Returns:
|
|
111
|
+
The URN that uniquely identifies this entity.
|
|
112
|
+
"""
|
|
65
113
|
return self._urn
|
|
66
114
|
|
|
67
115
|
def _get_aspect(
|
|
@@ -69,22 +117,51 @@ class Entity:
|
|
|
69
117
|
aspect_type: Type[AspectTypeVar],
|
|
70
118
|
/,
|
|
71
119
|
) -> Optional[AspectTypeVar]:
|
|
120
|
+
"""Get an aspect of the entity by its type.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
aspect_type: The type of aspect to retrieve.
|
|
124
|
+
|
|
125
|
+
Returns:
|
|
126
|
+
The aspect if it exists, None otherwise.
|
|
127
|
+
"""
|
|
72
128
|
return self._aspects.get(aspect_type.ASPECT_NAME) # type: ignore
|
|
73
129
|
|
|
74
130
|
def _set_aspect(self, value: AspectTypeVar, /) -> None:
|
|
131
|
+
"""Set an aspect of the entity.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
value: The aspect to set.
|
|
135
|
+
"""
|
|
75
136
|
self._aspects[value.ASPECT_NAME] = value # type: ignore
|
|
76
137
|
|
|
77
138
|
def _setdefault_aspect(self, default_aspect: AspectTypeVar, /) -> AspectTypeVar:
|
|
139
|
+
"""Set a default aspect if it doesn't exist.
|
|
140
|
+
|
|
141
|
+
Args:
|
|
142
|
+
default_aspect: The default aspect to set if none exists.
|
|
143
|
+
|
|
144
|
+
Returns:
|
|
145
|
+
The existing aspect if one exists, otherwise the default aspect.
|
|
146
|
+
"""
|
|
78
147
|
# Similar semantics to dict.setdefault.
|
|
79
148
|
if existing_aspect := self._get_aspect(type(default_aspect)):
|
|
80
149
|
return existing_aspect
|
|
81
150
|
self._set_aspect(default_aspect)
|
|
82
151
|
return default_aspect
|
|
83
152
|
|
|
84
|
-
def
|
|
153
|
+
def as_mcps(
|
|
85
154
|
self,
|
|
86
155
|
change_type: Union[str, models.ChangeTypeClass] = models.ChangeTypeClass.UPSERT,
|
|
87
156
|
) -> List[MetadataChangeProposalWrapper]:
|
|
157
|
+
"""Convert the entity's aspects to MetadataChangeProposals.
|
|
158
|
+
|
|
159
|
+
Args:
|
|
160
|
+
change_type: The type of change to apply (default: UPSERT).
|
|
161
|
+
|
|
162
|
+
Returns:
|
|
163
|
+
A list of MetadataChangeProposalWrapper objects.
|
|
164
|
+
"""
|
|
88
165
|
urn_str = str(self.urn)
|
|
89
166
|
|
|
90
167
|
mcps = []
|
|
@@ -100,13 +177,32 @@ class Entity:
|
|
|
100
177
|
return mcps
|
|
101
178
|
|
|
102
179
|
def as_workunits(self) -> List[MetadataWorkUnit]:
|
|
103
|
-
|
|
180
|
+
"""Convert the entity's aspects to MetadataWorkUnits.
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
A list of MetadataWorkUnit objects.
|
|
184
|
+
"""
|
|
185
|
+
return [mcp.as_workunit() for mcp in self.as_mcps()]
|
|
104
186
|
|
|
105
187
|
def _set_extra_aspects(self, extra_aspects: ExtraAspectsType) -> None:
|
|
188
|
+
"""Set additional aspects on the entity.
|
|
189
|
+
|
|
190
|
+
Args:
|
|
191
|
+
extra_aspects: List of additional aspects to set.
|
|
192
|
+
|
|
193
|
+
Note:
|
|
194
|
+
This method does not validate for conflicts between extra aspects
|
|
195
|
+
and standard aspects.
|
|
196
|
+
"""
|
|
106
197
|
# TODO: Add validation to ensure that an "extra aspect" does not conflict
|
|
107
198
|
# with / get overridden by a standard aspect.
|
|
108
199
|
for aspect in extra_aspects or []:
|
|
109
200
|
self._set_aspect(aspect)
|
|
110
201
|
|
|
111
202
|
def __repr__(self) -> str:
|
|
203
|
+
"""Get a string representation of the entity.
|
|
204
|
+
|
|
205
|
+
Returns:
|
|
206
|
+
A string in the format "EntityClass('urn')".
|
|
207
|
+
"""
|
|
112
208
|
return f"{self.__class__.__name__}('{self.urn}')"
|
datahub/sdk/entity_client.py
CHANGED
|
@@ -24,7 +24,18 @@ if TYPE_CHECKING:
|
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
class EntityClient:
|
|
27
|
+
"""Client for managing DataHub entities.
|
|
28
|
+
|
|
29
|
+
This class provides methods for retrieving and managing DataHub entities
|
|
30
|
+
such as datasets, containers, and other metadata objects.
|
|
31
|
+
"""
|
|
32
|
+
|
|
27
33
|
def __init__(self, client: DataHubClient):
|
|
34
|
+
"""Private constructor - use :py:attr:`DataHubClient.entities` instead.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
client: The parent DataHubClient instance.
|
|
38
|
+
"""
|
|
28
39
|
self._client = client
|
|
29
40
|
|
|
30
41
|
# TODO: Make all of these methods sync by default.
|
|
@@ -40,6 +51,19 @@ class EntityClient:
|
|
|
40
51
|
@overload
|
|
41
52
|
def get(self, urn: Union[Urn, str]) -> Entity: ...
|
|
42
53
|
def get(self, urn: UrnOrStr) -> Entity:
|
|
54
|
+
"""Retrieve an entity by its urn.
|
|
55
|
+
|
|
56
|
+
Args:
|
|
57
|
+
urn: The urn of the entity to retrieve. Can be a string or :py:class:`Urn` object.
|
|
58
|
+
|
|
59
|
+
Returns:
|
|
60
|
+
The retrieved entity instance.
|
|
61
|
+
|
|
62
|
+
Raises:
|
|
63
|
+
ItemNotFoundError: If the entity does not exist.
|
|
64
|
+
SdkUsageError: If the entity type is not yet supported.
|
|
65
|
+
InvalidUrnError: If the URN is invalid.
|
|
66
|
+
"""
|
|
43
67
|
if not isinstance(urn, Urn):
|
|
44
68
|
urn = Urn.from_string(urn)
|
|
45
69
|
|
|
@@ -71,7 +95,7 @@ class EntityClient:
|
|
|
71
95
|
changeType=models.ChangeTypeClass.CREATE_ENTITY,
|
|
72
96
|
)
|
|
73
97
|
)
|
|
74
|
-
mcps.extend(entity.
|
|
98
|
+
mcps.extend(entity.as_mcps(models.ChangeTypeClass.CREATE))
|
|
75
99
|
|
|
76
100
|
self._graph.emit_mcps(mcps)
|
|
77
101
|
|
|
@@ -84,7 +108,7 @@ class EntityClient:
|
|
|
84
108
|
)
|
|
85
109
|
# TODO: If there are no previous aspects but the entity exists, should we delete aspects that are not present here?
|
|
86
110
|
|
|
87
|
-
mcps = entity.
|
|
111
|
+
mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
|
|
88
112
|
self._graph.emit_mcps(mcps)
|
|
89
113
|
|
|
90
114
|
def update(self, entity: Union[Entity, MetadataPatchProposal]) -> None:
|
|
@@ -99,7 +123,7 @@ class EntityClient:
|
|
|
99
123
|
# TODO: respect If-Unmodified-Since?
|
|
100
124
|
# -> probably add a "mode" parameter that can be "update" (e.g. if not modified) or "update_force"
|
|
101
125
|
|
|
102
|
-
mcps = entity.
|
|
126
|
+
mcps = entity.as_mcps(models.ChangeTypeClass.UPSERT)
|
|
103
127
|
self._graph.emit_mcps(mcps)
|
|
104
128
|
|
|
105
129
|
def _update_patch(
|
datahub/sdk/main_client.py
CHANGED
|
@@ -11,6 +11,17 @@ from datahub.sdk.search_client import SearchClient
|
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class DataHubClient:
|
|
14
|
+
"""Main client for interacting with DataHub.
|
|
15
|
+
|
|
16
|
+
This class provides the primary interface for interacting with DataHub,
|
|
17
|
+
including entity management, search, and resolution capabilities.
|
|
18
|
+
|
|
19
|
+
The client can be initialized in three ways:
|
|
20
|
+
1. With a server URL and optional token
|
|
21
|
+
2. With a DatahubClientConfig object
|
|
22
|
+
3. With an existing (legacy) :py:class:`DataHubGraph` instance
|
|
23
|
+
"""
|
|
24
|
+
|
|
14
25
|
@overload
|
|
15
26
|
def __init__(self, *, server: str, token: Optional[str] = None): ...
|
|
16
27
|
@overload
|
|
@@ -25,6 +36,17 @@ class DataHubClient:
|
|
|
25
36
|
graph: Optional[DataHubGraph] = None,
|
|
26
37
|
config: Optional[DatahubClientConfig] = None,
|
|
27
38
|
):
|
|
39
|
+
"""Initialize a new DataHubClient instance.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
server: The URL of the DataHub server (e.g. "http://localhost:8080").
|
|
43
|
+
token: Optional authentication token.
|
|
44
|
+
graph: An existing DataHubGraph instance to use.
|
|
45
|
+
config: A DatahubClientConfig object with connection details.
|
|
46
|
+
|
|
47
|
+
Raises:
|
|
48
|
+
SdkUsageError: If invalid combinations of arguments are provided.
|
|
49
|
+
"""
|
|
28
50
|
if server is not None:
|
|
29
51
|
if config is not None:
|
|
30
52
|
raise SdkUsageError("Cannot specify both server and config")
|
|
@@ -40,7 +62,8 @@ class DataHubClient:
|
|
|
40
62
|
|
|
41
63
|
self._graph = graph
|
|
42
64
|
|
|
43
|
-
|
|
65
|
+
def test_connection(self) -> None:
|
|
66
|
+
self._graph.test_connection()
|
|
44
67
|
|
|
45
68
|
@classmethod
|
|
46
69
|
def from_env(cls) -> "DataHubClient":
|