acryl-datahub 1.1.0.5rc3__py3-none-any.whl → 1.1.0.5rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/METADATA +2575 -2575
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/RECORD +52 -45
- datahub/_version.py +1 -1
- datahub/cli/check_cli.py +21 -4
- datahub/ingestion/api/decorators.py +14 -3
- datahub/ingestion/api/report.py +123 -2
- datahub/ingestion/api/source.py +45 -44
- datahub/ingestion/autogenerated/lineage_helper.py +193 -0
- datahub/ingestion/graph/client.py +71 -28
- datahub/ingestion/run/pipeline.py +6 -0
- datahub/ingestion/source/aws/glue.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
- datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
- datahub/ingestion/source/bigquery_v2/queries.py +4 -4
- datahub/ingestion/source/common/subtypes.py +43 -0
- datahub/ingestion/source/dbt/dbt_common.py +1 -1
- datahub/ingestion/source/fivetran/fivetran.py +34 -26
- datahub/ingestion/source/hex/api.py +26 -1
- datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
- datahub/ingestion/source/mock_data/datahub_mock_data.py +11 -15
- datahub/ingestion/source/salesforce.py +6 -3
- datahub/ingestion/source/slack/slack.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +1 -0
- datahub/ingestion/source/sql/athena.py +15 -3
- datahub/ingestion/source/sql/mssql/source.py +9 -0
- datahub/ingestion/source/sql/sql_common.py +3 -0
- datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -1
- datahub/ingestion/source/sql/vertica.py +9 -1
- datahub/ingestion/source/tableau/tableau.py +6 -1
- datahub/ingestion/source/unity/source.py +36 -20
- datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
- datahub/metadata/_internal_schema_classes.py +601 -0
- datahub/metadata/_urns/urn_defs.py +112 -0
- datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
- datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +27 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
- datahub/metadata/schema.avsc +383 -0
- datahub/metadata/schemas/CorpUserSettings.avsc +25 -0
- datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageModuleProperties.avsc +202 -0
- datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
- datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +25 -0
- datahub/sdk/datajob.py +39 -15
- datahub/specific/dataproduct.py +4 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.0.5rc3.dist-info → acryl_datahub-1.1.0.5rc5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataHubPageModuleKey",
|
|
5
|
+
"keyForEntity": "dataHubPageModule",
|
|
6
|
+
"entityCategory": "core",
|
|
7
|
+
"entityAspects": [
|
|
8
|
+
"dataHubPageModuleProperties"
|
|
9
|
+
]
|
|
10
|
+
},
|
|
11
|
+
"name": "DataHubPageModuleKey",
|
|
12
|
+
"namespace": "com.linkedin.pegasus2avro.metadata.key",
|
|
13
|
+
"fields": [
|
|
14
|
+
{
|
|
15
|
+
"type": "string",
|
|
16
|
+
"name": "id",
|
|
17
|
+
"doc": "Unique id for the module."
|
|
18
|
+
}
|
|
19
|
+
],
|
|
20
|
+
"doc": "Key for a DataHubPageModule"
|
|
21
|
+
}
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataHubPageModuleProperties"
|
|
5
|
+
},
|
|
6
|
+
"name": "DataHubPageModuleProperties",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"type": "string",
|
|
11
|
+
"name": "name",
|
|
12
|
+
"doc": "The display name of this module"
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"Searchable": {
|
|
16
|
+
"fieldType": "KEYWORD"
|
|
17
|
+
},
|
|
18
|
+
"type": {
|
|
19
|
+
"type": "enum",
|
|
20
|
+
"symbolDocs": {
|
|
21
|
+
"ASSET_COLLECTION": "A module with a collection of assets",
|
|
22
|
+
"DOMAINS": "Module displaying the top domains",
|
|
23
|
+
"HIERARCHY": "A module displaying a hierarchy to navigate",
|
|
24
|
+
"LINK": "Link type module",
|
|
25
|
+
"OWNED_ASSETS": "Module displaying assets owned by a user",
|
|
26
|
+
"RICH_TEXT": "Module containing rich text to be rendered",
|
|
27
|
+
"SUBSCRIBED_ASSETS": "Module displaying assets subscribed to by a given user"
|
|
28
|
+
},
|
|
29
|
+
"name": "DataHubPageModuleType",
|
|
30
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
31
|
+
"symbols": [
|
|
32
|
+
"LINK",
|
|
33
|
+
"RICH_TEXT",
|
|
34
|
+
"ASSET_COLLECTION",
|
|
35
|
+
"HIERARCHY",
|
|
36
|
+
"OWNED_ASSETS",
|
|
37
|
+
"SUBSCRIBED_ASSETS",
|
|
38
|
+
"DOMAINS"
|
|
39
|
+
],
|
|
40
|
+
"doc": "Enum containing the types of page modules that there are"
|
|
41
|
+
},
|
|
42
|
+
"name": "type",
|
|
43
|
+
"doc": "The type of this module - the purpose it serves"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"type": {
|
|
47
|
+
"type": "record",
|
|
48
|
+
"name": "DataHubPageModuleVisibility",
|
|
49
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
50
|
+
"fields": [
|
|
51
|
+
{
|
|
52
|
+
"Searchable": {
|
|
53
|
+
"fieldType": "KEYWORD"
|
|
54
|
+
},
|
|
55
|
+
"type": {
|
|
56
|
+
"type": "enum",
|
|
57
|
+
"symbolDocs": {
|
|
58
|
+
"GLOBAL": "This module is discoverable and can be used by any user on the platform",
|
|
59
|
+
"PERSONAL": "This module is used for individual use only"
|
|
60
|
+
},
|
|
61
|
+
"name": "PageModuleScope",
|
|
62
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
63
|
+
"symbols": [
|
|
64
|
+
"PERSONAL",
|
|
65
|
+
"GLOBAL"
|
|
66
|
+
]
|
|
67
|
+
},
|
|
68
|
+
"name": "scope",
|
|
69
|
+
"doc": "Audit stamp for when and by whom this module was created"
|
|
70
|
+
}
|
|
71
|
+
],
|
|
72
|
+
"doc": "Info about the visibility of this module"
|
|
73
|
+
},
|
|
74
|
+
"name": "visibility",
|
|
75
|
+
"doc": "Info about the visibility of this module"
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"type": {
|
|
79
|
+
"type": "record",
|
|
80
|
+
"name": "DataHubPageModuleParams",
|
|
81
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
82
|
+
"fields": [
|
|
83
|
+
{
|
|
84
|
+
"type": [
|
|
85
|
+
"null",
|
|
86
|
+
{
|
|
87
|
+
"type": "record",
|
|
88
|
+
"name": "LinkModuleParams",
|
|
89
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
90
|
+
"fields": [
|
|
91
|
+
{
|
|
92
|
+
"java": {
|
|
93
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
94
|
+
},
|
|
95
|
+
"type": "string",
|
|
96
|
+
"name": "linkUrn",
|
|
97
|
+
"Urn": "Urn"
|
|
98
|
+
}
|
|
99
|
+
]
|
|
100
|
+
}
|
|
101
|
+
],
|
|
102
|
+
"name": "linkParams",
|
|
103
|
+
"default": null,
|
|
104
|
+
"doc": "The params required if the module is type LINK"
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
"type": [
|
|
108
|
+
"null",
|
|
109
|
+
{
|
|
110
|
+
"type": "record",
|
|
111
|
+
"name": "RichTextModuleParams",
|
|
112
|
+
"namespace": "com.linkedin.pegasus2avro.module",
|
|
113
|
+
"fields": [
|
|
114
|
+
{
|
|
115
|
+
"type": "string",
|
|
116
|
+
"name": "content"
|
|
117
|
+
}
|
|
118
|
+
]
|
|
119
|
+
}
|
|
120
|
+
],
|
|
121
|
+
"name": "richTextParams",
|
|
122
|
+
"default": null,
|
|
123
|
+
"doc": "The params required if the module is type RICH_TEXT"
|
|
124
|
+
}
|
|
125
|
+
],
|
|
126
|
+
"doc": "The specific parameters stored for a module"
|
|
127
|
+
},
|
|
128
|
+
"name": "params",
|
|
129
|
+
"doc": "The specific parameters stored for this module"
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"Searchable": {
|
|
133
|
+
"/actor": {
|
|
134
|
+
"fieldName": "createdBy",
|
|
135
|
+
"fieldType": "URN"
|
|
136
|
+
},
|
|
137
|
+
"/time": {
|
|
138
|
+
"fieldName": "createdAt",
|
|
139
|
+
"fieldType": "DATETIME"
|
|
140
|
+
}
|
|
141
|
+
},
|
|
142
|
+
"type": {
|
|
143
|
+
"type": "record",
|
|
144
|
+
"name": "AuditStamp",
|
|
145
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
146
|
+
"fields": [
|
|
147
|
+
{
|
|
148
|
+
"type": "long",
|
|
149
|
+
"name": "time",
|
|
150
|
+
"doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
|
|
151
|
+
},
|
|
152
|
+
{
|
|
153
|
+
"java": {
|
|
154
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
155
|
+
},
|
|
156
|
+
"type": "string",
|
|
157
|
+
"name": "actor",
|
|
158
|
+
"doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
|
|
159
|
+
"Urn": "Urn"
|
|
160
|
+
},
|
|
161
|
+
{
|
|
162
|
+
"java": {
|
|
163
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
164
|
+
},
|
|
165
|
+
"type": [
|
|
166
|
+
"null",
|
|
167
|
+
"string"
|
|
168
|
+
],
|
|
169
|
+
"name": "impersonator",
|
|
170
|
+
"default": null,
|
|
171
|
+
"doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
|
|
172
|
+
"Urn": "Urn"
|
|
173
|
+
},
|
|
174
|
+
{
|
|
175
|
+
"type": [
|
|
176
|
+
"null",
|
|
177
|
+
"string"
|
|
178
|
+
],
|
|
179
|
+
"name": "message",
|
|
180
|
+
"default": null,
|
|
181
|
+
"doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
|
|
182
|
+
}
|
|
183
|
+
],
|
|
184
|
+
"doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
|
|
185
|
+
},
|
|
186
|
+
"name": "created",
|
|
187
|
+
"doc": "Audit stamp for when and by whom this template was created"
|
|
188
|
+
},
|
|
189
|
+
{
|
|
190
|
+
"Searchable": {
|
|
191
|
+
"/time": {
|
|
192
|
+
"fieldName": "lastModifiedAt",
|
|
193
|
+
"fieldType": "DATETIME"
|
|
194
|
+
}
|
|
195
|
+
},
|
|
196
|
+
"type": "com.linkedin.pegasus2avro.common.AuditStamp",
|
|
197
|
+
"name": "lastModified",
|
|
198
|
+
"doc": "Audit stamp for when and by whom this template was last updated"
|
|
199
|
+
}
|
|
200
|
+
],
|
|
201
|
+
"doc": "The main properties of a DataHub page module"
|
|
202
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataHubPageTemplateKey",
|
|
5
|
+
"keyForEntity": "dataHubPageTemplate",
|
|
6
|
+
"entityCategory": "core",
|
|
7
|
+
"entityAspects": [
|
|
8
|
+
"dataHubPageTemplateProperties"
|
|
9
|
+
]
|
|
10
|
+
},
|
|
11
|
+
"name": "DataHubPageTemplateKey",
|
|
12
|
+
"namespace": "com.linkedin.pegasus2avro.metadata.key",
|
|
13
|
+
"fields": [
|
|
14
|
+
{
|
|
15
|
+
"type": "string",
|
|
16
|
+
"name": "id",
|
|
17
|
+
"doc": "Unique id for the template."
|
|
18
|
+
}
|
|
19
|
+
],
|
|
20
|
+
"doc": "Key for a DataHubPageTemplate"
|
|
21
|
+
}
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "record",
|
|
3
|
+
"Aspect": {
|
|
4
|
+
"name": "dataHubPageTemplateProperties"
|
|
5
|
+
},
|
|
6
|
+
"name": "DataHubPageTemplateProperties",
|
|
7
|
+
"namespace": "com.linkedin.pegasus2avro.template",
|
|
8
|
+
"fields": [
|
|
9
|
+
{
|
|
10
|
+
"Relationship": {
|
|
11
|
+
"/*/modules/*": {
|
|
12
|
+
"entityTypes": [
|
|
13
|
+
"dataHubPageModule"
|
|
14
|
+
],
|
|
15
|
+
"name": "ContainedIn"
|
|
16
|
+
}
|
|
17
|
+
},
|
|
18
|
+
"type": {
|
|
19
|
+
"type": "array",
|
|
20
|
+
"items": {
|
|
21
|
+
"type": "record",
|
|
22
|
+
"name": "DataHubPageTemplateRow",
|
|
23
|
+
"namespace": "com.linkedin.pegasus2avro.template",
|
|
24
|
+
"fields": [
|
|
25
|
+
{
|
|
26
|
+
"type": {
|
|
27
|
+
"type": "array",
|
|
28
|
+
"items": "string"
|
|
29
|
+
},
|
|
30
|
+
"name": "modules",
|
|
31
|
+
"doc": "The modules that exist in this template row",
|
|
32
|
+
"Urn": "Urn",
|
|
33
|
+
"urn_is_array": true
|
|
34
|
+
}
|
|
35
|
+
],
|
|
36
|
+
"doc": "A row of modules contained in a template"
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
"name": "rows",
|
|
40
|
+
"doc": "The rows of modules contained in this template"
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"type": {
|
|
44
|
+
"type": "record",
|
|
45
|
+
"name": "DataHubPageTemplateSurface",
|
|
46
|
+
"namespace": "com.linkedin.pegasus2avro.template",
|
|
47
|
+
"fields": [
|
|
48
|
+
{
|
|
49
|
+
"Searchable": {
|
|
50
|
+
"fieldType": "KEYWORD"
|
|
51
|
+
},
|
|
52
|
+
"type": {
|
|
53
|
+
"type": "enum",
|
|
54
|
+
"symbolDocs": {
|
|
55
|
+
"HOME_PAGE": "This template applies to what to display on the home page for users."
|
|
56
|
+
},
|
|
57
|
+
"name": "PageTemplateSurfaceType",
|
|
58
|
+
"namespace": "com.linkedin.pegasus2avro.template",
|
|
59
|
+
"symbols": [
|
|
60
|
+
"HOME_PAGE"
|
|
61
|
+
]
|
|
62
|
+
},
|
|
63
|
+
"name": "surfaceType",
|
|
64
|
+
"doc": "Where exactly is this template being used"
|
|
65
|
+
}
|
|
66
|
+
],
|
|
67
|
+
"doc": "Info about the surface area of the product that this template is deployed in"
|
|
68
|
+
},
|
|
69
|
+
"name": "surface",
|
|
70
|
+
"doc": "Info about the surface area of the product that this template is deployed in"
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
"type": {
|
|
74
|
+
"type": "record",
|
|
75
|
+
"name": "DataHubPageTemplateVisibility",
|
|
76
|
+
"namespace": "com.linkedin.pegasus2avro.template",
|
|
77
|
+
"fields": [
|
|
78
|
+
{
|
|
79
|
+
"Searchable": {
|
|
80
|
+
"fieldType": "KEYWORD"
|
|
81
|
+
},
|
|
82
|
+
"type": {
|
|
83
|
+
"type": "enum",
|
|
84
|
+
"symbolDocs": {
|
|
85
|
+
"GLOBAL": "This template is used across users",
|
|
86
|
+
"PERSONAL": "This template is used for individual use only"
|
|
87
|
+
},
|
|
88
|
+
"name": "PageTemplateScope",
|
|
89
|
+
"namespace": "com.linkedin.pegasus2avro.template",
|
|
90
|
+
"symbols": [
|
|
91
|
+
"PERSONAL",
|
|
92
|
+
"GLOBAL"
|
|
93
|
+
]
|
|
94
|
+
},
|
|
95
|
+
"name": "scope",
|
|
96
|
+
"doc": "The scope of this template and who can use/see it"
|
|
97
|
+
}
|
|
98
|
+
],
|
|
99
|
+
"doc": "Info about the visibility of this template"
|
|
100
|
+
},
|
|
101
|
+
"name": "visibility",
|
|
102
|
+
"doc": "Info about the visibility of this template"
|
|
103
|
+
},
|
|
104
|
+
{
|
|
105
|
+
"Searchable": {
|
|
106
|
+
"/actor": {
|
|
107
|
+
"fieldName": "createdBy",
|
|
108
|
+
"fieldType": "URN"
|
|
109
|
+
},
|
|
110
|
+
"/time": {
|
|
111
|
+
"fieldName": "createdAt",
|
|
112
|
+
"fieldType": "DATETIME"
|
|
113
|
+
}
|
|
114
|
+
},
|
|
115
|
+
"type": {
|
|
116
|
+
"type": "record",
|
|
117
|
+
"name": "AuditStamp",
|
|
118
|
+
"namespace": "com.linkedin.pegasus2avro.common",
|
|
119
|
+
"fields": [
|
|
120
|
+
{
|
|
121
|
+
"type": "long",
|
|
122
|
+
"name": "time",
|
|
123
|
+
"doc": "When did the resource/association/sub-resource move into the specific lifecycle stage represented by this AuditEvent."
|
|
124
|
+
},
|
|
125
|
+
{
|
|
126
|
+
"java": {
|
|
127
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
128
|
+
},
|
|
129
|
+
"type": "string",
|
|
130
|
+
"name": "actor",
|
|
131
|
+
"doc": "The entity (e.g. a member URN) which will be credited for moving the resource/association/sub-resource into the specific lifecycle stage. It is also the one used to authorize the change.",
|
|
132
|
+
"Urn": "Urn"
|
|
133
|
+
},
|
|
134
|
+
{
|
|
135
|
+
"java": {
|
|
136
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
137
|
+
},
|
|
138
|
+
"type": [
|
|
139
|
+
"null",
|
|
140
|
+
"string"
|
|
141
|
+
],
|
|
142
|
+
"name": "impersonator",
|
|
143
|
+
"default": null,
|
|
144
|
+
"doc": "The entity (e.g. a service URN) which performs the change on behalf of the Actor and must be authorized to act as the Actor.",
|
|
145
|
+
"Urn": "Urn"
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"type": [
|
|
149
|
+
"null",
|
|
150
|
+
"string"
|
|
151
|
+
],
|
|
152
|
+
"name": "message",
|
|
153
|
+
"default": null,
|
|
154
|
+
"doc": "Additional context around how DataHub was informed of the particular change. For example: was the change created by an automated process, or manually."
|
|
155
|
+
}
|
|
156
|
+
],
|
|
157
|
+
"doc": "Data captured on a resource/association/sub-resource level giving insight into when that resource/association/sub-resource moved into a particular lifecycle stage, and who acted to move it into that specific lifecycle stage."
|
|
158
|
+
},
|
|
159
|
+
"name": "created",
|
|
160
|
+
"doc": "Audit stamp for when and by whom this template was created"
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"Searchable": {
|
|
164
|
+
"/time": {
|
|
165
|
+
"fieldName": "lastModifiedAt",
|
|
166
|
+
"fieldType": "DATETIME"
|
|
167
|
+
}
|
|
168
|
+
},
|
|
169
|
+
"type": "com.linkedin.pegasus2avro.common.AuditStamp",
|
|
170
|
+
"name": "lastModified",
|
|
171
|
+
"doc": "Audit stamp for when and by whom this template was last updated"
|
|
172
|
+
}
|
|
173
|
+
],
|
|
174
|
+
"doc": "The main properties of a DataHub page template"
|
|
175
|
+
}
|
|
@@ -282,6 +282,31 @@
|
|
|
282
282
|
"columnPropagationEnabled": true
|
|
283
283
|
},
|
|
284
284
|
"doc": "Settings related to the documentation propagation feature"
|
|
285
|
+
},
|
|
286
|
+
{
|
|
287
|
+
"type": [
|
|
288
|
+
"null",
|
|
289
|
+
{
|
|
290
|
+
"type": "record",
|
|
291
|
+
"name": "GlobalHomePageSettings",
|
|
292
|
+
"namespace": "com.linkedin.pegasus2avro.settings.global",
|
|
293
|
+
"fields": [
|
|
294
|
+
{
|
|
295
|
+
"java": {
|
|
296
|
+
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
297
|
+
},
|
|
298
|
+
"type": "string",
|
|
299
|
+
"name": "defaultTemplate",
|
|
300
|
+
"doc": "The urn that will be rendered in the UI by default for all users",
|
|
301
|
+
"Urn": "Urn"
|
|
302
|
+
}
|
|
303
|
+
],
|
|
304
|
+
"doc": "Global settings related to the home page for an instance"
|
|
305
|
+
}
|
|
306
|
+
],
|
|
307
|
+
"name": "homePage",
|
|
308
|
+
"default": null,
|
|
309
|
+
"doc": "Global settings related to the home page for an instance"
|
|
285
310
|
}
|
|
286
311
|
],
|
|
287
312
|
"doc": "DataHub Global platform settings. Careful - these should not be modified by the outside world!"
|
datahub/sdk/datajob.py
CHANGED
|
@@ -6,6 +6,7 @@ from typing import Dict, List, Optional, Type
|
|
|
6
6
|
|
|
7
7
|
from typing_extensions import Self
|
|
8
8
|
|
|
9
|
+
import datahub.emitter.mce_builder as builder
|
|
9
10
|
import datahub.metadata.schema_classes as models
|
|
10
11
|
from datahub.cli.cli_utils import first_non_null
|
|
11
12
|
from datahub.errors import IngestionAttributionWarning
|
|
@@ -64,7 +65,7 @@ class DataJob(
|
|
|
64
65
|
"""Get the URN type for data jobs."""
|
|
65
66
|
return DataJobUrn
|
|
66
67
|
|
|
67
|
-
def __init__(
|
|
68
|
+
def __init__( # noqa: C901
|
|
68
69
|
self,
|
|
69
70
|
*,
|
|
70
71
|
name: str,
|
|
@@ -86,6 +87,7 @@ class DataJob(
|
|
|
86
87
|
domain: Optional[DomainInputType] = None,
|
|
87
88
|
inlets: Optional[List[DatasetUrnOrStr]] = None,
|
|
88
89
|
outlets: Optional[List[DatasetUrnOrStr]] = None,
|
|
90
|
+
fine_grained_lineages: Optional[List[models.FineGrainedLineageClass]] = None,
|
|
89
91
|
structured_properties: Optional[StructuredPropertyInputType] = None,
|
|
90
92
|
extra_aspects: ExtraAspectsType = None,
|
|
91
93
|
):
|
|
@@ -103,12 +105,14 @@ class DataJob(
|
|
|
103
105
|
ValueError: If neither flow nor (flow_urn and platform_instance) are provided
|
|
104
106
|
"""
|
|
105
107
|
if flow is None:
|
|
106
|
-
if flow_urn is None
|
|
108
|
+
if flow_urn is None:
|
|
107
109
|
raise ValueError(
|
|
108
110
|
"You must provide either: 1. a DataFlow object, or 2. a DataFlowUrn (and a platform_instance config if required)"
|
|
109
111
|
)
|
|
110
112
|
flow_urn = DataFlowUrn.from_string(flow_urn)
|
|
111
|
-
if flow_urn.flow_id.startswith(
|
|
113
|
+
if platform_instance and flow_urn.flow_id.startswith(
|
|
114
|
+
f"{platform_instance}."
|
|
115
|
+
):
|
|
112
116
|
flow_name = flow_urn.flow_id[len(platform_instance) + 1 :]
|
|
113
117
|
else:
|
|
114
118
|
flow_name = flow_urn.flow_id
|
|
@@ -133,8 +137,6 @@ class DataJob(
|
|
|
133
137
|
)
|
|
134
138
|
self._setdefault_aspect(job_info)
|
|
135
139
|
self._ensure_datajob_props().flowUrn = str(flow.urn)
|
|
136
|
-
|
|
137
|
-
# Set properties if provided
|
|
138
140
|
if description is not None:
|
|
139
141
|
self.set_description(description)
|
|
140
142
|
if external_url is not None:
|
|
@@ -145,8 +147,6 @@ class DataJob(
|
|
|
145
147
|
self.set_created(created)
|
|
146
148
|
if last_modified is not None:
|
|
147
149
|
self.set_last_modified(last_modified)
|
|
148
|
-
|
|
149
|
-
# Set standard aspects
|
|
150
150
|
if subtype is not None:
|
|
151
151
|
self.set_subtype(subtype)
|
|
152
152
|
if owners is not None:
|
|
@@ -159,13 +159,19 @@ class DataJob(
|
|
|
159
159
|
self.set_terms(terms)
|
|
160
160
|
if domain is not None:
|
|
161
161
|
self.set_domain(domain)
|
|
162
|
+
if structured_properties is not None:
|
|
163
|
+
for key, value in structured_properties.items():
|
|
164
|
+
self.set_structured_property(property_urn=key, values=value)
|
|
162
165
|
if inlets is not None:
|
|
163
166
|
self.set_inlets(inlets)
|
|
164
167
|
if outlets is not None:
|
|
165
168
|
self.set_outlets(outlets)
|
|
166
|
-
if
|
|
167
|
-
|
|
168
|
-
|
|
169
|
+
if fine_grained_lineages is not None:
|
|
170
|
+
self.set_fine_grained_lineages(fine_grained_lineages)
|
|
171
|
+
|
|
172
|
+
if self.flow_urn.cluster.upper() in builder.ALL_ENV_TYPES:
|
|
173
|
+
env = self.flow_urn.cluster.upper()
|
|
174
|
+
self._ensure_datajob_props().env = env
|
|
169
175
|
|
|
170
176
|
@classmethod
|
|
171
177
|
def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
|
|
@@ -201,9 +207,7 @@ class DataJob(
|
|
|
201
207
|
) -> Optional[models.DataJobInputOutputClass]:
|
|
202
208
|
return self._get_aspect(models.DataJobInputOutputClass)
|
|
203
209
|
|
|
204
|
-
def _ensure_datajob_inputoutput_props(
|
|
205
|
-
self,
|
|
206
|
-
) -> models.DataJobInputOutputClass:
|
|
210
|
+
def _ensure_datajob_inputoutput_props(self) -> models.DataJobInputOutputClass:
|
|
207
211
|
return self._setdefault_aspect(
|
|
208
212
|
models.DataJobInputOutputClass(inputDatasets=[], outputDatasets=[])
|
|
209
213
|
)
|
|
@@ -307,8 +311,6 @@ class DataJob(
|
|
|
307
311
|
browse_path.append(
|
|
308
312
|
models.BrowsePathEntryClass(id=entry.id, urn=entry.urn)
|
|
309
313
|
)
|
|
310
|
-
|
|
311
|
-
# Add the job itself to the path
|
|
312
314
|
browse_path.append(models.BrowsePathEntryClass(id=flow.name, urn=str(flow.urn)))
|
|
313
315
|
# Set the browse path aspect
|
|
314
316
|
self._set_aspect(models.BrowsePathsV2Class(path=browse_path))
|
|
@@ -341,3 +343,25 @@ class DataJob(
|
|
|
341
343
|
self._ensure_datajob_inputoutput_props().outputDatasets.append(
|
|
342
344
|
str(outlet_urn)
|
|
343
345
|
)
|
|
346
|
+
|
|
347
|
+
@property
|
|
348
|
+
def fine_grained_lineages(self) -> List[models.FineGrainedLineageClass]:
|
|
349
|
+
io_aspect = self._get_datajob_inputoutput_props()
|
|
350
|
+
return (
|
|
351
|
+
io_aspect.fineGrainedLineages
|
|
352
|
+
if io_aspect and io_aspect.fineGrainedLineages
|
|
353
|
+
else []
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
def set_fine_grained_lineages(
|
|
357
|
+
self, lineages: List[models.FineGrainedLineageClass]
|
|
358
|
+
) -> None:
|
|
359
|
+
io_aspect = self._ensure_datajob_inputoutput_props()
|
|
360
|
+
if io_aspect.fineGrainedLineages is None:
|
|
361
|
+
io_aspect.fineGrainedLineages = []
|
|
362
|
+
io_aspect.fineGrainedLineages.extend(lineages)
|
|
363
|
+
|
|
364
|
+
@property
|
|
365
|
+
def env(self) -> Optional[str]:
|
|
366
|
+
"""Get the environment of the data job."""
|
|
367
|
+
return str(self._ensure_datajob_props().env)
|
datahub/specific/dataproduct.py
CHANGED
|
@@ -9,6 +9,9 @@ from datahub.metadata.schema_classes import (
|
|
|
9
9
|
)
|
|
10
10
|
from datahub.specific.aspect_helpers.custom_properties import HasCustomPropertiesPatch
|
|
11
11
|
from datahub.specific.aspect_helpers.ownership import HasOwnershipPatch
|
|
12
|
+
from datahub.specific.aspect_helpers.structured_properties import (
|
|
13
|
+
HasStructuredPropertiesPatch,
|
|
14
|
+
)
|
|
12
15
|
from datahub.specific.aspect_helpers.tags import HasTagsPatch
|
|
13
16
|
from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
14
17
|
|
|
@@ -16,6 +19,7 @@ from datahub.specific.aspect_helpers.terms import HasTermsPatch
|
|
|
16
19
|
class DataProductPatchBuilder(
|
|
17
20
|
HasOwnershipPatch,
|
|
18
21
|
HasCustomPropertiesPatch,
|
|
22
|
+
HasStructuredPropertiesPatch,
|
|
19
23
|
HasTagsPatch,
|
|
20
24
|
HasTermsPatch,
|
|
21
25
|
MetadataPatchProposal,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|