acryl-datahub 1.2.0.8rc4__py3-none-any.whl → 1.2.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {acryl_datahub-1.2.0.8rc4.dist-info → acryl_datahub-1.2.0.9.dist-info}/METADATA +2646 -2646
- {acryl_datahub-1.2.0.8rc4.dist-info → acryl_datahub-1.2.0.9.dist-info}/RECORD +26 -26
- datahub/_version.py +1 -1
- datahub/ingestion/api/sink.py +27 -2
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -1
- datahub/ingestion/run/pipeline.py +13 -11
- datahub/ingestion/sink/datahub_kafka.py +1 -0
- datahub/ingestion/sink/datahub_rest.py +4 -0
- datahub/ingestion/sink/file.py +1 -0
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +11 -0
- datahub/ingestion/source/fivetran/config.py +1 -0
- datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
- datahub/ingestion/source/sql/sql_types.py +22 -0
- datahub/metadata/_internal_schema_classes.py +41 -1
- datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
- datahub/metadata/schema.avsc +38 -1
- datahub/metadata/schemas/Actors.avsc +38 -1
- datahub/sdk/_shared.py +7 -5
- datahub/sdk/chart.py +3 -3
- datahub/sdk/dashboard.py +7 -7
- datahub/sdk/dataset.py +4 -0
- {acryl_datahub-1.2.0.8rc4.dist-info → acryl_datahub-1.2.0.9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.8rc4.dist-info → acryl_datahub-1.2.0.9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.8rc4.dist-info → acryl_datahub-1.2.0.9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.8rc4.dist-info → acryl_datahub-1.2.0.9.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/neo4j/neo4j_source.py
CHANGED

@@ -1,7 +1,6 @@
 import logging
-import time
 from dataclasses import dataclass
-from typing import
+from typing import Dict, Iterable, List, Optional, Tuple

 import pandas as pd
 from neo4j import GraphDatabase
@@ -11,11 +10,6 @@ from datahub.configuration.source_common import (
     EnvConfigMixin,
     PlatformInstanceConfigMixin,
 )
-from datahub.emitter.mce_builder import (
-    make_data_platform_urn,
-    make_dataset_urn_with_platform_instance,
-)
-from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.ingestion.api.common import PipelineContext
 from datahub.ingestion.api.decorators import (
     SupportStatus,
@@ -28,7 +22,6 @@ from datahub.ingestion.api.source import (
     MetadataWorkUnitProcessor,
     SourceCapability,
 )
-from datahub.ingestion.api.source_helpers import auto_workunit
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
@@ -40,36 +33,14 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionReport,
     StatefulIngestionSourceBase,
 )
-from datahub.
-from datahub.metadata.schema_classes import (
-    AuditStampClass,
-    BooleanTypeClass,
-    DatasetPropertiesClass,
-    DateTypeClass,
-    NullTypeClass,
-    NumberTypeClass,
-    OtherSchemaClass,
-    SchemaFieldClass,
-    SchemaMetadataClass,
-    StringTypeClass,
-    SubTypesClass,
-    UnionTypeClass,
-)
+from datahub.sdk.dataset import Dataset

 log = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)

-
-
-
-    "integer": NumberTypeClass,
-    "local_date_time": DateTypeClass,
-    "float": NumberTypeClass,
-    "string": StringTypeClass,
-    "date": DateTypeClass,
-    "node": StringTypeClass,
-    "relationship": StringTypeClass,
-}
+# Neo4j object types
+_NODE = "node"
+_RELATIONSHIP = "relationship"


 class Neo4jConfig(
@@ -78,7 +49,6 @@ class Neo4jConfig(
     username: str = Field(description="Neo4j Username")
     password: str = Field(description="Neo4j Password")
     uri: str = Field(description="The URI for the Neo4j server")
-    env: str = Field(description="Neo4j env")

     stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = None

@@ -96,8 +66,6 @@ class Neo4jSourceReport(StatefulIngestionReport):
 )
 @support_status(SupportStatus.CERTIFIED)
 class Neo4jSource(StatefulIngestionSourceBase):
-    NODE = "node"
-    RELATIONSHIP = "relationship"
     config: Neo4jConfig
     report: Neo4jSourceReport

@@ -113,78 +81,59 @@ class Neo4jSource(StatefulIngestionSourceBase):
         config = Neo4jConfig.parse_obj(config_dict)
         return cls(config, ctx)

-    def
-
-
-
-
-
-
-
-
-
-
-
-
-                nativeDataType=col_type,
-                description=col_type.upper()
-                if col_type in (self.NODE, self.RELATIONSHIP)
-                else col_type,
-                lastModified=AuditStampClass(
-                    time=round(time.time() * 1000), actor="urn:li:corpuser:ingestion"
-                ),
+    def create_schema_field_tuple(
+        self, col_name: str, col_type: str, obj_type: Optional[str]
+    ) -> Tuple[str, str, str]:
+        """Convert Neo4j property to (field_name, field_type, description) tuple."""
+        # Special case: when a node has a relationship-typed property, treat it as a node reference
+        # This ensures relationship properties within nodes are described as "NODE" rather than "RELATIONSHIP"
+        column_type = (
+            _NODE if obj_type == _NODE and col_type == _RELATIONSHIP else col_type
+        )
+
+        description = (
+            column_type.upper()
+            if column_type in (_NODE, _RELATIONSHIP)
+            else column_type
         )

-
+        return (col_name, column_type, description)
+
+    def get_subtype_from_obj_type(self, obj_type: str) -> str:
+        """Map Neo4j object type to DataHub subtype."""
+        if obj_type == _NODE:
+            return DatasetSubTypes.NEO4J_NODE
+        elif obj_type == _RELATIONSHIP:
+            return DatasetSubTypes.NEO4J_RELATIONSHIP
+        return DatasetSubTypes.NEO4J_NODE  # default fallback
+
+    def create_neo4j_dataset(
         self,
         dataset: str,
+        columns: list,
+        obj_type: Optional[str] = None,
         description: Optional[str] = None,
-
-
-        dataset_properties = DatasetPropertiesClass(
-            description=description,
-            customProperties=custom_properties,
-        )
-        yield MetadataChangeProposalWrapper(
-            entityUrn=make_dataset_urn_with_platform_instance(
-                platform=self.platform,
-                name=dataset,
-                platform_instance=self.config.platform_instance,
-                env=self.config.env,
-            ),
-            aspect=dataset_properties,
-        ).as_workunit()
-
-    def generate_neo4j_object(
-        self, dataset: str, columns: list, obj_type: Optional[str] = None
-    ) -> Optional[MetadataChangeProposalWrapper]:
+    ) -> Optional[Dataset]:
+        """Create Dataset entity with Neo4j schema and metadata."""
         try:
-
-            self.
+            schema_fields = [
+                self.create_schema_field_tuple(
+                    col_name=key, col_type=value.lower(), obj_type=obj_type
+                )
                 for d in columns
                 for key, value in d.items()
             ]
-
-
-
-
-
-
-
-
-
-                platform=make_data_platform_urn(self.platform),
-                version=0,
-                hash="",
-                platformSchema=OtherSchemaClass(rawSchema=""),
-                lastModified=AuditStampClass(
-                    time=round(time.time() * 1000),
-                    actor="urn:li:corpuser:ingestion",
-                ),
-                fields=fields,
-            ),
+
+            return Dataset(
+                platform=self.platform,
+                name=dataset,
+                platform_instance=self.config.platform_instance,
+                env=self.config.env,
+                schema=schema_fields,
+                subtype=self.get_subtype_from_obj_type(obj_type or _NODE),
+                description=description,
             )
+
         except Exception as e:
             log.error(e)
             self.report.report_failure(
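As a sanity check on the helper above, here is the same tuple logic lifted out of the class as a standalone sketch (`_NODE` and `_RELATIONSHIP` mirror the module constants; the sample property names are invented):

```python
from typing import Optional, Tuple

_NODE = "node"
_RELATIONSHIP = "relationship"

def create_schema_field_tuple(
    col_name: str, col_type: str, obj_type: Optional[str]
) -> Tuple[str, str, str]:
    # A relationship-typed property on a node is reported as a node reference.
    column_type = _NODE if obj_type == _NODE and col_type == _RELATIONSHIP else col_type
    # Object types get an upper-cased description; scalars keep their type name.
    description = column_type.upper() if column_type in (_NODE, _RELATIONSHIP) else column_type
    return (col_name, column_type, description)

assert create_schema_field_tuple("knows", "relationship", "node") == ("knows", "node", "NODE")
assert create_schema_field_tuple("age", "integer", "node") == ("age", "integer", "integer")
```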
@@ -199,21 +148,24 @@ class Neo4jSource(StatefulIngestionSourceBase):
             self.config.uri, auth=(self.config.username, self.config.password)
         )
         """
-        This process retrieves the metadata for Neo4j objects using an APOC query,
-        with two columns: key and value. The key represents
-        corresponding metadata.
+        This process retrieves the metadata for Neo4j objects using an APOC query,
+        which returns a dictionary with two columns: key and value. The key represents
+        the Neo4j object, while the value contains the corresponding metadata.

-        When data is returned from Neo4j, much of the relationship metadata is stored
-        metadata. Consequently, the objects are organized
-        relationships.
+        When data is returned from Neo4j, much of the relationship metadata is stored
+        with the relevant node's metadata. Consequently, the objects are organized
+        into two separate dataframes: one for nodes and one for relationships.

-        In the node dataframe, several fields are extracted and added as new columns.
-        dataframe, certain fields are parsed out,
+        In the node dataframe, several fields are extracted and added as new columns.
+        Similarly, in the relationship dataframe, certain fields are parsed out,
+        while others require metadata from the nodes dataframe.

-        Once the data is parsed and these two dataframes are created, we combine
-        single dataframe, which will be used to
+        Once the data is parsed and these two dataframes are created, we combine
+        a subset of their columns into a single dataframe, which will be used to
+        create the DataHub objects.

-        See the docs for examples of metadata:
+        See the docs for examples of metadata:
+        metadata-ingestion/docs/sources/neo4j/neo4j.md
         """
         try:
             log.info(f"{query}")
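For orientation, the APOC call described in that docstring can be run directly against a Neo4j instance. A rough standalone sketch (connection details are placeholders; assumes the APOC plugin is installed):

```python
from neo4j import GraphDatabase

# Placeholder connection details; requires a Neo4j instance with APOC installed.
driver = GraphDatabase.driver("neo4j://localhost:7687", auth=("neo4j", "password"))
query = (
    "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key "
    "RETURN key, value[key] AS value;"
)
with driver.session() as session:
    for record in session.run(query):
        # key is a label or relationship type; value["type"] is "node" or
        # "relationship", and value also carries the property metadata.
        print(record["key"], record["value"]["type"])
driver.close()
```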
@@ -238,7 +190,7 @@
             return None

     def process_nodes(self, data: list) -> pd.DataFrame:
-        nodes = [record for record in data if record["value"]["type"] ==
+        nodes = [record for record in data if record["value"]["type"] == _NODE]
         node_df = pd.DataFrame(
             nodes,
             columns=["key", "value"],
@@ -261,9 +213,7 @@
         return node_df

     def process_relationships(self, data: list, node_df: pd.DataFrame) -> pd.DataFrame:
-        rels = [
-            record for record in data if record["value"]["type"] == self.RELATIONSHIP
-        ]
+        rels = [record for record in data if record["value"]["type"] == _RELATIONSHIP]
         rel_df = pd.DataFrame(rels, columns=["key", "value"])
         rel_df["obj_type"] = rel_df["value"].apply(
             lambda record: self.get_obj_type(record)
@@ -331,51 +281,40 @@
         ]

     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
-
-        "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key
+        query = (
+            "CALL apoc.meta.schema() YIELD value UNWIND keys(value) AS key "
+            "RETURN key, value[key] AS value;"
         )
+        df = self.get_neo4j_metadata(query)
         if df is None:
             log.warning("No metadata retrieved from Neo4j")
             return

         for _, row in df.iterrows():
             try:
-
-                    columns=row["property_data_types"],
-                    dataset=row["key"],
-                )
-                if neo4j_obj:
-                    yield from auto_workunit([neo4j_obj])
-
-                yield MetadataChangeProposalWrapper(
-                    entityUrn=make_dataset_urn_with_platform_instance(
-                        platform=self.platform,
-                        name=row["key"],
-                        platform_instance=self.config.platform_instance,
-                        env=self.config.env,
-                    ),
-                    aspect=SubTypesClass(
-                        typeNames=[
-                            DatasetSubTypes.NEO4J_NODE
-                            if row["obj_type"] == self.NODE
-                            else DatasetSubTypes.NEO4J_RELATIONSHIP
-                        ]
-                    ),
-                ).as_workunit()
-
-                yield from self.add_properties(
+                dataset_obj = self.create_neo4j_dataset(
                     dataset=row["key"],
-
+                    columns=row["property_data_types"],
+                    obj_type=row["obj_type"],
                     description=row["description"],
                 )

+                if dataset_obj:
+                    yield from dataset_obj.as_workunits()
+                    self.report.obj_created += 1
+                else:
+                    log.warning(f"Failed to create dataset object for {row['key']}")
+                    self.report.obj_failures += 1
+
             except Exception as e:
-                log.
-                self.report.
-
+                log.warning(f"Failed to process row {row['key']}: {str(e)}")
+                self.report.report_warning(
+                    title="Error processing Neo4j metadata",
+                    message="Some entities will be missed",
                     context=row["key"],
                     exc=e,
                 )
+                self.report.obj_failures += 1

     def get_report(self) -> "Neo4jSourceReport":
         return self.report
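The net effect of the refactor in this file: instead of hand-assembling MetadataChangeProposalWrapper objects, the source now builds datahub.sdk Dataset objects and yields their workunits. A minimal sketch of that path (the label, fields, and description are invented):

```python
from datahub.ingestion.source.common.subtypes import DatasetSubTypes
from datahub.sdk.dataset import Dataset

# Schema entries are (field_name, field_type, description) tuples, mirroring
# create_schema_field_tuple. "Person" and its fields are hypothetical.
dataset = Dataset(
    platform="neo4j",
    name="Person",
    env="PROD",
    schema=[("name", "string", "string"), ("knows", "node", "NODE")],
    subtype=DatasetSubTypes.NEO4J_NODE,
    description="People stored in the graph",
)

# get_workunits_internal then emits everything derived from this one entity:
workunits = list(dataset.as_workunits())
```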
datahub/ingestion/source/sql/sql_types.py
CHANGED

@@ -459,6 +459,25 @@ VERTICA_SQL_TYPES_MAP: Dict[str, Any] = {
     "uuid": StringType,
 }

+# Neo4j property types mapping
+# https://neo4j.com/docs/cypher-manual/current/values-and-types/property-structural-constructed/
+NEO4J_TYPES_MAP: Dict[str, Any] = {
+    "boolean": BooleanType,
+    "date": DateType,
+    "duration": TimeType,  # Neo4j duration represents a temporal amount
+    "float": NumberType,
+    "integer": NumberType,
+    "list": ArrayType,
+    "local_date_time": TimeType,
+    "local_time": TimeType,
+    "point": StringType,  # Neo4j point - spatial coordinate, represented as string
+    "string": StringType,
+    "zoned_date_time": TimeType,
+    "zoned_time": TimeType,
+    "node": StringType,  # Neo4j object type
+    "relationship": StringType,  # Neo4j object type
+}
+

 _merged_mapping = {
     "boolean": BooleanType,
@@ -478,6 +497,7 @@ _merged_mapping = {
     **TRINO_SQL_TYPES_MAP,
     **ATHENA_SQL_TYPES_MAP,
     **VERTICA_SQL_TYPES_MAP,
+    **NEO4J_TYPES_MAP,
 }


@@ -487,6 +507,8 @@ def resolve_sql_type(
 ) -> Optional[DATAHUB_FIELD_TYPE]:
     # In theory, we should use the platform-specific mapping where available.
     # However, the types don't ever conflict, so the merged mapping is fine.
+    # Wrong assumption - there ARE conflicts as the test_type_conflicts_across_platforms in test_sql_types.py shows.
+    # TODO: revisit this and make platform-specific mappings work.
     TypeClass: Optional[Type[DATAHUB_FIELD_TYPE]] = (
         _merged_mapping.get(column_type) if column_type else None
     )
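With NEO4J_TYPES_MAP folded into _merged_mapping, Neo4j property type names resolve through the shared helper. A hedged usage sketch (assuming resolve_sql_type keeps the signature shown above):

```python
from datahub.ingestion.source.sql.sql_types import resolve_sql_type

# Each Neo4j property type name now maps to a DataHub field type via the
# merged mapping; unknown names still resolve to None.
for neo4j_type in ("integer", "local_date_time", "point", "relationship"):
    print(neo4j_type, "->", resolve_sql_type(neo4j_type))
```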
datahub/metadata/_internal_schema_classes.py
CHANGED

@@ -23033,7 +23033,7 @@ class VersionBasedRetentionClass(DictWrapper):


 class ActorsClass(_Aspect):
-    """Provisioned users of a role"""
+    """Provisioned users and groups of a role"""


     ASPECT_NAME = 'actors'
@@ -23042,13 +23042,16 @@ class ActorsClass(_Aspect):

     def __init__(self,
         users: Union[None, List["RoleUserClass"]]=None,
+        groups: Union[None, List["RoleGroupClass"]]=None,
     ):
         super().__init__()

         self.users = users
+        self.groups = groups

     def _restore_defaults(self) -> None:
         self.users = self.RECORD_SCHEMA.fields_dict["users"].default
+        self.groups = self.RECORD_SCHEMA.fields_dict["groups"].default


     @property
@@ -23061,6 +23064,41 @@ class ActorsClass(_Aspect):
         self._inner_dict['users'] = value


+    @property
+    def groups(self) -> Union[None, List["RoleGroupClass"]]:
+        """List of provisioned groups of a role"""
+        return self._inner_dict.get('groups')  # type: ignore
+
+    @groups.setter
+    def groups(self, value: Union[None, List["RoleGroupClass"]]) -> None:
+        self._inner_dict['groups'] = value
+
+
+class RoleGroupClass(DictWrapper):
+    """Provisioned groups of a role"""
+
+    RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.role.RoleGroup")
+    def __init__(self,
+        group: str,
+    ):
+        super().__init__()
+
+        self.group = group
+
+    def _restore_defaults(self) -> None:
+        self.group = str()
+
+
+    @property
+    def group(self) -> str:
+        """Link provisioned corp group for a role"""
+        return self._inner_dict.get('group')  # type: ignore
+
+    @group.setter
+    def group(self, value: str) -> None:
+        self._inner_dict['group'] = value
+
+
 class RolePropertiesClass(_Aspect):
     """Information about a ExternalRoleProperties"""

@@ -27551,6 +27589,7 @@ __SCHEMA_TYPES = {
     'com.linkedin.pegasus2avro.retention.TimeBasedRetention': TimeBasedRetentionClass,
     'com.linkedin.pegasus2avro.retention.VersionBasedRetention': VersionBasedRetentionClass,
     'com.linkedin.pegasus2avro.role.Actors': ActorsClass,
+    'com.linkedin.pegasus2avro.role.RoleGroup': RoleGroupClass,
     'com.linkedin.pegasus2avro.role.RoleProperties': RolePropertiesClass,
     'com.linkedin.pegasus2avro.role.RoleUser': RoleUserClass,
     'com.linkedin.pegasus2avro.schema.ArrayType': ArrayTypeClass,
@@ -28067,6 +28106,7 @@ __SCHEMA_TYPES = {
     'TimeBasedRetention': TimeBasedRetentionClass,
     'VersionBasedRetention': VersionBasedRetentionClass,
     'Actors': ActorsClass,
+    'RoleGroup': RoleGroupClass,
     'RoleProperties': RolePropertiesClass,
     'RoleUser': RoleUserClass,
     'ArrayType': ArrayTypeClass,

datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py
CHANGED

@@ -8,11 +8,13 @@
 # fmt: off
 # isort: skip_file
 from .....schema_classes import ActorsClass
+from .....schema_classes import RoleGroupClass
 from .....schema_classes import RolePropertiesClass
 from .....schema_classes import RoleUserClass


 Actors = ActorsClass
+RoleGroup = RoleGroupClass
 RoleProperties = RolePropertiesClass
 RoleUser = RoleUserClass

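Taken together, the generated classes let a role's actors aspect carry groups alongside users. A construction sketch (the URNs are illustrative; the `user` field name on RoleUserClass is assumed by analogy with RoleGroupClass):

```python
from datahub.metadata.schema_classes import (
    ActorsClass,
    RoleGroupClass,
    RoleUserClass,
)

actors = ActorsClass(
    users=[RoleUserClass(user="urn:li:corpuser:jdoe")],            # assumed field name
    groups=[RoleGroupClass(group="urn:li:corpGroup:data-platform")],
)
```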
datahub/metadata/schema.avsc
CHANGED

@@ -3734,9 +3734,46 @@
           "name": "users",
           "default": null,
           "doc": "List of provisioned users of a role"
+        },
+        {
+          "type": [
+            "null",
+            {
+              "type": "array",
+              "items": {
+                "type": "record",
+                "name": "RoleGroup",
+                "namespace": "com.linkedin.pegasus2avro.role",
+                "fields": [
+                  {
+                    "Relationship": {
+                      "entityTypes": [
+                        "corpGroup"
+                      ],
+                      "name": "Has"
+                    },
+                    "java": {
+                      "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                    },
+                    "Urn": "Urn",
+                    "entityTypes": [
+                      "corpGroup"
+                    ],
+                    "type": "string",
+                    "name": "group",
+                    "doc": "Link provisioned corp group for a role"
+                  }
+                ],
+                "doc": "Provisioned groups of a role"
+              }
+            }
+          ],
+          "name": "groups",
+          "default": null,
+          "doc": "List of provisioned groups of a role"
         }
       ],
-      "doc": "Provisioned users of a role"
+      "doc": "Provisioned users and groups of a role"
     },
     {
       "type": "record",

datahub/metadata/schemas/Actors.avsc
CHANGED

@@ -42,7 +42,44 @@
       "name": "users",
       "default": null,
       "doc": "List of provisioned users of a role"
+    },
+    {
+      "type": [
+        "null",
+        {
+          "type": "array",
+          "items": {
+            "type": "record",
+            "name": "RoleGroup",
+            "namespace": "com.linkedin.pegasus2avro.role",
+            "fields": [
+              {
+                "Relationship": {
+                  "entityTypes": [
+                    "corpGroup"
+                  ],
+                  "name": "Has"
+                },
+                "java": {
+                  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
+                },
+                "type": "string",
+                "name": "group",
+                "doc": "Link provisioned corp group for a role",
+                "Urn": "Urn",
+                "entityTypes": [
+                  "corpGroup"
+                ]
+              }
+            ],
+            "doc": "Provisioned groups of a role"
+          }
+        }
+      ],
+      "name": "groups",
+      "default": null,
+      "doc": "List of provisioned groups of a role"
     }
   ],
-  "doc": "Provisioned users of a role"
+  "doc": "Provisioned users and groups of a role"
 }
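For reference, a serialized actors aspect matching the extended schema would look roughly like this (URNs illustrative; both arrays are nullable and default to null, so existing payloads without groups remain valid):

```python
# Illustrative aspect payload shaped by the new Avro schema.
actors_aspect = {
    "users": [{"user": "urn:li:corpuser:jdoe"}],
    "groups": [{"group": "urn:li:corpGroup:data-platform"}],
}
```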
datahub/sdk/_shared.py
CHANGED

@@ -179,7 +179,7 @@ OwnerInputType: TypeAlias = Union[
     Tuple[ActorUrn, OwnershipTypeType],
     models.OwnerClass,
 ]
-OwnersInputType: TypeAlias =
+OwnersInputType: TypeAlias = Sequence[OwnerInputType]


 class HasOwnership(Entity):
@@ -280,7 +280,9 @@
 # If you pass in a ContainerKey, we can use parent_key() to build the browse path.
 # If you pass in a list of urns, we'll use that as the browse path. Any non-urn strings
 # will be treated as raw ids.
-ParentContainerInputType: TypeAlias = Union[
+ParentContainerInputType: TypeAlias = Union[
+    "Container", ContainerKey, Sequence[UrnOrStr]
+]


 class HasContainer(Entity):
@@ -340,7 +342,7 @@ class HasContainer(Entity):
                 )
                 for entry in parsed_path
             ]
-        elif container
+        elif isinstance(container, ContainerKey):
             container_urn = container.as_urn()

             browse_path_reversed = [container_urn]
@@ -399,7 +401,7 @@ class HasContainer(Entity):


 TagInputType: TypeAlias = Union[str, TagUrn, models.TagAssociationClass]
-TagsInputType: TypeAlias =
+TagsInputType: TypeAlias = Sequence[TagInputType]


 class HasTags(Entity):
@@ -454,7 +456,7 @@ class HasTags(Entity):
 TermInputType: TypeAlias = Union[
     str, GlossaryTermUrn, models.GlossaryTermAssociationClass
 ]
-TermsInputType: TypeAlias =
+TermsInputType: TypeAlias = Sequence[TermInputType]


 class HasTerms(Entity):
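The List to Sequence loosening in these aliases means read-only sequences such as tuples now type-check as SDK inputs. An illustration (hypothetical dataset; assumes Dataset mixes in HasTags as elsewhere in this SDK):

```python
from datahub.sdk.dataset import Dataset

dataset = Dataset(platform="snowflake", name="sales.orders")  # hypothetical
# A tuple now satisfies TagsInputType; with a List-typed alias, type checkers
# rejected immutable sequences like this.
dataset.set_tags(("urn:li:tag:pii", "urn:li:tag:gold"))
```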
datahub/sdk/chart.py
CHANGED

@@ -1,7 +1,7 @@
 from __future__ import annotations

 from datetime import datetime
-from typing import Dict, List, Optional, Type, Union
+from typing import Dict, List, Optional, Sequence, Type, Union

 from deprecated.sphinx import deprecated
 from typing_extensions import Self
@@ -73,7 +73,7 @@ class Chart(
         last_refreshed: Optional[datetime] = None,
         chart_type: Optional[Union[str, models.ChartTypeClass]] = None,
         access: Optional[str] = None,
-        input_datasets: Optional[
+        input_datasets: Optional[Sequence[Union[DatasetUrnOrStr, Dataset]]] = None,
         # Standard aspects.
         parent_container: ParentContainerInputType | Unset = unset,
         subtype: Optional[str] = None,
@@ -291,7 +291,7 @@ class Chart(
         return [DatasetUrn.from_string(input_urn) for input_urn in (props.inputs or [])]

     def set_input_datasets(
-        self, input_datasets:
+        self, input_datasets: Sequence[Union[DatasetUrnOrStr, Dataset]]
     ) -> None:
         """Set the input datasets of the chart."""
         # Convert all inputs to strings
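Correspondingly, chart inputs may now be any sequence mixing dataset URNs (or plain strings) with Dataset objects. A hedged sketch (platform, names, and URN are invented; the Chart constructor is assumed to follow the signature shown above):

```python
from datahub.sdk.chart import Chart
from datahub.sdk.dataset import Dataset

chart = Chart(
    platform="looker",          # hypothetical
    name="revenue-overview",    # hypothetical
    input_datasets=(
        "urn:li:dataset:(urn:li:dataPlatform:snowflake,sales.revenue,PROD)",
        Dataset(platform="snowflake", name="sales.orders"),
    ),
)
```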