acryl-datahub 0.15.0rc17__py3-none-any.whl → 0.15.0rc19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc17.dist-info → acryl_datahub-0.15.0rc19.dist-info}/METADATA +2404 -2404
- {acryl_datahub-0.15.0rc17.dist-info → acryl_datahub-0.15.0rc19.dist-info}/RECORD +12 -12
- datahub/__init__.py +1 -1
- datahub/ingestion/source/metadata/business_glossary.py +35 -18
- datahub/ingestion/source/snowflake/snowflake_utils.py +1 -1
- datahub/metadata/schema.avsc +4 -0
- datahub/metadata/schemas/FormInfo.avsc +4 -0
- datahub/sql_parsing/sql_parsing_aggregator.py +1 -2
- datahub/sql_parsing/sqlglot_utils.py +8 -2
- {acryl_datahub-0.15.0rc17.dist-info → acryl_datahub-0.15.0rc19.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc17.dist-info → acryl_datahub-0.15.0rc19.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc17.dist-info → acryl_datahub-0.15.0rc19.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=zTa1Zc6cS51RVM7kIIa6JgOSFayPVXd-AmsJeebmbNQ,575
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -346,7 +346,7 @@ datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz
|
|
|
346
346
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
347
347
|
datahub/ingestion/source/looker/view_upstream.py,sha256=rAWKif3UngeRGS_CMQqjihKs3Lxiu6DecLAdtdxvqOw,26096
|
|
348
348
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
349
|
-
datahub/ingestion/source/metadata/business_glossary.py,sha256=
|
|
349
|
+
datahub/ingestion/source/metadata/business_glossary.py,sha256=yySwJp2SCUQp8hRwN2lQuSqvOQowIhCKDKj9syhlTZA,18210
|
|
350
350
|
datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
|
|
351
351
|
datahub/ingestion/source/neo4j/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
352
352
|
datahub/ingestion/source/neo4j/neo4j_source.py,sha256=L9WiZ5yZrIDMrgj3gYU9j6zz3TRMXYpcWxeTegD7sFg,12409
|
|
@@ -438,7 +438,7 @@ datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-
|
|
|
438
438
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
439
439
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
|
|
440
440
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd8juVqjokfuSPuH9ppcM0ruXamxA,24807
|
|
441
|
-
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=
|
|
441
|
+
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=YczNEupY89jeegjR2_1pT4bPi9wQ69EIhGpzyCe9Jdg,12600
|
|
442
442
|
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=lo_3asTuIZbF-LuEUcYL-9NIZ720n7oB9mYA6WVTWA4,31960
|
|
443
443
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
444
444
|
datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
|
|
@@ -560,7 +560,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
|
|
|
560
560
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
561
561
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
562
562
|
datahub/metadata/_schema_classes.py,sha256=iPeBXGvbNEm0vw5pYwunnvx7bTtBdmIQVtzMOlS6bSI,955042
|
|
563
|
-
datahub/metadata/schema.avsc,sha256=
|
|
563
|
+
datahub/metadata/schema.avsc,sha256=Xx93OdPzQfBb2CtntIYE-HAeKNg-JZcCtRU95v7ZZCs,677728
|
|
564
564
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
565
565
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
566
566
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -757,7 +757,7 @@ datahub/metadata/schemas/ExecutionRequestKey.avsc,sha256=SvjnlTAGYsSnvVE0rZ9-7UP
|
|
|
757
757
|
datahub/metadata/schemas/ExecutionRequestResult.avsc,sha256=kg3xMNr9kYLPnFsV-iqcGm1sh1muQVGJvxUt15L1yKo,2333
|
|
758
758
|
datahub/metadata/schemas/ExecutionRequestSignal.avsc,sha256=dsIUa6tfVSXqYOgh4cW6_Hzi8RjHuJJoO-mBAuZukpA,2515
|
|
759
759
|
datahub/metadata/schemas/Filter.avsc,sha256=PU-aGkc2-sI3ZXY7ci-Y0A7zp1jux3VW_6c8MJRAokg,5933
|
|
760
|
-
datahub/metadata/schemas/FormInfo.avsc,sha256=
|
|
760
|
+
datahub/metadata/schemas/FormInfo.avsc,sha256=FbN34htiCgm3LqKDL3sVsJhMUHIyc5jYpGJtYm7Ysd4,6270
|
|
761
761
|
datahub/metadata/schemas/FormKey.avsc,sha256=1-wE28B8T3WJ3JtexreNtFvP3To3n7U-jvYudCuSM9o,437
|
|
762
762
|
datahub/metadata/schemas/Forms.avsc,sha256=shmkhRoHN2gTaTsqGrGDRoNwe_z-nrFbbLjH9MtVDCs,10955
|
|
763
763
|
datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0zI2llzFYLVFJhmAiHoiI,10102
|
|
@@ -869,11 +869,11 @@ datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn
|
|
|
869
869
|
datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
|
|
870
870
|
datahub/sql_parsing/schema_resolver.py,sha256=9INZWdxA2dMSLK6RXaVqjbjyLY_VKMhCkQv_Xd6Ln3I,10848
|
|
871
871
|
datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
|
|
872
|
-
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=
|
|
872
|
+
datahub/sql_parsing/sql_parsing_aggregator.py,sha256=F-aj7yqOwbo7FpxduFO5a7cLWkojL_Npv3_dlfHPNGY,69877
|
|
873
873
|
datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
|
|
874
874
|
datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
|
|
875
875
|
datahub/sql_parsing/sqlglot_lineage.py,sha256=CLDOc0HNqL_539eahOP3QOoldIYC6CF29id4Xe3TlEM,47018
|
|
876
|
-
datahub/sql_parsing/sqlglot_utils.py,sha256=
|
|
876
|
+
datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
|
|
877
877
|
datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
|
|
878
878
|
datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
879
879
|
datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
|
|
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
974
974
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
975
975
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
976
976
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
977
|
+
acryl_datahub-0.15.0rc19.dist-info/METADATA,sha256=q_LaYt6m4WYgYyJo2ZA3Gj3a7kk5MKwiWvJheDfSPc8,173559
|
|
978
|
+
acryl_datahub-0.15.0rc19.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
979
|
+
acryl_datahub-0.15.0rc19.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
980
|
+
acryl_datahub-0.15.0rc19.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
981
|
+
acryl_datahub-0.15.0rc19.dist-info/RECORD,,
|
datahub/__init__.py
CHANGED
|
@@ -45,6 +45,9 @@ class Owners(ConfigModel):
|
|
|
45
45
|
groups: Optional[List[str]] = None
|
|
46
46
|
|
|
47
47
|
|
|
48
|
+
OwnersMultipleTypes = Union[List[Owners], Owners]
|
|
49
|
+
|
|
50
|
+
|
|
48
51
|
class KnowledgeCard(ConfigModel):
|
|
49
52
|
url: Optional[str] = None
|
|
50
53
|
label: Optional[str] = None
|
|
@@ -57,7 +60,7 @@ class GlossaryTermConfig(ConfigModel):
|
|
|
57
60
|
term_source: Optional[str] = None
|
|
58
61
|
source_ref: Optional[str] = None
|
|
59
62
|
source_url: Optional[str] = None
|
|
60
|
-
owners: Optional[
|
|
63
|
+
owners: Optional[OwnersMultipleTypes] = None
|
|
61
64
|
inherits: Optional[List[str]] = None
|
|
62
65
|
contains: Optional[List[str]] = None
|
|
63
66
|
values: Optional[List[str]] = None
|
|
@@ -74,7 +77,7 @@ class GlossaryNodeConfig(ConfigModel):
|
|
|
74
77
|
id: Optional[str] = None
|
|
75
78
|
name: str
|
|
76
79
|
description: str
|
|
77
|
-
owners: Optional[
|
|
80
|
+
owners: Optional[OwnersMultipleTypes] = None
|
|
78
81
|
terms: Optional[List["GlossaryTermConfig"]] = None
|
|
79
82
|
nodes: Optional[List["GlossaryNodeConfig"]] = None
|
|
80
83
|
knowledge_links: Optional[List[KnowledgeCard]] = None
|
|
@@ -88,7 +91,7 @@ class DefaultConfig(ConfigModel):
|
|
|
88
91
|
"""Holds defaults for populating fields in glossary terms"""
|
|
89
92
|
|
|
90
93
|
source: Optional[str] = None
|
|
91
|
-
owners:
|
|
94
|
+
owners: OwnersMultipleTypes
|
|
92
95
|
url: Optional[str] = None
|
|
93
96
|
source_type: str = "INTERNAL"
|
|
94
97
|
|
|
@@ -153,30 +156,44 @@ def make_glossary_term_urn(
|
|
|
153
156
|
return "urn:li:glossaryTerm:" + create_id(path, default_id, enable_auto_id)
|
|
154
157
|
|
|
155
158
|
|
|
156
|
-
def
|
|
157
|
-
|
|
159
|
+
def get_owners_multiple_types(owners: OwnersMultipleTypes) -> models.OwnershipClass:
|
|
160
|
+
"""Allows owner types to be a list and maintains backward compatibility"""
|
|
161
|
+
if isinstance(owners, Owners):
|
|
162
|
+
return models.OwnershipClass(owners=list(get_owners(owners)))
|
|
163
|
+
|
|
164
|
+
owners_meta: List[models.OwnerClass] = []
|
|
165
|
+
for owner in owners:
|
|
166
|
+
owners_meta.extend(get_owners(owner))
|
|
167
|
+
|
|
168
|
+
return models.OwnershipClass(owners=owners_meta)
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def get_owners(owners: Owners) -> Iterable[models.OwnerClass]:
|
|
172
|
+
actual_type = owners.type or models.OwnershipTypeClass.DEVELOPER
|
|
173
|
+
|
|
174
|
+
if actual_type.startswith("urn:li:ownershipType:"):
|
|
175
|
+
ownership_type: str = "CUSTOM"
|
|
176
|
+
ownership_type_urn: Optional[str] = actual_type
|
|
177
|
+
else:
|
|
178
|
+
ownership_type, ownership_type_urn = validate_ownership_type(actual_type)
|
|
179
|
+
|
|
158
180
|
if owners.typeUrn is not None:
|
|
159
181
|
ownership_type_urn = owners.typeUrn
|
|
160
|
-
|
|
182
|
+
|
|
161
183
|
if owners.users is not None:
|
|
162
|
-
|
|
163
|
-
models.OwnerClass(
|
|
184
|
+
for o in owners.users:
|
|
185
|
+
yield models.OwnerClass(
|
|
164
186
|
owner=make_user_urn(o),
|
|
165
187
|
type=ownership_type,
|
|
166
188
|
typeUrn=ownership_type_urn,
|
|
167
189
|
)
|
|
168
|
-
for o in owners.users
|
|
169
|
-
]
|
|
170
190
|
if owners.groups is not None:
|
|
171
|
-
|
|
172
|
-
models.OwnerClass(
|
|
191
|
+
for o in owners.groups:
|
|
192
|
+
yield models.OwnerClass(
|
|
173
193
|
owner=make_group_urn(o),
|
|
174
194
|
type=ownership_type,
|
|
175
195
|
typeUrn=ownership_type_urn,
|
|
176
196
|
)
|
|
177
|
-
for o in owners.groups
|
|
178
|
-
]
|
|
179
|
-
return models.OwnershipClass(owners=owners_meta)
|
|
180
197
|
|
|
181
198
|
|
|
182
199
|
def get_mces(
|
|
@@ -185,7 +202,7 @@ def get_mces(
|
|
|
185
202
|
ingestion_config: BusinessGlossarySourceConfig,
|
|
186
203
|
ctx: PipelineContext,
|
|
187
204
|
) -> Iterable[Union[MetadataChangeProposalWrapper, models.MetadataChangeEventClass]]:
|
|
188
|
-
root_owners =
|
|
205
|
+
root_owners = get_owners_multiple_types(glossary.owners)
|
|
189
206
|
|
|
190
207
|
if glossary.nodes:
|
|
191
208
|
for node in glossary.nodes:
|
|
@@ -270,7 +287,7 @@ def get_mces_from_node(
|
|
|
270
287
|
node_owners = parentOwners
|
|
271
288
|
if glossaryNode.owners is not None:
|
|
272
289
|
assert glossaryNode.owners is not None
|
|
273
|
-
node_owners =
|
|
290
|
+
node_owners = get_owners_multiple_types(glossaryNode.owners)
|
|
274
291
|
|
|
275
292
|
node_snapshot = models.GlossaryNodeSnapshotClass(
|
|
276
293
|
urn=node_urn,
|
|
@@ -426,7 +443,7 @@ def get_mces_from_term(
|
|
|
426
443
|
ownership: models.OwnershipClass = parentOwnership
|
|
427
444
|
if glossaryTerm.owners is not None:
|
|
428
445
|
assert glossaryTerm.owners is not None
|
|
429
|
-
ownership =
|
|
446
|
+
ownership = get_owners_multiple_types(glossaryTerm.owners)
|
|
430
447
|
aspects.append(ownership)
|
|
431
448
|
|
|
432
449
|
if glossaryTerm.domain is not None:
|
|
@@ -119,7 +119,6 @@ class SnowflakeFilter:
|
|
|
119
119
|
) -> bool:
|
|
120
120
|
if not dataset_type or not dataset_name:
|
|
121
121
|
return True
|
|
122
|
-
dataset_params = dataset_name.split(".")
|
|
123
122
|
if dataset_type.lower() not in (
|
|
124
123
|
SnowflakeObjectDomain.TABLE,
|
|
125
124
|
SnowflakeObjectDomain.EXTERNAL_TABLE,
|
|
@@ -131,6 +130,7 @@ class SnowflakeFilter:
|
|
|
131
130
|
if _is_sys_table(dataset_name):
|
|
132
131
|
return False
|
|
133
132
|
|
|
133
|
+
dataset_params = _split_qualified_name(dataset_name)
|
|
134
134
|
if len(dataset_params) != 3:
|
|
135
135
|
self.structured_reporter.info(
|
|
136
136
|
title="Unexpected dataset pattern",
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -18518,6 +18518,10 @@
|
|
|
18518
18518
|
"namespace": "com.linkedin.pegasus2avro.form",
|
|
18519
18519
|
"fields": [
|
|
18520
18520
|
{
|
|
18521
|
+
"Searchable": {
|
|
18522
|
+
"fieldName": "structuredPropertyPromptUrns",
|
|
18523
|
+
"fieldType": "URN"
|
|
18524
|
+
},
|
|
18521
18525
|
"java": {
|
|
18522
18526
|
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
18523
18527
|
},
|
|
@@ -1383,8 +1383,7 @@ class SqlParsingAggregator(Closeable):
|
|
|
1383
1383
|
return QueryUrn(query_id).urn()
|
|
1384
1384
|
|
|
1385
1385
|
@classmethod
|
|
1386
|
-
def _composite_query_id(cls, composed_of_queries:
|
|
1387
|
-
composed_of_queries = list(composed_of_queries)
|
|
1386
|
+
def _composite_query_id(cls, composed_of_queries: List[QueryId]) -> str:
|
|
1388
1387
|
combined = json.dumps(composed_of_queries)
|
|
1389
1388
|
return f"composite_{generate_hash(combined)}"
|
|
1390
1389
|
|
|
@@ -121,7 +121,7 @@ _BASIC_NORMALIZATION_RULES = {
|
|
|
121
121
|
# Remove /* */ comments.
|
|
122
122
|
re.compile(r"/\*.*?\*/", re.DOTALL): "",
|
|
123
123
|
# Remove -- comments.
|
|
124
|
-
re.compile(r"--.*$"): "",
|
|
124
|
+
re.compile(r"--.*$", re.MULTILINE): "",
|
|
125
125
|
# Replace all runs of whitespace with a single space.
|
|
126
126
|
re.compile(r"\s+"): " ",
|
|
127
127
|
# Remove leading and trailing whitespace and trailing semicolons.
|
|
@@ -131,10 +131,16 @@ _BASIC_NORMALIZATION_RULES = {
|
|
|
131
131
|
# Replace anything that looks like a string with a placeholder.
|
|
132
132
|
re.compile(r"'[^']*'"): "?",
|
|
133
133
|
# Replace sequences of IN/VALUES with a single placeholder.
|
|
134
|
-
|
|
134
|
+
# The r" ?" makes it more robust to uneven spacing.
|
|
135
|
+
re.compile(r"\b(IN|VALUES)\s*\( ?\?(?:, ?\?)* ?\)", re.IGNORECASE): r"\1 (?)",
|
|
135
136
|
# Normalize parenthesis spacing.
|
|
136
137
|
re.compile(r"\( "): "(",
|
|
137
138
|
re.compile(r" \)"): ")",
|
|
139
|
+
# Fix up spaces before commas in column lists.
|
|
140
|
+
# e.g. "col1 , col2" -> "col1, col2"
|
|
141
|
+
# e.g. "col1,col2" -> "col1, col2"
|
|
142
|
+
re.compile(r"\b ,"): ",",
|
|
143
|
+
re.compile(r"\b,\b"): ", ",
|
|
138
144
|
}
|
|
139
145
|
_TABLE_NAME_NORMALIZATION_RULES = {
|
|
140
146
|
# Replace UUID-like strings with a placeholder (both - and _ variants).
|
|
File without changes
|
|
File without changes
|
|
File without changes
|