acryl-datahub 0.15.0.5rc4__py3-none-any.whl → 0.15.0.5rc6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/METADATA +2355 -2355
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/RECORD +18 -15
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/cli/container_cli.py +1 -64
- datahub/emitter/composite_emitter.py +36 -0
- datahub/ingestion/source/apply/__init__.py +0 -0
- datahub/ingestion/source/apply/datahub_apply.py +223 -0
- datahub/ingestion/source/looker/looker_config.py +3 -1
- datahub/ingestion/source/looker/looker_dataclasses.py +8 -0
- datahub/ingestion/source/looker/looker_file_loader.py +14 -3
- datahub/ingestion/source/looker/looker_template_language.py +104 -14
- datahub/ingestion/source/looker/lookml_config.py +16 -2
- datahub/ingestion/source/looker/lookml_source.py +54 -22
- datahub/ingestion/source/snowflake/snowflake_v2.py +41 -4
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
|
-
datahub/_version.py,sha256=
|
|
3
|
+
datahub/_version.py,sha256=w4julQ8A-97paLIXUIUhvcioGTJP8CcU4tW-IaJw3zY,324
|
|
4
4
|
datahub/entrypoints.py,sha256=osv2ailvuW-HHlAE0fOtyblJI1X7HInZutd9DC66jqQ,8022
|
|
5
5
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
6
6
|
datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -62,7 +62,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
|
62
62
|
datahub/cli/check_cli.py,sha256=ajrWVMAHYbgvYi4OFitFXx7Y6oigvZFgIeUiKV9ECik,12859
|
|
63
63
|
datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
|
|
64
64
|
datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
|
|
65
|
-
datahub/cli/container_cli.py,sha256=
|
|
65
|
+
datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
|
|
66
66
|
datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
|
|
67
67
|
datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
|
|
68
68
|
datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
|
|
@@ -113,6 +113,7 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
|
|
|
113
113
|
datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
|
|
114
114
|
datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
115
115
|
datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
|
|
116
|
+
datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP8V0fioM,1212
|
|
116
117
|
datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
|
|
117
118
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
118
119
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
@@ -217,6 +218,8 @@ datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4
|
|
|
217
218
|
datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
|
|
218
219
|
datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
|
|
219
220
|
datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
|
|
221
|
+
datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
222
|
+
datahub/ingestion/source/apply/datahub_apply.py,sha256=NGhbszi6ObfJoh3YPGjcVWHJsbNboVwKUgnrCSt8HJU,7629
|
|
220
223
|
datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
221
224
|
datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
|
|
222
225
|
datahub/ingestion/source/aws/glue.py,sha256=DwROr923M01QnvImUbMoHS6TTTT9kBz2tEmQ3Sv4EoY,58019
|
|
@@ -333,22 +336,22 @@ datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=-rFNXKD8_EFoX
|
|
|
333
336
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
334
337
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
335
338
|
datahub/ingestion/source/looker/looker_common.py,sha256=squUUBHxsLeT5xbZOTO66irtOB8fL0V4Q8Tgd9EJMYU,62067
|
|
336
|
-
datahub/ingestion/source/looker/looker_config.py,sha256=
|
|
339
|
+
datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
|
|
337
340
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
338
341
|
datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
|
|
339
|
-
datahub/ingestion/source/looker/looker_dataclasses.py,sha256=
|
|
340
|
-
datahub/ingestion/source/looker/looker_file_loader.py,sha256=
|
|
342
|
+
datahub/ingestion/source/looker/looker_dataclasses.py,sha256=MrDeZ4Nd0wQnJbCoI1qePYlYeObnUw5dvpWcmhKuNgc,12346
|
|
343
|
+
datahub/ingestion/source/looker/looker_file_loader.py,sha256=PEyL9KWRaFcrvOkapU8wSNlFbmetmBy9tAyCgeVDOa4,4864
|
|
341
344
|
datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=0gaYjBv4wkbbLWVgvaAV6JyWAFb0utTG6TCve2d9xss,11511
|
|
342
345
|
datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
|
|
343
346
|
datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
|
|
344
347
|
datahub/ingestion/source/looker/looker_source.py,sha256=S-g06Bm3sbyD0Qjra9hEhZmsVDb-BY_-bCPDwCjtEoQ,66427
|
|
345
|
-
datahub/ingestion/source/looker/looker_template_language.py,sha256=
|
|
348
|
+
datahub/ingestion/source/looker/looker_template_language.py,sha256=W-SMICKBfIuivrHywHRYchz9SJiXhoU8VOEKGQW_1v8,17825
|
|
346
349
|
datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
|
|
347
350
|
datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
|
|
348
351
|
datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
|
|
349
|
-
datahub/ingestion/source/looker/lookml_config.py,sha256=
|
|
352
|
+
datahub/ingestion/source/looker/lookml_config.py,sha256=RuZkH3DDmII21gEsUvPsJi5gxWngbYkqBP06H8_n_Hs,11353
|
|
350
353
|
datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
|
|
351
|
-
datahub/ingestion/source/looker/lookml_source.py,sha256=
|
|
354
|
+
datahub/ingestion/source/looker/lookml_source.py,sha256=PJBUJgZfZyvmasDf_LJC39SggLCA6vSfAbf1PdzviZU,43889
|
|
352
355
|
datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
|
|
353
356
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
354
357
|
datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
|
|
@@ -446,7 +449,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
|
|
|
446
449
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=TN_cTF4a8V_tbeR2czm_hoMYfQMuqmBbFlAqyh7PJzQ,6551
|
|
447
450
|
datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
|
|
448
451
|
datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=xq58c47zmaQPkTVqjKW25iViX8VJuHdQDTFY4jxzZ2o,12778
|
|
449
|
-
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=
|
|
452
|
+
datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=vyTqC_C5Bf0AMRVyoxUfl1CdlgeQouX20msP2FsMqnk,33439
|
|
450
453
|
datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
451
454
|
datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
|
|
452
455
|
datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
|
|
@@ -993,9 +996,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
993
996
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
994
997
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
995
998
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
996
|
-
acryl_datahub-0.15.0.5rc4.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
997
|
-
acryl_datahub-0.15.0.
|
|
998
|
-
acryl_datahub-0.15.0.5rc4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
999
|
-
acryl_datahub-0.15.0.
|
|
1000
|
-
acryl_datahub-0.15.0.5rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1001
|
-
acryl_datahub-0.15.0.5rc4.dist-info/RECORD,,
|
|
999
|
+
acryl_datahub-0.15.0.5rc6.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1000
|
+
acryl_datahub-0.15.0.5rc6.dist-info/METADATA,sha256=EYc5u1TJYgxZjmer0MNvW4TpmUkIwDFwJ1qTmQsrkxQ,173382
|
|
1001
|
+
acryl_datahub-0.15.0.5rc6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
1002
|
+
acryl_datahub-0.15.0.5rc6.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
|
|
1003
|
+
acryl_datahub-0.15.0.5rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1004
|
+
acryl_datahub-0.15.0.5rc6.dist-info/RECORD,,
|
{acryl_datahub-0.15.0.5rc4.dist-info → acryl_datahub-0.15.0.5rc6.dist-info}/entry_points.txt
RENAMED
|
@@ -34,6 +34,7 @@ clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsa
|
|
|
34
34
|
cockroachdb = datahub.ingestion.source.sql.cockroachdb:CockroachDBSource
|
|
35
35
|
csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
|
|
36
36
|
datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
|
|
37
|
+
datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
|
|
37
38
|
datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
|
|
38
39
|
datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
|
|
39
40
|
datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
|
datahub/_version.py
CHANGED
datahub/cli/container_cli.py
CHANGED
|
@@ -1,19 +1,8 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import Any, List
|
|
3
2
|
|
|
4
3
|
import click
|
|
5
|
-
import progressbar
|
|
6
4
|
|
|
7
|
-
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
8
|
-
from datahub.ingestion.graph.client import get_default_graph
|
|
9
|
-
from datahub.metadata.schema_classes import (
|
|
10
|
-
DomainsClass,
|
|
11
|
-
GlossaryTermAssociationClass,
|
|
12
|
-
OwnerClass,
|
|
13
|
-
OwnershipTypeClass,
|
|
14
|
-
TagAssociationClass,
|
|
15
|
-
)
|
|
16
|
-
from datahub.specific.dataset import DatasetPatchBuilder
|
|
5
|
+
from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
|
|
17
6
|
|
|
18
7
|
logger = logging.getLogger(__name__)
|
|
19
8
|
|
|
@@ -24,58 +13,6 @@ def container() -> None:
|
|
|
24
13
|
pass
|
|
25
14
|
|
|
26
15
|
|
|
27
|
-
def apply_association_to_container(
|
|
28
|
-
container_urn: str,
|
|
29
|
-
association_urn: str,
|
|
30
|
-
association_type: str,
|
|
31
|
-
) -> None:
|
|
32
|
-
"""
|
|
33
|
-
Common function to add either tags, terms, domains, or owners to child datasets (for now).
|
|
34
|
-
|
|
35
|
-
Args:
|
|
36
|
-
container_urn: The URN of the container
|
|
37
|
-
association_urn: The URN of the tag, term, or user to apply
|
|
38
|
-
association_type: One of 'tag', 'term', 'domain' or 'owner'
|
|
39
|
-
"""
|
|
40
|
-
urns: List[str] = []
|
|
41
|
-
graph = get_default_graph()
|
|
42
|
-
logger.info(f"Using {graph}")
|
|
43
|
-
urns.extend(
|
|
44
|
-
graph.get_urns_by_filter(
|
|
45
|
-
container=container_urn, batch_size=1000, entity_types=["dataset"]
|
|
46
|
-
)
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
all_patches: List[Any] = []
|
|
50
|
-
for urn in urns:
|
|
51
|
-
builder = DatasetPatchBuilder(urn)
|
|
52
|
-
patches: List[Any] = []
|
|
53
|
-
if association_type == "tag":
|
|
54
|
-
patches = builder.add_tag(TagAssociationClass(association_urn)).build()
|
|
55
|
-
elif association_type == "term":
|
|
56
|
-
patches = builder.add_term(
|
|
57
|
-
GlossaryTermAssociationClass(association_urn)
|
|
58
|
-
).build()
|
|
59
|
-
elif association_type == "owner":
|
|
60
|
-
patches = builder.add_owner(
|
|
61
|
-
OwnerClass(
|
|
62
|
-
owner=association_urn,
|
|
63
|
-
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
64
|
-
)
|
|
65
|
-
).build()
|
|
66
|
-
elif association_type == "domain":
|
|
67
|
-
patches = [
|
|
68
|
-
MetadataChangeProposalWrapper(
|
|
69
|
-
entityUrn=urn,
|
|
70
|
-
aspect=DomainsClass(domains=[association_urn]),
|
|
71
|
-
)
|
|
72
|
-
]
|
|
73
|
-
all_patches.extend(patches)
|
|
74
|
-
mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
|
|
75
|
-
for mcp in mcps_iter:
|
|
76
|
-
graph.emit(mcp)
|
|
77
|
-
|
|
78
|
-
|
|
79
16
|
@container.command()
|
|
80
17
|
@click.option("--container-urn", required=True, type=str)
|
|
81
18
|
@click.option("--tag-urn", required=True, type=str)
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
from typing import Callable, List, Optional, Union
|
|
2
|
+
|
|
3
|
+
from datahub.emitter.generic_emitter import Emitter
|
|
4
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
5
|
+
from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
|
|
6
|
+
MetadataChangeEvent,
|
|
7
|
+
MetadataChangeProposal,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
# Experimental composite emitter that allows multiple emitters to be used in a single ingestion job
|
|
12
|
+
class CompositeEmitter(Emitter):
|
|
13
|
+
def __init__(self, emitters: List[Emitter]) -> None:
|
|
14
|
+
self.emitters = emitters
|
|
15
|
+
|
|
16
|
+
def emit(
|
|
17
|
+
self,
|
|
18
|
+
item: Union[
|
|
19
|
+
MetadataChangeEvent,
|
|
20
|
+
MetadataChangeProposal,
|
|
21
|
+
MetadataChangeProposalWrapper,
|
|
22
|
+
],
|
|
23
|
+
callback: Optional[Callable[[Exception, str], None]] = None,
|
|
24
|
+
) -> None:
|
|
25
|
+
callback_called = False
|
|
26
|
+
for emitter in self.emitters:
|
|
27
|
+
if not callback_called:
|
|
28
|
+
# We want to ensure that the callback is only called once and we tie it to the first emitter
|
|
29
|
+
emitter.emit(item, callback)
|
|
30
|
+
callback_called = True
|
|
31
|
+
else:
|
|
32
|
+
emitter.emit(item)
|
|
33
|
+
|
|
34
|
+
def flush(self) -> None:
|
|
35
|
+
for emitter in self.emitters:
|
|
36
|
+
emitter.flush()
|
|
File without changes
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from functools import partial
|
|
3
|
+
from typing import Any, Iterable, List, Optional, Union
|
|
4
|
+
|
|
5
|
+
import progressbar
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
|
|
8
|
+
from datahub.configuration.common import ConfigModel
|
|
9
|
+
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
10
|
+
from datahub.ingestion.api.common import PipelineContext
|
|
11
|
+
from datahub.ingestion.api.decorators import (
|
|
12
|
+
SupportStatus,
|
|
13
|
+
config_class,
|
|
14
|
+
platform_name,
|
|
15
|
+
support_status,
|
|
16
|
+
)
|
|
17
|
+
from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
|
|
18
|
+
from datahub.ingestion.api.source_helpers import auto_workunit_reporter
|
|
19
|
+
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
20
|
+
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
21
|
+
from datahub.metadata.schema_classes import (
|
|
22
|
+
DomainsClass,
|
|
23
|
+
GlossaryTermAssociationClass,
|
|
24
|
+
MetadataChangeProposalClass,
|
|
25
|
+
OwnerClass,
|
|
26
|
+
OwnershipTypeClass,
|
|
27
|
+
TagAssociationClass,
|
|
28
|
+
)
|
|
29
|
+
from datahub.specific.dataset import DatasetPatchBuilder
|
|
30
|
+
|
|
31
|
+
logger = logging.getLogger(__name__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def apply_association_to_container(
|
|
35
|
+
container_urn: str,
|
|
36
|
+
association_urn: str,
|
|
37
|
+
association_type: str,
|
|
38
|
+
emit: bool = True,
|
|
39
|
+
graph: Optional[DataHubGraph] = None,
|
|
40
|
+
) -> Optional[List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]]:
|
|
41
|
+
"""
|
|
42
|
+
Common function to add either tags, terms, domains, or owners to child datasets (for now).
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
container_urn: The URN of the container
|
|
46
|
+
association_urn: The URN of the tag, term, or user to apply
|
|
47
|
+
association_type: One of 'tag', 'term', 'domain' or 'owner'
|
|
48
|
+
"""
|
|
49
|
+
urns: List[str] = [container_urn]
|
|
50
|
+
if not graph:
|
|
51
|
+
graph = get_default_graph()
|
|
52
|
+
logger.info(f"Using {graph}")
|
|
53
|
+
urns.extend(
|
|
54
|
+
graph.get_urns_by_filter(
|
|
55
|
+
container=container_urn,
|
|
56
|
+
batch_size=1000,
|
|
57
|
+
entity_types=["dataset", "container"],
|
|
58
|
+
)
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
all_patches: List[Any] = []
|
|
62
|
+
for urn in urns:
|
|
63
|
+
builder = DatasetPatchBuilder(urn)
|
|
64
|
+
patches: List[Any] = []
|
|
65
|
+
if association_type == "tag":
|
|
66
|
+
patches = builder.add_tag(TagAssociationClass(association_urn)).build()
|
|
67
|
+
elif association_type == "term":
|
|
68
|
+
patches = builder.add_term(
|
|
69
|
+
GlossaryTermAssociationClass(association_urn)
|
|
70
|
+
).build()
|
|
71
|
+
elif association_type == "owner":
|
|
72
|
+
patches = builder.add_owner(
|
|
73
|
+
OwnerClass(
|
|
74
|
+
owner=association_urn,
|
|
75
|
+
type=OwnershipTypeClass.TECHNICAL_OWNER,
|
|
76
|
+
)
|
|
77
|
+
).build()
|
|
78
|
+
elif association_type == "domain":
|
|
79
|
+
patches = [
|
|
80
|
+
MetadataChangeProposalWrapper(
|
|
81
|
+
entityUrn=urn,
|
|
82
|
+
aspect=DomainsClass(domains=[association_urn]),
|
|
83
|
+
)
|
|
84
|
+
]
|
|
85
|
+
all_patches.extend(patches)
|
|
86
|
+
if emit:
|
|
87
|
+
mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
|
|
88
|
+
for mcp in mcps_iter:
|
|
89
|
+
graph.emit(mcp)
|
|
90
|
+
return None
|
|
91
|
+
else:
|
|
92
|
+
return all_patches
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
class DomainApplyConfig(ConfigModel):
|
|
96
|
+
assets: List[str] = Field(
|
|
97
|
+
default_factory=list,
|
|
98
|
+
description="List of assets to apply domain hierarchichaly. Currently only containers and datasets are supported",
|
|
99
|
+
)
|
|
100
|
+
domain_urn: str = Field(default="")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class TagApplyConfig(ConfigModel):
|
|
104
|
+
assets: List[str] = Field(
|
|
105
|
+
default_factory=list,
|
|
106
|
+
description="List of assets to apply tag hierarchichaly. Currently only containers and datasets are supported",
|
|
107
|
+
)
|
|
108
|
+
tag_urn: str = Field(default="")
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
class TermApplyConfig(ConfigModel):
|
|
112
|
+
assets: List[str] = Field(
|
|
113
|
+
default_factory=list,
|
|
114
|
+
description="List of assets to apply term hierarchichaly. Currently only containers and datasets are supported",
|
|
115
|
+
)
|
|
116
|
+
term_urn: str = Field(default="")
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class OwnerApplyConfig(ConfigModel):
|
|
120
|
+
assets: List[str] = Field(
|
|
121
|
+
default_factory=list,
|
|
122
|
+
description="List of assets to apply owner hierarchichaly. Currently only containers and datasets are supported",
|
|
123
|
+
)
|
|
124
|
+
owner_urn: str = Field(default="")
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class DataHubApplyConfig(ConfigModel):
|
|
128
|
+
domain_apply: Optional[List[DomainApplyConfig]] = Field(
|
|
129
|
+
default=None,
|
|
130
|
+
description="List to apply domains to assets",
|
|
131
|
+
)
|
|
132
|
+
tag_apply: Optional[List[TagApplyConfig]] = Field(
|
|
133
|
+
default=None,
|
|
134
|
+
description="List to apply tags to assets",
|
|
135
|
+
)
|
|
136
|
+
term_apply: Optional[List[TermApplyConfig]] = Field(
|
|
137
|
+
default=None,
|
|
138
|
+
description="List to apply terms to assets",
|
|
139
|
+
)
|
|
140
|
+
owner_apply: Optional[List[OwnerApplyConfig]] = Field(
|
|
141
|
+
default=None,
|
|
142
|
+
description="List to apply owners to assets",
|
|
143
|
+
)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@platform_name("DataHubApply")
|
|
147
|
+
@config_class(DataHubApplyConfig)
|
|
148
|
+
@support_status(SupportStatus.TESTING)
|
|
149
|
+
class DataHubApplySource(Source):
|
|
150
|
+
"""
|
|
151
|
+
This source is a helper over CLI
|
|
152
|
+
so people can use the helper to apply various metadata changes to DataHub
|
|
153
|
+
via Managed Ingestion
|
|
154
|
+
"""
|
|
155
|
+
|
|
156
|
+
def __init__(self, ctx: PipelineContext, config: DataHubApplyConfig):
|
|
157
|
+
self.ctx = ctx
|
|
158
|
+
self.config = config
|
|
159
|
+
self.report = SourceReport()
|
|
160
|
+
self.graph = ctx.require_graph()
|
|
161
|
+
|
|
162
|
+
def _yield_workunits(
|
|
163
|
+
self,
|
|
164
|
+
proposals: List[
|
|
165
|
+
Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]
|
|
166
|
+
],
|
|
167
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
168
|
+
for proposal in proposals:
|
|
169
|
+
if isinstance(proposal, MetadataChangeProposalWrapper):
|
|
170
|
+
yield proposal.as_workunit()
|
|
171
|
+
else:
|
|
172
|
+
yield MetadataWorkUnit(
|
|
173
|
+
id=MetadataWorkUnit.generate_workunit_id(proposal),
|
|
174
|
+
mcp_raw=proposal,
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
def _handle_assets(
|
|
178
|
+
self, assets: List[str], apply_urn: str, apply_type: str
|
|
179
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
180
|
+
for asset in assets:
|
|
181
|
+
change_proposals = apply_association_to_container(
|
|
182
|
+
asset, apply_urn, apply_type, emit=False, graph=self.graph
|
|
183
|
+
)
|
|
184
|
+
assert change_proposals is not None
|
|
185
|
+
yield from self._yield_workunits(change_proposals)
|
|
186
|
+
|
|
187
|
+
def _yield_domain(self) -> Iterable[MetadataWorkUnit]:
|
|
188
|
+
if not self.config.domain_apply:
|
|
189
|
+
return
|
|
190
|
+
for apply in self.config.domain_apply:
|
|
191
|
+
yield from self._handle_assets(apply.assets, apply.domain_urn, "domain")
|
|
192
|
+
|
|
193
|
+
def _yield_tag(self) -> Iterable[MetadataWorkUnit]:
|
|
194
|
+
if not self.config.tag_apply:
|
|
195
|
+
return
|
|
196
|
+
for apply in self.config.tag_apply:
|
|
197
|
+
yield from self._handle_assets(apply.assets, apply.tag_urn, "tag")
|
|
198
|
+
|
|
199
|
+
def _yield_term(self) -> Iterable[MetadataWorkUnit]:
|
|
200
|
+
if not self.config.term_apply:
|
|
201
|
+
return
|
|
202
|
+
for apply in self.config.term_apply:
|
|
203
|
+
yield from self._handle_assets(apply.assets, apply.term_urn, "term")
|
|
204
|
+
|
|
205
|
+
def _yield_owner(self) -> Iterable[MetadataWorkUnit]:
|
|
206
|
+
if not self.config.owner_apply:
|
|
207
|
+
return
|
|
208
|
+
for apply in self.config.owner_apply:
|
|
209
|
+
yield from self._handle_assets(apply.assets, apply.owner_urn, "owner")
|
|
210
|
+
|
|
211
|
+
def get_workunits_internal(
|
|
212
|
+
self,
|
|
213
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
214
|
+
yield from self._yield_domain()
|
|
215
|
+
yield from self._yield_tag()
|
|
216
|
+
yield from self._yield_term()
|
|
217
|
+
yield from self._yield_owner()
|
|
218
|
+
|
|
219
|
+
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
|
|
220
|
+
return [partial(auto_workunit_reporter, self.get_report())]
|
|
221
|
+
|
|
222
|
+
def get_report(self) -> SourceReport:
|
|
223
|
+
return self.report
|
|
@@ -177,7 +177,9 @@ def _get_generic_definition(
|
|
|
177
177
|
class LookerConnectionDefinition(ConfigModel):
|
|
178
178
|
platform: str
|
|
179
179
|
default_db: str
|
|
180
|
-
default_schema: Optional[str]
|
|
180
|
+
default_schema: Optional[str] = (
|
|
181
|
+
None # Optional since some sources are two-level only
|
|
182
|
+
)
|
|
181
183
|
platform_instance: Optional[str] = None
|
|
182
184
|
platform_env: Optional[str] = Field(
|
|
183
185
|
default=None,
|
|
@@ -32,6 +32,12 @@ class LookerField:
|
|
|
32
32
|
sql: Optional[str]
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
@dataclass
|
|
36
|
+
class LookerConstant:
|
|
37
|
+
name: str
|
|
38
|
+
value: str
|
|
39
|
+
|
|
40
|
+
|
|
35
41
|
@dataclass
|
|
36
42
|
class LookerModel:
|
|
37
43
|
connection: str
|
|
@@ -75,6 +81,7 @@ class LookerModel:
|
|
|
75
81
|
try:
|
|
76
82
|
parsed = load_and_preprocess_file(
|
|
77
83
|
path=included_file,
|
|
84
|
+
reporter=reporter,
|
|
78
85
|
source_config=source_config,
|
|
79
86
|
)
|
|
80
87
|
included_explores = parsed.get("explores", [])
|
|
@@ -217,6 +224,7 @@ class LookerModel:
|
|
|
217
224
|
try:
|
|
218
225
|
parsed = load_and_preprocess_file(
|
|
219
226
|
path=included_file,
|
|
227
|
+
reporter=reporter,
|
|
220
228
|
source_config=source_config,
|
|
221
229
|
)
|
|
222
230
|
seen_so_far.add(included_file)
|
|
@@ -4,7 +4,10 @@ from dataclasses import replace
|
|
|
4
4
|
from typing import Dict, Optional
|
|
5
5
|
|
|
6
6
|
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
|
|
7
|
-
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
|
|
7
|
+
from datahub.ingestion.source.looker.looker_dataclasses import (
|
|
8
|
+
LookerConstant,
|
|
9
|
+
LookerViewFile,
|
|
10
|
+
)
|
|
8
11
|
from datahub.ingestion.source.looker.looker_template_language import (
|
|
9
12
|
load_and_preprocess_file,
|
|
10
13
|
)
|
|
@@ -30,12 +33,14 @@ class LookerViewFileLoader:
|
|
|
30
33
|
base_projects_folder: Dict[str, pathlib.Path],
|
|
31
34
|
reporter: LookMLSourceReport,
|
|
32
35
|
source_config: LookMLSourceConfig,
|
|
36
|
+
manifest_constants: Dict[str, LookerConstant] = {},
|
|
33
37
|
) -> None:
|
|
34
38
|
self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {}
|
|
35
39
|
self._root_project_name = root_project_name
|
|
36
40
|
self._base_projects_folder = base_projects_folder
|
|
37
41
|
self.reporter = reporter
|
|
38
42
|
self.source_config = source_config
|
|
43
|
+
self.manifest_constants = manifest_constants
|
|
39
44
|
|
|
40
45
|
def _load_viewfile(
|
|
41
46
|
self, project_name: str, path: str, reporter: LookMLSourceReport
|
|
@@ -60,7 +65,7 @@ class LookerViewFileLoader:
|
|
|
60
65
|
with open(path) as file:
|
|
61
66
|
raw_file_content = file.read()
|
|
62
67
|
except Exception as e:
|
|
63
|
-
self.reporter.
|
|
68
|
+
self.reporter.report_warning(
|
|
64
69
|
title="LKML File Loading Error",
|
|
65
70
|
message="A lookml file is not present on local storage or GitHub",
|
|
66
71
|
context=f"file path: {path}",
|
|
@@ -71,9 +76,15 @@ class LookerViewFileLoader:
|
|
|
71
76
|
try:
|
|
72
77
|
logger.debug(f"Loading viewfile {path}")
|
|
73
78
|
|
|
79
|
+
# load_and preprocess_file is called multiple times for loading view file from multiple flows.
|
|
80
|
+
# Flag resolve_constants is a hack to avoid passing around manifest_constants from all of the flows.
|
|
81
|
+
# This is fine as rest of flows do not need resolution of constants.
|
|
74
82
|
parsed = load_and_preprocess_file(
|
|
75
83
|
path=path,
|
|
84
|
+
reporter=self.reporter,
|
|
76
85
|
source_config=self.source_config,
|
|
86
|
+
resolve_constants=True,
|
|
87
|
+
manifest_constants=self.manifest_constants,
|
|
77
88
|
)
|
|
78
89
|
|
|
79
90
|
looker_viewfile = LookerViewFile.from_looker_dict(
|
|
@@ -90,7 +101,7 @@ class LookerViewFileLoader:
|
|
|
90
101
|
self.viewfile_cache[path] = looker_viewfile
|
|
91
102
|
return looker_viewfile
|
|
92
103
|
except Exception as e:
|
|
93
|
-
self.reporter.
|
|
104
|
+
self.reporter.report_warning(
|
|
94
105
|
title="LKML File Parsing Error",
|
|
95
106
|
message="The input file is not lookml file",
|
|
96
107
|
context=f"file path: {path}",
|