acryl-datahub 0.15.0.5rc4__py3-none-any.whl → 0.15.0.5rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
- datahub/_version.py,sha256=WfLwqZVZ8jnBAUkc1mxyo_REl_AjCDcwm9R97SuldaY,324
3
+ datahub/_version.py,sha256=w4julQ8A-97paLIXUIUhvcioGTJP8CcU4tW-IaJw3zY,324
4
4
  datahub/entrypoints.py,sha256=osv2ailvuW-HHlAE0fOtyblJI1X7HInZutd9DC66jqQ,8022
5
5
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
6
6
  datahub/_codegen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -62,7 +62,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
62
62
  datahub/cli/check_cli.py,sha256=ajrWVMAHYbgvYi4OFitFXx7Y6oigvZFgIeUiKV9ECik,12859
63
63
  datahub/cli/cli_utils.py,sha256=2uvPv6WqxbRdH7UteHwhRash4E0ncU5P6XebrFLeECo,13584
64
64
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
65
- datahub/cli/container_cli.py,sha256=8D73hLfTHsDg4Cedh_2x0utl7ppOeB1TUJVRgur-Crw,3624
65
+ datahub/cli/container_cli.py,sha256=uDOwewGEPYHQt-ppYEb8ESXhZjPNIZG0Rt3cm2FzPqc,1569
66
66
  datahub/cli/delete_cli.py,sha256=oQ4Yy6hxZHcl67MYJiQumLs_8QmFEj7SPZFzxFXvDk8,23481
67
67
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
68
68
  datahub/cli/docker_cli.py,sha256=w9ZQMRVlHwfJI2XDe7mO0lwnT7-dZoK6tPadSMgwEM8,36493
@@ -113,6 +113,7 @@ datahub/configuration/validate_multiline_string.py,sha256=l9PF6_EAC_1lWxU_RWrvPB
113
113
  datahub/configuration/yaml.py,sha256=dLmjCalPOjgdc7mmJxtlP7uOrIHZiAWxD1gwAFOdtUU,308
114
114
  datahub/emitter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
115
115
  datahub/emitter/aspect.py,sha256=ef0DVycqg-tRPurkYjc-5zknmLP2p2Y2RxP55WkvAEc,480
116
+ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP8V0fioM,1212
116
117
  datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
117
118
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
118
119
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
@@ -217,6 +218,8 @@ datahub/ingestion/source/abs/datalake_profiler_config.py,sha256=Rkf64evufyVGPiE4
217
218
  datahub/ingestion/source/abs/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
218
219
  datahub/ingestion/source/abs/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
219
220
  datahub/ingestion/source/abs/source.py,sha256=cuMezUzr-Smp5tok2ceYor5I5jp52NDMjfeN8kfIbvg,24816
221
+ datahub/ingestion/source/apply/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
222
+ datahub/ingestion/source/apply/datahub_apply.py,sha256=NGhbszi6ObfJoh3YPGjcVWHJsbNboVwKUgnrCSt8HJU,7629
220
223
  datahub/ingestion/source/aws/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
221
224
  datahub/ingestion/source/aws/aws_common.py,sha256=DfdQgkJ_s2isFx8WvqKTlAcBk4KE8SgfpmA5BgC3fgY,17716
222
225
  datahub/ingestion/source/aws/glue.py,sha256=DwROr923M01QnvImUbMoHS6TTTT9kBz2tEmQ3Sv4EoY,58019
@@ -333,22 +336,22 @@ datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=-rFNXKD8_EFoX
333
336
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
334
337
  datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
335
338
  datahub/ingestion/source/looker/looker_common.py,sha256=squUUBHxsLeT5xbZOTO66irtOB8fL0V4Q8Tgd9EJMYU,62067
336
- datahub/ingestion/source/looker/looker_config.py,sha256=Kp-IoGcjfk5yPoqXFbsjo6aLA1j0H9LFEzZwhoGNvy4,13584
339
+ datahub/ingestion/source/looker/looker_config.py,sha256=eVKw1nn9D8hUFdRfNyT3MtzL8w-zWhFeokiwSnNKQuc,13607
337
340
  datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
338
341
  datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
339
- datahub/ingestion/source/looker/looker_dataclasses.py,sha256=LjrP5m_A4HV-XeFlSNGVYNuyF0ulxp_qwB82Ss4Iycs,12200
340
- datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbwX9gL83kgL01vIETzzbmZIhmw,4267
342
+ datahub/ingestion/source/looker/looker_dataclasses.py,sha256=MrDeZ4Nd0wQnJbCoI1qePYlYeObnUw5dvpWcmhKuNgc,12346
343
+ datahub/ingestion/source/looker/looker_file_loader.py,sha256=PEyL9KWRaFcrvOkapU8wSNlFbmetmBy9tAyCgeVDOa4,4864
341
344
  datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=0gaYjBv4wkbbLWVgvaAV6JyWAFb0utTG6TCve2d9xss,11511
342
345
  datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
343
346
  datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
344
347
  datahub/ingestion/source/looker/looker_source.py,sha256=S-g06Bm3sbyD0Qjra9hEhZmsVDb-BY_-bCPDwCjtEoQ,66427
345
- datahub/ingestion/source/looker/looker_template_language.py,sha256=mfbU27NYs0mkZHXdtvS38FC5WCJ4S_aGjC8t09yecKY,14330
348
+ datahub/ingestion/source/looker/looker_template_language.py,sha256=W-SMICKBfIuivrHywHRYchz9SJiXhoU8VOEKGQW_1v8,17825
346
349
  datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
347
350
  datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
348
351
  datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
349
- datahub/ingestion/source/looker/lookml_config.py,sha256=Ub5Efgzb1bDId5nNcUhcZKEm2hp273wF5edip283U2g,10775
352
+ datahub/ingestion/source/looker/lookml_config.py,sha256=RuZkH3DDmII21gEsUvPsJi5gxWngbYkqBP06H8_n_Hs,11353
350
353
  datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
351
- datahub/ingestion/source/looker/lookml_source.py,sha256=qkLVzs5AHSA5B518s7fCD99KKHh6_8-QR8KM0mySnFg,42757
354
+ datahub/ingestion/source/looker/lookml_source.py,sha256=PJBUJgZfZyvmasDf_LJC39SggLCA6vSfAbf1PdzviZU,43889
352
355
  datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
353
356
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
354
357
  datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
@@ -446,7 +449,7 @@ datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYh
446
449
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=TN_cTF4a8V_tbeR2czm_hoMYfQMuqmBbFlAqyh7PJzQ,6551
447
450
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=ySFm7WDk8FW9KjCnX4HQfTqObIrlUS-V8WIHl3j0CTI,24848
448
451
  datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=xq58c47zmaQPkTVqjKW25iViX8VJuHdQDTFY4jxzZ2o,12778
449
- datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=8nnQ_XlbT4q6y6_JleSV67njUaPjF9x1yehKu5I1lgc,32072
452
+ datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=vyTqC_C5Bf0AMRVyoxUfl1CdlgeQouX20msP2FsMqnk,33439
450
453
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
451
454
  datahub/ingestion/source/sql/athena.py,sha256=Uh9wGLOqAkcphffxOPIQNyXvjeRm74XIpaLb4rjqMjM,24045
452
455
  datahub/ingestion/source/sql/clickhouse.py,sha256=uSRy-HKAiGFTHVLoVtGoh23X0O1lwyYUaK8BaWkYhps,25555
@@ -993,9 +996,9 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
993
996
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
994
997
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
995
998
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
996
- acryl_datahub-0.15.0.5rc4.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
997
- acryl_datahub-0.15.0.5rc4.dist-info/METADATA,sha256=5_fFeSDo0RY3z5NeouX22pyoKVy8iM7I7a8KyYQz4Xg,173382
998
- acryl_datahub-0.15.0.5rc4.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
999
- acryl_datahub-0.15.0.5rc4.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
1000
- acryl_datahub-0.15.0.5rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1001
- acryl_datahub-0.15.0.5rc4.dist-info/RECORD,,
999
+ acryl_datahub-0.15.0.5rc6.dist-info/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1000
+ acryl_datahub-0.15.0.5rc6.dist-info/METADATA,sha256=EYc5u1TJYgxZjmer0MNvW4TpmUkIwDFwJ1qTmQsrkxQ,173382
1001
+ acryl_datahub-0.15.0.5rc6.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
1002
+ acryl_datahub-0.15.0.5rc6.dist-info/entry_points.txt,sha256=U1e5ZwqPX1OaIbvGrwvozcdB8SbzFYXQM7plpdLKKeo,9592
1003
+ acryl_datahub-0.15.0.5rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1004
+ acryl_datahub-0.15.0.5rc6.dist-info/RECORD,,
@@ -34,6 +34,7 @@ clickhouse-usage = datahub.ingestion.source.usage.clickhouse_usage:ClickHouseUsa
34
34
  cockroachdb = datahub.ingestion.source.sql.cockroachdb:CockroachDBSource
35
35
  csv-enricher = datahub.ingestion.source.csv_enricher:CSVEnricherSource
36
36
  datahub = datahub.ingestion.source.datahub.datahub_source:DataHubSource
37
+ datahub-apply = datahub.ingestion.source.apply.datahub_apply:DataHubApplySource
37
38
  datahub-business-glossary = datahub.ingestion.source.metadata.business_glossary:BusinessGlossaryFileSource
38
39
  datahub-gc = datahub.ingestion.source.gc.datahub_gc:DataHubGcSource
39
40
  datahub-lineage-file = datahub.ingestion.source.metadata.lineage:LineageFileSource
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "0.15.0.5rc4"
3
+ __version__ = "0.15.0.5rc6"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -1,19 +1,8 @@
1
1
  import logging
2
- from typing import Any, List
3
2
 
4
3
  import click
5
- import progressbar
6
4
 
7
- from datahub.emitter.mcp import MetadataChangeProposalWrapper
8
- from datahub.ingestion.graph.client import get_default_graph
9
- from datahub.metadata.schema_classes import (
10
- DomainsClass,
11
- GlossaryTermAssociationClass,
12
- OwnerClass,
13
- OwnershipTypeClass,
14
- TagAssociationClass,
15
- )
16
- from datahub.specific.dataset import DatasetPatchBuilder
5
+ from datahub.ingestion.source.apply.datahub_apply import apply_association_to_container
17
6
 
18
7
  logger = logging.getLogger(__name__)
19
8
 
@@ -24,58 +13,6 @@ def container() -> None:
24
13
  pass
25
14
 
26
15
 
27
- def apply_association_to_container(
28
- container_urn: str,
29
- association_urn: str,
30
- association_type: str,
31
- ) -> None:
32
- """
33
- Common function to add either tags, terms, domains, or owners to child datasets (for now).
34
-
35
- Args:
36
- container_urn: The URN of the container
37
- association_urn: The URN of the tag, term, or user to apply
38
- association_type: One of 'tag', 'term', 'domain' or 'owner'
39
- """
40
- urns: List[str] = []
41
- graph = get_default_graph()
42
- logger.info(f"Using {graph}")
43
- urns.extend(
44
- graph.get_urns_by_filter(
45
- container=container_urn, batch_size=1000, entity_types=["dataset"]
46
- )
47
- )
48
-
49
- all_patches: List[Any] = []
50
- for urn in urns:
51
- builder = DatasetPatchBuilder(urn)
52
- patches: List[Any] = []
53
- if association_type == "tag":
54
- patches = builder.add_tag(TagAssociationClass(association_urn)).build()
55
- elif association_type == "term":
56
- patches = builder.add_term(
57
- GlossaryTermAssociationClass(association_urn)
58
- ).build()
59
- elif association_type == "owner":
60
- patches = builder.add_owner(
61
- OwnerClass(
62
- owner=association_urn,
63
- type=OwnershipTypeClass.TECHNICAL_OWNER,
64
- )
65
- ).build()
66
- elif association_type == "domain":
67
- patches = [
68
- MetadataChangeProposalWrapper(
69
- entityUrn=urn,
70
- aspect=DomainsClass(domains=[association_urn]),
71
- )
72
- ]
73
- all_patches.extend(patches)
74
- mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
75
- for mcp in mcps_iter:
76
- graph.emit(mcp)
77
-
78
-
79
16
  @container.command()
80
17
  @click.option("--container-urn", required=True, type=str)
81
18
  @click.option("--tag-urn", required=True, type=str)
@@ -0,0 +1,36 @@
1
+ from typing import Callable, List, Optional, Union
2
+
3
+ from datahub.emitter.generic_emitter import Emitter
4
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
5
+ from datahub.metadata.com.linkedin.pegasus2avro.mxe import (
6
+ MetadataChangeEvent,
7
+ MetadataChangeProposal,
8
+ )
9
+
10
+
11
+ # Experimental composite emitter that allows multiple emitters to be used in a single ingestion job
12
+ class CompositeEmitter(Emitter):
13
+ def __init__(self, emitters: List[Emitter]) -> None:
14
+ self.emitters = emitters
15
+
16
+ def emit(
17
+ self,
18
+ item: Union[
19
+ MetadataChangeEvent,
20
+ MetadataChangeProposal,
21
+ MetadataChangeProposalWrapper,
22
+ ],
23
+ callback: Optional[Callable[[Exception, str], None]] = None,
24
+ ) -> None:
25
+ callback_called = False
26
+ for emitter in self.emitters:
27
+ if not callback_called:
28
+ # We want to ensure that the callback is only called once and we tie it to the first emitter
29
+ emitter.emit(item, callback)
30
+ callback_called = True
31
+ else:
32
+ emitter.emit(item)
33
+
34
+ def flush(self) -> None:
35
+ for emitter in self.emitters:
36
+ emitter.flush()
File without changes
@@ -0,0 +1,223 @@
1
+ import logging
2
+ from functools import partial
3
+ from typing import Any, Iterable, List, Optional, Union
4
+
5
+ import progressbar
6
+ from pydantic import Field
7
+
8
+ from datahub.configuration.common import ConfigModel
9
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
10
+ from datahub.ingestion.api.common import PipelineContext
11
+ from datahub.ingestion.api.decorators import (
12
+ SupportStatus,
13
+ config_class,
14
+ platform_name,
15
+ support_status,
16
+ )
17
+ from datahub.ingestion.api.source import MetadataWorkUnitProcessor, Source, SourceReport
18
+ from datahub.ingestion.api.source_helpers import auto_workunit_reporter
19
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
20
+ from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
21
+ from datahub.metadata.schema_classes import (
22
+ DomainsClass,
23
+ GlossaryTermAssociationClass,
24
+ MetadataChangeProposalClass,
25
+ OwnerClass,
26
+ OwnershipTypeClass,
27
+ TagAssociationClass,
28
+ )
29
+ from datahub.specific.dataset import DatasetPatchBuilder
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ def apply_association_to_container(
35
+ container_urn: str,
36
+ association_urn: str,
37
+ association_type: str,
38
+ emit: bool = True,
39
+ graph: Optional[DataHubGraph] = None,
40
+ ) -> Optional[List[Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]]]:
41
+ """
42
+ Common function to add either tags, terms, domains, or owners to child datasets (for now).
43
+
44
+ Args:
45
+ container_urn: The URN of the container
46
+ association_urn: The URN of the tag, term, or user to apply
47
+ association_type: One of 'tag', 'term', 'domain' or 'owner'
48
+ """
49
+ urns: List[str] = [container_urn]
50
+ if not graph:
51
+ graph = get_default_graph()
52
+ logger.info(f"Using {graph}")
53
+ urns.extend(
54
+ graph.get_urns_by_filter(
55
+ container=container_urn,
56
+ batch_size=1000,
57
+ entity_types=["dataset", "container"],
58
+ )
59
+ )
60
+
61
+ all_patches: List[Any] = []
62
+ for urn in urns:
63
+ builder = DatasetPatchBuilder(urn)
64
+ patches: List[Any] = []
65
+ if association_type == "tag":
66
+ patches = builder.add_tag(TagAssociationClass(association_urn)).build()
67
+ elif association_type == "term":
68
+ patches = builder.add_term(
69
+ GlossaryTermAssociationClass(association_urn)
70
+ ).build()
71
+ elif association_type == "owner":
72
+ patches = builder.add_owner(
73
+ OwnerClass(
74
+ owner=association_urn,
75
+ type=OwnershipTypeClass.TECHNICAL_OWNER,
76
+ )
77
+ ).build()
78
+ elif association_type == "domain":
79
+ patches = [
80
+ MetadataChangeProposalWrapper(
81
+ entityUrn=urn,
82
+ aspect=DomainsClass(domains=[association_urn]),
83
+ )
84
+ ]
85
+ all_patches.extend(patches)
86
+ if emit:
87
+ mcps_iter = progressbar.progressbar(all_patches, redirect_stdout=True)
88
+ for mcp in mcps_iter:
89
+ graph.emit(mcp)
90
+ return None
91
+ else:
92
+ return all_patches
93
+
94
+
95
+ class DomainApplyConfig(ConfigModel):
96
+ assets: List[str] = Field(
97
+ default_factory=list,
98
+ description="List of assets to apply domain hierarchichaly. Currently only containers and datasets are supported",
99
+ )
100
+ domain_urn: str = Field(default="")
101
+
102
+
103
+ class TagApplyConfig(ConfigModel):
104
+ assets: List[str] = Field(
105
+ default_factory=list,
106
+ description="List of assets to apply tag hierarchichaly. Currently only containers and datasets are supported",
107
+ )
108
+ tag_urn: str = Field(default="")
109
+
110
+
111
+ class TermApplyConfig(ConfigModel):
112
+ assets: List[str] = Field(
113
+ default_factory=list,
114
+ description="List of assets to apply term hierarchichaly. Currently only containers and datasets are supported",
115
+ )
116
+ term_urn: str = Field(default="")
117
+
118
+
119
+ class OwnerApplyConfig(ConfigModel):
120
+ assets: List[str] = Field(
121
+ default_factory=list,
122
+ description="List of assets to apply owner hierarchichaly. Currently only containers and datasets are supported",
123
+ )
124
+ owner_urn: str = Field(default="")
125
+
126
+
127
+ class DataHubApplyConfig(ConfigModel):
128
+ domain_apply: Optional[List[DomainApplyConfig]] = Field(
129
+ default=None,
130
+ description="List to apply domains to assets",
131
+ )
132
+ tag_apply: Optional[List[TagApplyConfig]] = Field(
133
+ default=None,
134
+ description="List to apply tags to assets",
135
+ )
136
+ term_apply: Optional[List[TermApplyConfig]] = Field(
137
+ default=None,
138
+ description="List to apply terms to assets",
139
+ )
140
+ owner_apply: Optional[List[OwnerApplyConfig]] = Field(
141
+ default=None,
142
+ description="List to apply owners to assets",
143
+ )
144
+
145
+
146
+ @platform_name("DataHubApply")
147
+ @config_class(DataHubApplyConfig)
148
+ @support_status(SupportStatus.TESTING)
149
+ class DataHubApplySource(Source):
150
+ """
151
+ This source is a helper over CLI
152
+ so people can use the helper to apply various metadata changes to DataHub
153
+ via Managed Ingestion
154
+ """
155
+
156
+ def __init__(self, ctx: PipelineContext, config: DataHubApplyConfig):
157
+ self.ctx = ctx
158
+ self.config = config
159
+ self.report = SourceReport()
160
+ self.graph = ctx.require_graph()
161
+
162
+ def _yield_workunits(
163
+ self,
164
+ proposals: List[
165
+ Union[MetadataChangeProposalWrapper, MetadataChangeProposalClass]
166
+ ],
167
+ ) -> Iterable[MetadataWorkUnit]:
168
+ for proposal in proposals:
169
+ if isinstance(proposal, MetadataChangeProposalWrapper):
170
+ yield proposal.as_workunit()
171
+ else:
172
+ yield MetadataWorkUnit(
173
+ id=MetadataWorkUnit.generate_workunit_id(proposal),
174
+ mcp_raw=proposal,
175
+ )
176
+
177
+ def _handle_assets(
178
+ self, assets: List[str], apply_urn: str, apply_type: str
179
+ ) -> Iterable[MetadataWorkUnit]:
180
+ for asset in assets:
181
+ change_proposals = apply_association_to_container(
182
+ asset, apply_urn, apply_type, emit=False, graph=self.graph
183
+ )
184
+ assert change_proposals is not None
185
+ yield from self._yield_workunits(change_proposals)
186
+
187
+ def _yield_domain(self) -> Iterable[MetadataWorkUnit]:
188
+ if not self.config.domain_apply:
189
+ return
190
+ for apply in self.config.domain_apply:
191
+ yield from self._handle_assets(apply.assets, apply.domain_urn, "domain")
192
+
193
+ def _yield_tag(self) -> Iterable[MetadataWorkUnit]:
194
+ if not self.config.tag_apply:
195
+ return
196
+ for apply in self.config.tag_apply:
197
+ yield from self._handle_assets(apply.assets, apply.tag_urn, "tag")
198
+
199
+ def _yield_term(self) -> Iterable[MetadataWorkUnit]:
200
+ if not self.config.term_apply:
201
+ return
202
+ for apply in self.config.term_apply:
203
+ yield from self._handle_assets(apply.assets, apply.term_urn, "term")
204
+
205
+ def _yield_owner(self) -> Iterable[MetadataWorkUnit]:
206
+ if not self.config.owner_apply:
207
+ return
208
+ for apply in self.config.owner_apply:
209
+ yield from self._handle_assets(apply.assets, apply.owner_urn, "owner")
210
+
211
+ def get_workunits_internal(
212
+ self,
213
+ ) -> Iterable[MetadataWorkUnit]:
214
+ yield from self._yield_domain()
215
+ yield from self._yield_tag()
216
+ yield from self._yield_term()
217
+ yield from self._yield_owner()
218
+
219
+ def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
220
+ return [partial(auto_workunit_reporter, self.get_report())]
221
+
222
+ def get_report(self) -> SourceReport:
223
+ return self.report
@@ -177,7 +177,9 @@ def _get_generic_definition(
177
177
  class LookerConnectionDefinition(ConfigModel):
178
178
  platform: str
179
179
  default_db: str
180
- default_schema: Optional[str] # Optional since some sources are two-level only
180
+ default_schema: Optional[str] = (
181
+ None # Optional since some sources are two-level only
182
+ )
181
183
  platform_instance: Optional[str] = None
182
184
  platform_env: Optional[str] = Field(
183
185
  default=None,
@@ -32,6 +32,12 @@ class LookerField:
32
32
  sql: Optional[str]
33
33
 
34
34
 
35
+ @dataclass
36
+ class LookerConstant:
37
+ name: str
38
+ value: str
39
+
40
+
35
41
  @dataclass
36
42
  class LookerModel:
37
43
  connection: str
@@ -75,6 +81,7 @@ class LookerModel:
75
81
  try:
76
82
  parsed = load_and_preprocess_file(
77
83
  path=included_file,
84
+ reporter=reporter,
78
85
  source_config=source_config,
79
86
  )
80
87
  included_explores = parsed.get("explores", [])
@@ -217,6 +224,7 @@ class LookerModel:
217
224
  try:
218
225
  parsed = load_and_preprocess_file(
219
226
  path=included_file,
227
+ reporter=reporter,
220
228
  source_config=source_config,
221
229
  )
222
230
  seen_so_far.add(included_file)
@@ -4,7 +4,10 @@ from dataclasses import replace
4
4
  from typing import Dict, Optional
5
5
 
6
6
  from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
7
- from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
7
+ from datahub.ingestion.source.looker.looker_dataclasses import (
8
+ LookerConstant,
9
+ LookerViewFile,
10
+ )
8
11
  from datahub.ingestion.source.looker.looker_template_language import (
9
12
  load_and_preprocess_file,
10
13
  )
@@ -30,12 +33,14 @@ class LookerViewFileLoader:
30
33
  base_projects_folder: Dict[str, pathlib.Path],
31
34
  reporter: LookMLSourceReport,
32
35
  source_config: LookMLSourceConfig,
36
+ manifest_constants: Dict[str, LookerConstant] = {},
33
37
  ) -> None:
34
38
  self.viewfile_cache: Dict[str, Optional[LookerViewFile]] = {}
35
39
  self._root_project_name = root_project_name
36
40
  self._base_projects_folder = base_projects_folder
37
41
  self.reporter = reporter
38
42
  self.source_config = source_config
43
+ self.manifest_constants = manifest_constants
39
44
 
40
45
  def _load_viewfile(
41
46
  self, project_name: str, path: str, reporter: LookMLSourceReport
@@ -60,7 +65,7 @@ class LookerViewFileLoader:
60
65
  with open(path) as file:
61
66
  raw_file_content = file.read()
62
67
  except Exception as e:
63
- self.reporter.failure(
68
+ self.reporter.report_warning(
64
69
  title="LKML File Loading Error",
65
70
  message="A lookml file is not present on local storage or GitHub",
66
71
  context=f"file path: {path}",
@@ -71,9 +76,15 @@ class LookerViewFileLoader:
71
76
  try:
72
77
  logger.debug(f"Loading viewfile {path}")
73
78
 
79
+ # load_and preprocess_file is called multiple times for loading view file from multiple flows.
80
+ # Flag resolve_constants is a hack to avoid passing around manifest_constants from all of the flows.
81
+ # This is fine as rest of flows do not need resolution of constants.
74
82
  parsed = load_and_preprocess_file(
75
83
  path=path,
84
+ reporter=self.reporter,
76
85
  source_config=self.source_config,
86
+ resolve_constants=True,
87
+ manifest_constants=self.manifest_constants,
77
88
  )
78
89
 
79
90
  looker_viewfile = LookerViewFile.from_looker_dict(
@@ -90,7 +101,7 @@ class LookerViewFileLoader:
90
101
  self.viewfile_cache[path] = looker_viewfile
91
102
  return looker_viewfile
92
103
  except Exception as e:
93
- self.reporter.failure(
104
+ self.reporter.report_warning(
94
105
  title="LKML File Parsing Error",
95
106
  message="The input file is not lookml file",
96
107
  context=f"file path: {path}",