acryl-datahub 1.3.1.1rc1__py3-none-any.whl → 1.3.1.1rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.3.1.1rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.3.1.1rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=EeAHlBCqg0RG-oXfYfDsP4a2hg6rh7Jnhdkg-lWY3Es,323
4
+ datahub/_version.py,sha256=m-yUiaArabPX6tKfFCNQY2lBFEsMvc1WrBPy2LEU7gw,323
5
5
  datahub/entrypoints.py,sha256=VcbU6Z47b_JKW1zI-WJMYIngm05FSogKLiuvFNtyNcI,9088
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -323,7 +323,7 @@ datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=MQk8BAHL
323
323
  datahub/ingestion/source/dremio/dremio_entities.py,sha256=e5NgsMMMiyNAEWeJX3iIasY-i8FL9bTozLyWCIFjrUU,16339
324
324
  datahub/ingestion/source/dremio/dremio_profiling.py,sha256=fxNlIHjZx2wrpU_zoGa89yvckgC99hj5Ki_Cnh3KRIw,12742
325
325
  datahub/ingestion/source/dremio/dremio_reporting.py,sha256=UEj-6FMdIWsry5535_kM2hLze5aPRMatTwvI0Bd2BSo,2140
326
- datahub/ingestion/source/dremio/dremio_source.py,sha256=iESzBbPLzSNBjNhkoeY0nlchgF13W8UthFc-3oCjAd8,25478
326
+ datahub/ingestion/source/dremio/dremio_source.py,sha256=LANQSJwHhgYVVvitIH72vn1GOTARBYvroRofGPZuu00,25658
327
327
  datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=wA1hqKk9cKMJDyEdZRQcDDLZPGYwuNqrvleUHTkWgrQ,10508
328
328
  datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
329
329
  datahub/ingestion/source/dynamodb/data_reader.py,sha256=vC77KpcP8LJN0g8wsPRDVw4sebv0ZWIP3tJkEIHaomA,3120
@@ -572,18 +572,19 @@ datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=wsVD0SkGUwb-H9
572
572
  datahub/ingestion/source/tableau/tableau_validation.py,sha256=Hjbfc1AMIkGgzo5ffWXtNRjrxSxzHvw7-dYZDt4d3WE,1819
573
573
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
574
574
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
575
- datahub/ingestion/source/unity/config.py,sha256=9vUpkuGOzUaatY3pf2U3Bl-GHkeBhbciUA2k3VYv50I,19912
576
- datahub/ingestion/source/unity/connection.py,sha256=iCsQhZ1vxzv1qQKTl_sFUZdmBLLIrNdu2X2V8hT7IGI,2441
577
- datahub/ingestion/source/unity/connection_test.py,sha256=Dwpz4AIc6ZDwq6pWmRCSCuDUgNjPP_bVAVJumgAAS4w,2661
575
+ datahub/ingestion/source/unity/azure_auth_config.py,sha256=JZKrncqGvXlNP3e8ukYNPIoA2zwEDmjTtnGVc7brgvE,626
576
+ datahub/ingestion/source/unity/config.py,sha256=IHlwcRplXf8BR3yPHounYy2Ua_5L2pen3eMhTCt6I_w,20586
577
+ datahub/ingestion/source/unity/connection.py,sha256=K56rwh1LcCVKOmh3YO43Gn7tPburXNRWQQ1sl_gzgSA,2669
578
+ datahub/ingestion/source/unity/connection_test.py,sha256=uAoDUOBPTFQ-1m0lQW3AZ2pXMVaIGkook3oR3BIQyn8,2683
578
579
  datahub/ingestion/source/unity/ge_profiler.py,sha256=NBRHZceq-f95iUn7u0h7cgcd9nAc48Aa-lmp_BqE0As,8409
579
580
  datahub/ingestion/source/unity/hive_metastore_proxy.py,sha256=IAWWJjaW0si_UF52Se2D7wmdYRY_afUG4QlVmQu6xaw,15351
580
581
  datahub/ingestion/source/unity/platform_resource_repository.py,sha256=znDmtddQB6B0owcWgOf5DACV2hbe7lseO7hwGszYTYs,541
581
- datahub/ingestion/source/unity/proxy.py,sha256=LclyOQ0rxlCuSaSIo5x8jW4PqMT19Jdg_fde1WbS1Gs,56606
582
+ datahub/ingestion/source/unity/proxy.py,sha256=8IrS5wl1jQiAhFHJEp35F9EYZD5xK6b4MSSEcIovbhk,56898
582
583
  datahub/ingestion/source/unity/proxy_patch.py,sha256=gVYl5Fm_ase0iwBf3yDg7PE3bbTOl92RR-JgofHNkus,12374
583
584
  datahub/ingestion/source/unity/proxy_profiling.py,sha256=WLqvYP6MziaisA4LYL4T_GA-kPt6Xdde7bfaYsjYw40,9663
584
585
  datahub/ingestion/source/unity/proxy_types.py,sha256=3ChQa8tjrXd89o1twB988e_uiJSz0UWne_G1jcBUpJo,13702
585
586
  datahub/ingestion/source/unity/report.py,sha256=wa5ER1hLc-362iWS9MNwDJf_adfAb3tfge_O3wSAaTc,3291
586
- datahub/ingestion/source/unity/source.py,sha256=w3ub6ChKjaeKpRkvnBFRKQ5msm0WXjdJzM7387Nzjak,57405
587
+ datahub/ingestion/source/unity/source.py,sha256=NQKgr5Vk1zXsR_UllHQWd1ATCWUM1SxWlZGYswHZqhU,57526
587
588
  datahub/ingestion/source/unity/tag_entities.py,sha256=-Z-XYc1XhquE-Eoksn9v0o11ZjV9CWz8n6zeXLbzluQ,7275
588
589
  datahub/ingestion/source/unity/usage.py,sha256=5CMQ0uOwuXfRq0rdxQbiHwrfSkvmQ5R2GNLqqZ2dOJw,13351
589
590
  datahub/ingestion/source/usage/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -649,7 +650,7 @@ datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1
649
650
  datahub/lite/lite_util.py,sha256=DOIzuU78oxdyHGV-MjpS_iUzO_zF0oa9KJemFhfBp8U,4555
650
651
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
651
652
  datahub/metadata/_internal_schema_classes.py,sha256=PBMgJ92rA0JQlEKPcjcn_eFdjEVPZnapmi8gmHYgheQ,1086866
652
- datahub/metadata/schema.avsc,sha256=-P9qY8bKEj2ilPHKlYErL0eAxnkBALTIs7Rr0hzFRyg,787557
653
+ datahub/metadata/schema.avsc,sha256=mj0wn3jv2XPajNcmXgBxuLdDeTvSYLWP877IJrvtf20,787709
653
654
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
654
655
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
655
656
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -933,7 +934,7 @@ datahub/metadata/schemas/PostKey.avsc,sha256=MEA-S-RonGrsrpwYEejR6CXRka2DcqP7S7q
933
934
  datahub/metadata/schemas/QuantitativeAnalyses.avsc,sha256=XrGYVoGUh7ZRjAbMORSiqF5Zl4a0IbYDPiQuSJhABZM,960
934
935
  datahub/metadata/schemas/QueryKey.avsc,sha256=VI4oIHvAO7f0lN_7V3QVuBfHcPz31c57XtW6IrlDfxc,518
935
936
  datahub/metadata/schemas/QueryProperties.avsc,sha256=26Q3zzuzJbUCUG7IJ3q_OEdcNbyzloZzDJWPbv3GQAk,5589
936
- datahub/metadata/schemas/QuerySubjects.avsc,sha256=PDM6Ek1AkjwLGDk17Rjg7KVtE2tbgFpc1XTGkhoG4MA,1437
937
+ datahub/metadata/schemas/QuerySubjects.avsc,sha256=EpwpqwQBgTIcPlYDIR-J_4VlQ8Y66xSExMdxJxpctxI,1441
937
938
  datahub/metadata/schemas/QueryUsageStatistics.avsc,sha256=z1gfAnXdBoPEeERi5RESjrdBuS6AcIdqdN5JqWOSuNo,6192
938
939
  datahub/metadata/schemas/RelationshipChangeEvent.avsc,sha256=il7yiTXf9nkZvSyqH8GUu0j2jws_n-QlABXThLJ871c,8288
939
940
  datahub/metadata/schemas/RoleKey.avsc,sha256=Uas5jFViSHXhFqq8D4P6-UXqywOth3coztjQ5wA7wL0,449
@@ -948,7 +949,7 @@ datahub/metadata/schemas/SlackUserInfo.avsc,sha256=IY7InWaiDzJa3hJ9J4W3Eg8EUKuh0
948
949
  datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
949
950
  datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
950
951
  datahub/metadata/schemas/StructuredProperties.avsc,sha256=qe45sKZ9XrLcf15Gt03Ttzt2J_kJYHvN-DAOSErSYuY,7028
951
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=TCvF80cW_2uEV2yJZOjAdxVV-Wr-4OsasSAM-5QDoAo,11921
952
+ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=n7WFG8ugTeBP3lB1KxrMPENw7aS73w8lkzRFcKcmNOk,12059
952
953
  datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
953
954
  datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=oHPbDuV_Wr_zR9vmTwZZKRzeaBx5nyqN1zn0cfIRhuQ,4044
954
955
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
@@ -971,8 +972,8 @@ datahub/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52
971
972
  datahub/metadata/schemas/__init__.py,sha256=kCcak_fBn_KyuysZTJIoipAzZ8EO44Amk4DWSEvplEY,581
972
973
  datahub/pydantic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
973
974
  datahub/pydantic/compat.py,sha256=TUEo4kSEeOWVAhV6LQtst1phrpVgGtK4uif4OI5vQ2M,1937
974
- datahub/sdk/__init__.py,sha256=66OOcFi7qlnL6q72c_yUX2mWU2HudbOdRsC5CIoDxow,1922
975
- datahub/sdk/_all_entities.py,sha256=eQAmD_fcEHlTShe1_nHpdvHxLDN9njk9bdLnuTrYg8M,905
975
+ datahub/sdk/__init__.py,sha256=Bo7pnilTfV4ucYy0zHHFQCymnNw7_dNuBnwPKowORC0,1954
976
+ datahub/sdk/_all_entities.py,sha256=CllJAJ0gncg10Qm6VCSQTqDOU98P0Yt-gSh8BCZccec,946
976
977
  datahub/sdk/_attribution.py,sha256=0Trh8steVd27GOr9MKCZeawbuDD2_q3GIsZlCtHqEUg,1321
977
978
  datahub/sdk/_shared.py,sha256=tlyxyxgo7x-8sJhUG9VvUiLpxcOP-GIg4ToqPixagbw,34221
978
979
  datahub/sdk/_utils.py,sha256=oXE2BzsXE5zmSkCP3R1tObD4RHnPeH_ps83D_Dw9JaQ,1169
@@ -991,6 +992,7 @@ datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8
991
992
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
992
993
  datahub/sdk/search_client.py,sha256=f2L_aOy-pPB9Mk7WdLSJ6Htp3OT4cEgDIrqnCweNtM8,3592
993
994
  datahub/sdk/search_filters.py,sha256=XG-V4V21DWwFDdBFowC0PsZ5I8H2snEfX_-0jFn5zYY,22929
995
+ datahub/sdk/tag.py,sha256=jZHtL3hyiibrCXb0i_q8QsL8eT6dCT4egpqBWrTVe04,3123
994
996
  datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
995
997
  datahub/secret/datahub_secret_store.py,sha256=XjmDElUUEzozw589-NN63LcTArquJhiEzxOKKTP-NcA,2665
996
998
  datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
@@ -1133,8 +1135,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1133
1135
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1134
1136
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1135
1137
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1136
- acryl_datahub-1.3.1.1rc1.dist-info/METADATA,sha256=rFziskaXXNthb4jDqBkMBVYPZXxgRIpVioW85ZFjUzE,184975
1137
- acryl_datahub-1.3.1.1rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1138
- acryl_datahub-1.3.1.1rc1.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1139
- acryl_datahub-1.3.1.1rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1140
- acryl_datahub-1.3.1.1rc1.dist-info/RECORD,,
1138
+ acryl_datahub-1.3.1.1rc3.dist-info/METADATA,sha256=r0p0khmFOz6-qenHWVttyw2qegGG8qriPU--__AHceA,184975
1139
+ acryl_datahub-1.3.1.1rc3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1140
+ acryl_datahub-1.3.1.1rc3.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1141
+ acryl_datahub-1.3.1.1rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1142
+ acryl_datahub-1.3.1.1rc3.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.3.1.1rc1"
3
+ __version__ = "1.3.1.1rc3"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/ingestion/source/dremio/dremio_source.py CHANGED
@@ -336,10 +336,10 @@ class DremioSource(StatefulIngestionSourceBase):
336
336
  return
337
337
 
338
338
  dataset_urn = make_dataset_urn_with_platform_instance(
339
- platform=self.get_platform(),
340
- name=dataset_name,
341
- platform_instance=self.config.platform_instance,
339
+ platform=make_data_platform_urn(self.get_platform()),
340
+ name=f"dremio.{dataset_name}",
342
341
  env=self.config.env,
342
+ platform_instance=self.config.platform_instance,
343
343
  )
344
344
 
345
345
  for dremio_mcp in self.dremio_aspects.populate_dataset_mcp(
@@ -419,10 +419,10 @@ class DremioSource(StatefulIngestionSourceBase):
419
419
  schema_str = ".".join(dataset_info.path)
420
420
  dataset_name = f"{schema_str}.{dataset_info.resource_name}".lower()
421
421
  dataset_urn = make_dataset_urn_with_platform_instance(
422
- platform=self.get_platform(),
423
- name=dataset_name,
424
- platform_instance=self.config.platform_instance,
422
+ platform=make_data_platform_urn(self.get_platform()),
423
+ name=f"dremio.{dataset_name}",
425
424
  env=self.config.env,
425
+ platform_instance=self.config.platform_instance,
426
426
  )
427
427
  yield from self.profiler.get_workunits(dataset_info, dataset_urn)
428
428
 
@@ -434,10 +434,10 @@ class DremioSource(StatefulIngestionSourceBase):
434
434
  """
435
435
  upstream_urns = [
436
436
  make_dataset_urn_with_platform_instance(
437
- platform=self.get_platform(),
438
- name=upstream_table.lower(),
439
- platform_instance=self.config.platform_instance,
437
+ platform=make_data_platform_urn(self.get_platform()),
438
+ name=f"dremio.{upstream_table.lower()}",
440
439
  env=self.config.env,
440
+ platform_instance=self.config.platform_instance,
441
441
  )
442
442
  for upstream_table in parents
443
443
  ]
@@ -496,19 +496,19 @@ class DremioSource(StatefulIngestionSourceBase):
496
496
  if query.query and query.affected_dataset:
497
497
  upstream_urns = [
498
498
  make_dataset_urn_with_platform_instance(
499
- platform=self.get_platform(),
500
- name=ds.lower(),
501
- platform_instance=self.config.platform_instance,
499
+ platform=make_data_platform_urn(self.get_platform()),
500
+ name=f"dremio.{ds.lower()}",
502
501
  env=self.config.env,
502
+ platform_instance=self.config.platform_instance,
503
503
  )
504
504
  for ds in query.queried_datasets
505
505
  ]
506
506
 
507
507
  downstream_urn = make_dataset_urn_with_platform_instance(
508
- platform=self.get_platform(),
509
- name=query.affected_dataset.lower(),
510
- platform_instance=self.config.platform_instance,
508
+ platform=make_data_platform_urn(self.get_platform()),
509
+ name=f"dremio.{query.affected_dataset.lower()}",
511
510
  env=self.config.env,
511
+ platform_instance=self.config.platform_instance,
512
512
  )
513
513
 
514
514
  # Add query to SqlParsingAggregator
datahub/ingestion/source/unity/azure_auth_config.py ADDED
@@ -0,0 +1,15 @@
1
+ from pydantic import Field, SecretStr
2
+
3
+ from datahub.configuration import ConfigModel
4
+
5
+
6
+ class AzureAuthConfig(ConfigModel):
7
+ client_secret: SecretStr = Field(
8
+ description="Azure application client secret used for authentication. This is a confidential credential that should be kept secure."
9
+ )
10
+ client_id: str = Field(
11
+ description="Azure application (client) ID. This is the unique identifier for the registered Azure AD application.",
12
+ )
13
+ tenant_id: str = Field(
14
+ description="Azure tenant (directory) ID. This identifies the Azure AD tenant where the application is registered.",
15
+ )
datahub/ingestion/source/unity/config.py CHANGED
@@ -413,6 +413,24 @@ class UnityCatalogSourceConfig(
413
413
  )
414
414
  return workspace_url
415
415
 
416
+ @model_validator(mode="before")
417
+ def either_token_or_azure_auth_provided(cls, values: dict) -> dict:
418
+ token = values.get("token")
419
+ azure_auth = values.get("azure_auth")
420
+
421
+ # Check if exactly one of the authentication methods is provided
422
+ if not token and not azure_auth:
423
+ raise ValueError(
424
+ "Either 'azure_auth' or 'token' (personal access token) must be provided in the configuration."
425
+ )
426
+
427
+ if token and azure_auth:
428
+ raise ValueError(
429
+ "Cannot specify both 'token' and 'azure_auth'. Please provide only one authentication method."
430
+ )
431
+
432
+ return values
433
+
416
434
  @field_validator("include_metastore", mode="after")
417
435
  @classmethod
418
436
  def include_metastore_warning(cls, v: bool) -> bool:
datahub/ingestion/source/unity/connection.py CHANGED
@@ -8,6 +8,7 @@ from pydantic import Field
8
8
 
9
9
  from datahub.configuration.common import ConfigModel
10
10
  from datahub.ingestion.source.sql.sqlalchemy_uri import make_sqlalchemy_uri
11
+ from datahub.ingestion.source.unity.azure_auth_config import AzureAuthConfig
11
12
 
12
13
  DATABRICKS = "databricks"
13
14
 
@@ -19,7 +20,12 @@ class UnityCatalogConnectionConfig(ConfigModel):
19
20
  """
20
21
 
21
22
  scheme: str = DATABRICKS
22
- token: str = pydantic.Field(description="Databricks personal access token")
23
+ token: Optional[str] = pydantic.Field(
24
+ default=None, description="Databricks personal access token"
25
+ )
26
+ azure_auth: Optional[AzureAuthConfig] = Field(
27
+ default=None, description="Azure configuration"
28
+ )
23
29
  workspace_url: str = pydantic.Field(
24
30
  description="Databricks workspace url. e.g. https://my-workspace.cloud.databricks.com"
25
31
  )
datahub/ingestion/source/unity/connection_test.py CHANGED
@@ -16,10 +16,10 @@ class UnityCatalogConnectionTest:
16
16
  self.report = UnityCatalogReport()
17
17
  self.proxy = UnityCatalogApiProxy(
18
18
  self.config.workspace_url,
19
- self.config.token,
20
19
  self.config.profiling.warehouse_id,
21
20
  report=self.report,
22
21
  databricks_api_page_size=self.config.databricks_api_page_size,
22
+ personal_access_token=self.config.token,
23
23
  )
24
24
 
25
25
  def get_connection_test(self) -> TestConnectionReport:
datahub/ingestion/source/unity/proxy.py CHANGED
@@ -44,6 +44,7 @@ from typing_extensions import assert_never
44
44
  from datahub._version import nice_version_name
45
45
  from datahub.api.entities.external.unity_catalog_external_entites import UnityCatalogTag
46
46
  from datahub.emitter.mce_builder import parse_ts_millis
47
+ from datahub.ingestion.source.unity.azure_auth_config import AzureAuthConfig
47
48
  from datahub.ingestion.source.unity.config import (
48
49
  LineageDataSource,
49
50
  UsageDataSource,
@@ -169,20 +170,31 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
169
170
  def __init__(
170
171
  self,
171
172
  workspace_url: str,
172
- personal_access_token: str,
173
173
  warehouse_id: Optional[str],
174
174
  report: UnityCatalogReport,
175
175
  hive_metastore_proxy: Optional[HiveMetastoreProxy] = None,
176
176
  lineage_data_source: LineageDataSource = LineageDataSource.AUTO,
177
177
  usage_data_source: UsageDataSource = UsageDataSource.AUTO,
178
178
  databricks_api_page_size: int = 0,
179
+ personal_access_token: Optional[str] = None,
180
+ azure_auth: Optional[AzureAuthConfig] = None,
179
181
  ):
180
- self._workspace_client = WorkspaceClient(
181
- host=workspace_url,
182
- token=personal_access_token,
183
- product="datahub",
184
- product_version=nice_version_name(),
185
- )
182
+ if azure_auth:
183
+ self._workspace_client = WorkspaceClient(
184
+ host=workspace_url,
185
+ azure_tenant_id=azure_auth.tenant_id,
186
+ azure_client_id=azure_auth.client_id,
187
+ azure_client_secret=azure_auth.client_secret.get_secret_value(),
188
+ product="datahub",
189
+ product_version=nice_version_name(),
190
+ )
191
+ else:
192
+ self._workspace_client = WorkspaceClient(
193
+ host=workspace_url,
194
+ token=personal_access_token,
195
+ product="datahub",
196
+ product_version=nice_version_name(),
197
+ )
186
198
  self.warehouse_id = warehouse_id or ""
187
199
  self.report = report
188
200
  self.hive_metastore_proxy = hive_metastore_proxy
@@ -336,24 +348,18 @@ class UnityCatalogApiProxy(UnityCatalogProxyProfilingMixin):
336
348
  except (json.JSONDecodeError, TypeError) as e:
337
349
  logger.debug(f"Failed to parse outputs JSON: {e}")
338
350
 
339
- if "parameters" in signature_raw:
351
+ if "params" in signature_raw:
340
352
  try:
341
- signature_data["parameters"] = json.loads(
342
- signature_raw["parameters"]
353
+ signature_data["params"] = json.loads(
354
+ signature_raw["params"]
343
355
  )
344
356
  except (json.JSONDecodeError, TypeError) as e:
345
- logger.debug(f"Failed to parse parameters JSON: {e}")
357
+ logger.debug(f"Failed to parse params JSON: {e}")
346
358
 
347
359
  return ModelSignature(
348
- inputs=signature_data["inputs"]
349
- if "inputs" in signature_raw
350
- else None,
351
- outputs=signature_data["outputs"]
352
- if "outputs" in signature_raw
353
- else None,
354
- parameters=signature_data["parameters"]
355
- if "parameters" in signature_raw
356
- else None,
360
+ inputs=signature_data.get("inputs"),
361
+ outputs=signature_data.get("outputs"),
362
+ parameters=signature_data.get("params"),
357
363
  )
358
364
  else:
359
365
  logger.debug(
datahub/ingestion/source/unity/source.py CHANGED
@@ -211,13 +211,14 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
211
211
 
212
212
  self.unity_catalog_api_proxy = UnityCatalogApiProxy(
213
213
  config.workspace_url,
214
- config.token,
215
214
  config.warehouse_id,
216
215
  report=self.report,
217
216
  hive_metastore_proxy=self.hive_metastore_proxy,
218
217
  lineage_data_source=config.lineage_data_source,
219
218
  usage_data_source=config.usage_data_source,
220
219
  databricks_api_page_size=config.databricks_api_page_size,
220
+ personal_access_token=config.token if config.token else None,
221
+ azure_auth=config.azure_auth if config.azure_auth else None,
221
222
  )
222
223
 
223
224
  self.external_url_base = urljoin(self.config.workspace_url, "/explore/data")
datahub/metadata/schema.avsc CHANGED
@@ -4824,7 +4824,7 @@
4824
4824
  {
4825
4825
  "Searchable": {
4826
4826
  "fieldName": "entities",
4827
- "fieldType": "URN"
4827
+ "fieldType": "KEYWORD"
4828
4828
  },
4829
4829
  "java": {
4830
4830
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
@@ -20303,7 +20303,12 @@
20303
20303
  "doc": "The fully qualified name of the property. e.g. io.acryl.datahub.myProperty"
20304
20304
  },
20305
20305
  {
20306
- "Searchable": {},
20306
+ "Searchable": {
20307
+ "enableAutocomplete": true,
20308
+ "fieldType": "WORD_GRAM",
20309
+ "searchLabel": "entityName",
20310
+ "searchTier": 1
20311
+ },
20307
20312
  "type": [
20308
20313
  "null",
20309
20314
  "string"
datahub/metadata/schemas/QuerySubjects.avsc CHANGED
@@ -17,7 +17,7 @@
17
17
  {
18
18
  "Searchable": {
19
19
  "fieldName": "entities",
20
- "fieldType": "URN"
20
+ "fieldType": "KEYWORD"
21
21
  },
22
22
  "java": {
23
23
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
datahub/metadata/schemas/StructuredPropertyDefinition.avsc CHANGED
@@ -17,7 +17,12 @@
17
17
  "doc": "The fully qualified name of the property. e.g. io.acryl.datahub.myProperty"
18
18
  },
19
19
  {
20
- "Searchable": {},
20
+ "Searchable": {
21
+ "enableAutocomplete": true,
22
+ "fieldType": "WORD_GRAM",
23
+ "searchLabel": "entityName",
24
+ "searchTier": 1
25
+ },
21
26
  "type": [
22
27
  "null",
23
28
  "string"
datahub/sdk/__init__.py CHANGED
@@ -28,6 +28,7 @@ from datahub.sdk.main_client import DataHubClient
28
28
  from datahub.sdk.mlmodel import MLModel
29
29
  from datahub.sdk.mlmodelgroup import MLModelGroup
30
30
  from datahub.sdk.search_filters import Filter, FilterDsl
31
+ from datahub.sdk.tag import Tag
31
32
 
32
33
  # We want to print out the warning if people do `from datahub.sdk import X`.
33
34
  # But we don't want to print out warnings if they're doing a more direct
datahub/sdk/_all_entities.py CHANGED
@@ -9,6 +9,7 @@ from datahub.sdk.dataset import Dataset
9
9
  from datahub.sdk.entity import Entity
10
10
  from datahub.sdk.mlmodel import MLModel
11
11
  from datahub.sdk.mlmodelgroup import MLModelGroup
12
+ from datahub.sdk.tag import Tag
12
13
 
13
14
  # Base entity classes that don't have circular dependencies
14
15
  # Those that do are imported in the EntityClient where needed
@@ -22,6 +23,7 @@ ENTITY_CLASSES_LIST: List[Type[Entity]] = [
22
23
  DataJob,
23
24
  Dashboard,
24
25
  Chart,
26
+ Tag,
25
27
  ]
26
28
 
27
29
  # Create the mapping of entity types to classes
datahub/sdk/tag.py ADDED
@@ -0,0 +1,112 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Optional, Type
4
+
5
+ from typing_extensions import Self
6
+
7
+ import datahub.metadata.schema_classes as models
8
+ from datahub.metadata.urns import TagUrn, Urn
9
+ from datahub.sdk._shared import (
10
+ HasOwnership,
11
+ OwnersInputType,
12
+ )
13
+ from datahub.sdk.entity import Entity, ExtraAspectsType
14
+
15
+
16
+ class Tag(
17
+ HasOwnership,
18
+ Entity,
19
+ ):
20
+ __slots__ = ()
21
+
22
+ @classmethod
23
+ def get_urn_type(cls) -> Type[TagUrn]:
24
+ return TagUrn
25
+
26
+ def __init__(
27
+ self,
28
+ *,
29
+ # Identity.
30
+ name: str,
31
+ # Tag properties.
32
+ display_name: Optional[str] = None,
33
+ description: Optional[str] = None,
34
+ color: Optional[str] = None,
35
+ # Standard aspects.
36
+ owners: Optional[OwnersInputType] = None,
37
+ extra_aspects: ExtraAspectsType = None,
38
+ ):
39
+ """Initialize a new Tag instance."""
40
+ urn = TagUrn(name=name)
41
+ super().__init__(urn)
42
+ self._set_extra_aspects(extra_aspects)
43
+
44
+ self._ensure_tag_props(
45
+ display_name=display_name or name,
46
+ description=description,
47
+ color=color,
48
+ )
49
+
50
+ if owners is not None:
51
+ self.set_owners(owners)
52
+
53
+ @classmethod
54
+ def _new_from_graph(cls, urn: Urn, current_aspects: models.AspectBag) -> Self:
55
+ assert isinstance(urn, TagUrn)
56
+ entity = cls(name=urn.name)
57
+ return entity._init_from_graph(current_aspects)
58
+
59
+ @property
60
+ def urn(self) -> TagUrn:
61
+ assert isinstance(self._urn, TagUrn)
62
+ return self._urn
63
+
64
+ def _ensure_tag_props(
65
+ self,
66
+ *,
67
+ display_name: Optional[str] = None,
68
+ description: Optional[str] = None,
69
+ color: Optional[str] = None,
70
+ ) -> models.TagPropertiesClass:
71
+ existing_props = self._get_aspect(models.TagPropertiesClass)
72
+ if existing_props is not None:
73
+ if display_name is not None:
74
+ existing_props.name = display_name
75
+ if description is not None:
76
+ existing_props.description = description
77
+ if color is not None:
78
+ existing_props.colorHex = color
79
+ return existing_props
80
+
81
+ return self._setdefault_aspect(
82
+ models.TagPropertiesClass(
83
+ name=display_name or self.urn.name,
84
+ description=description,
85
+ colorHex=color,
86
+ )
87
+ )
88
+
89
+ @property
90
+ def name(self) -> str:
91
+ return self.urn.name
92
+
93
+ @property
94
+ def display_name(self) -> str:
95
+ return self._ensure_tag_props().name
96
+
97
+ def set_display_name(self, display_name: str) -> None:
98
+ self._ensure_tag_props(display_name=display_name)
99
+
100
+ @property
101
+ def description(self) -> Optional[str]:
102
+ return self._ensure_tag_props().description
103
+
104
+ def set_description(self, description: str) -> None:
105
+ self._ensure_tag_props(description=description)
106
+
107
+ @property
108
+ def color(self) -> Optional[str]:
109
+ return self._ensure_tag_props().colorHex
110
+
111
+ def set_color(self, color: str) -> None:
112
+ self._ensure_tag_props(color=color)