acryl-datahub: acryl_datahub-1.0.0.3rc8-py3-none-any.whl → acryl_datahub-1.0.0.3rc9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.0.0.3rc8.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.0.0.3rc9.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=Zb8TjJYjx7zz4w3IY_sYhrV3x0o_kqbFVKFrYP8Ld48,323
4
+ datahub/_version.py,sha256=6XeiyYGjXD3cLrKhOmtOz90fhxaal2Ir3lq-m_cgOes,323
5
5
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,7 +48,7 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
48
48
  datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
50
50
  datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- datahub/api/entities/dataset/dataset.py,sha256=UX9UW5z9realL7lPxAhaKKigpVAqtxAfhqS6vY2eDnU,49638
51
+ datahub/api/entities/dataset/dataset.py,sha256=se2tv6jsvS5BmH5b53mKs8lEPF4LNzVRrd9PDovPIQk,49627
52
52
  datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  datahub/api/entities/forms/forms.py,sha256=17GLVVrunUj6hWS7CADhNPrT4gV6il905Ny_Y_5S5Qc,15889
54
54
  datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
@@ -94,7 +94,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Rb06XJKxqda5RSUo188B90Wn
94
94
  datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
95
95
  datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
96
96
  datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
97
- datahub/configuration/common.py,sha256=2AUcZD8c4W8gdopt07UcQYPLl0DR-O7Spb-LUqguJXc,10619
97
+ datahub/configuration/common.py,sha256=0OXk_yhyVJroP47Nant5-5fRWOahkVB8YU4G70TR9BI,10692
98
98
  datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
99
99
  datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
100
100
  datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
@@ -146,7 +146,7 @@ datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwB
146
146
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
147
147
  datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
148
148
  datahub/ingestion/api/source.py,sha256=2h7Tx2As0gu5-6d7PiRuJ8myr_y3MRx2YYgH735Jj18,19494
149
- datahub/ingestion/api/source_helpers.py,sha256=tzFl2sP-sFOZWX0O0eEQK-0tJcJ5vMs2bccpKRKBIFY,21081
149
+ datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
150
150
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
151
151
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
152
152
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -445,14 +445,14 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
445
445
  datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
446
446
  datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
447
447
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
448
- datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
448
+ datahub/ingestion/source/slack/slack.py,sha256=B_HyRlhY2VWjMHDspQZqqejvVhnTREFx5B2GNa0jqzE,25903
449
449
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
450
450
  datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMPOkX5qAQLFX2KqagojQ,2678
451
451
  datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
452
452
  datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
453
453
  datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
454
454
  datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
455
- datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDiROlkF8k4oj5zBjkqTnynLvut08yhc,17796
455
+ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=50fHPCzOny-3Mh_DxJU83mPTww5m5T48pzqb8m6QYuo,18491
456
456
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
457
457
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
458
458
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
@@ -924,7 +924,7 @@ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,668
924
924
  datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
925
925
  datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
926
926
  datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
927
- datahub/specific/dataset.py,sha256=EhSjarFfvxF-JbVuNBNIcV1pEebqAdcYHDZIhnG_8sk,9714
927
+ datahub/specific/dataset.py,sha256=E4XTS4T1c5mjdDn0sTAZsXJ4eK-REV3wwhSxTQkjWJs,10067
928
928
  datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
929
929
  datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
930
930
  datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -1050,8 +1050,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1050
1050
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1051
1051
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1052
1052
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1053
- acryl_datahub-1.0.0.3rc8.dist-info/METADATA,sha256=d-sEix3GRzPU8h4yA867OWhRXlXMRTsHYGGBtfBuk-c,176989
1054
- acryl_datahub-1.0.0.3rc8.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
1055
- acryl_datahub-1.0.0.3rc8.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1056
- acryl_datahub-1.0.0.3rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1057
- acryl_datahub-1.0.0.3rc8.dist-info/RECORD,,
1053
+ acryl_datahub-1.0.0.3rc9.dist-info/METADATA,sha256=c2nBayXTv9LuBzQY7HuANB9MbIfnnA0H6z_DUTq4GKE,176989
1054
+ acryl_datahub-1.0.0.3rc9.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
1055
+ acryl_datahub-1.0.0.3rc9.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1056
+ acryl_datahub-1.0.0.3rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1057
+ acryl_datahub-1.0.0.3rc9.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0.3rc8"
3
+ __version__ = "1.0.0.3rc9"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
datahub/api/entities/dataset/dataset.py CHANGED
@@ -509,16 +509,14 @@ class Dataset(StrictModel):
509
509
  def generate_mcp(
510
510
  self,
511
511
  ) -> Iterable[Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper]]:
512
- mcp = MetadataChangeProposalWrapper(
513
- entityUrn=self.urn,
514
- aspect=DatasetPropertiesClass(
515
- description=self.description,
516
- name=self.name,
517
- customProperties=self.properties,
518
- externalUrl=self.external_url,
519
- ),
520
- )
521
- yield mcp
512
+ patch_builder = self.patch_builder()
513
+
514
+ patch_builder.set_custom_properties(self.properties or {})
515
+ patch_builder.set_description(self.description)
516
+ patch_builder.set_display_name(self.name)
517
+ patch_builder.set_external_url(self.external_url)
518
+
519
+ yield from patch_builder.build()
522
520
 
523
521
  if self.schema_metadata:
524
522
  schema_fields = set()
@@ -981,7 +979,7 @@ class Dataset(StrictModel):
981
979
 
982
980
  def model_dump(self, **kwargs):
983
981
  """Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
984
- exclude = kwargs.pop("exclude", set())
982
+ exclude = kwargs.pop("exclude", None) or set()
985
983
 
986
984
  # If id and name are identical, exclude name from the output
987
985
  if self.id == self.name and self.id is not None:
datahub/configuration/common.py CHANGED
@@ -33,10 +33,15 @@ REDACT_KEYS = {
33
33
  }
34
34
  REDACT_SUFFIXES = {
35
35
  "_password",
36
+ "-password",
36
37
  "_secret",
38
+ "-secret",
37
39
  "_token",
40
+ "-token",
38
41
  "_key",
42
+ "-key",
39
43
  "_key_id",
44
+ "-key-id",
40
45
  }
41
46
 
42
47
 
datahub/ingestion/api/source_helpers.py CHANGED
@@ -92,6 +92,7 @@ def create_dataset_props_patch_builder(
92
92
  patch_builder.set_last_modified(dataset_properties.lastModified)
93
93
  patch_builder.set_qualified_name(dataset_properties.qualifiedName)
94
94
  patch_builder.add_custom_properties(dataset_properties.customProperties)
95
+ patch_builder.set_external_url(dataset_properties.externalUrl)
95
96
 
96
97
  return patch_builder
97
98
 
datahub/ingestion/source/slack/slack.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  import logging
3
- import textwrap
4
3
  from dataclasses import dataclass
5
4
  from typing import Any, Dict, Iterable, List, Optional, Tuple
6
5
 
@@ -613,6 +612,10 @@ class SlackSource(StatefulIngestionSourceBase):
613
612
  ),
614
613
  )
615
614
 
615
+ @retry(
616
+ wait=wait_exponential(multiplier=2, min=4, max=60),
617
+ before_sleep=before_sleep_log(logger, logging.ERROR, True),
618
+ )
616
619
  def get_user_to_be_updated(
617
620
  self,
618
621
  ) -> Iterable[Tuple[CorpUser, Optional[CorpUserEditableInfoClass]]]:
@@ -634,56 +637,5 @@ class SlackSource(StatefulIngestionSourceBase):
634
637
  if user_obj.email is not None:
635
638
  yield (user_obj, editable_properties)
636
639
 
637
- @retry(
638
- wait=wait_exponential(multiplier=2, min=4, max=60),
639
- before_sleep=before_sleep_log(logger, logging.ERROR, True),
640
- )
641
- def get_user_to_be_updated_oss(self) -> Iterable[CorpUser]:
642
- graphql_query = textwrap.dedent(
643
- """
644
- query listUsers($input: ListUsersInput!) {
645
- listUsers(input: $input) {
646
- total
647
- users {
648
- urn
649
- editableProperties {
650
- email
651
- slack
652
- }
653
- }
654
- }
655
- }
656
- """
657
- )
658
- start = 0
659
- count = 10
660
- total = count
661
-
662
- assert self.ctx.graph is not None
663
-
664
- while start < total:
665
- variables = {"input": {"start": start, "count": count}}
666
- response = self.ctx.graph.execute_graphql(
667
- query=graphql_query, variables=variables
668
- )
669
- list_users = response.get("listUsers", {})
670
- total = list_users.get("total", 0)
671
- users = list_users.get("users", [])
672
- for user in users:
673
- user_obj = CorpUser()
674
- editable_properties = user.get("editableProperties", {})
675
- user_obj.urn = user.get("urn")
676
- if user_obj.urn is None:
677
- continue
678
- if editable_properties is not None:
679
- user_obj.email = editable_properties.get("email")
680
- if user_obj.email is None:
681
- urn_id = Urn.from_string(user_obj.urn).get_entity_id_as_string()
682
- if "@" in urn_id:
683
- user_obj.email = urn_id
684
- if user_obj.email is not None:
685
- yield user_obj
686
- start += count
687
-
688
640
  def get_report(self) -> SourceReport:
689
641
  return self.report
datahub/ingestion/source/snowflake/snowflake_connection.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ import threading
2
3
  from typing import Any, Dict, Optional
3
4
 
4
5
  import pydantic
@@ -402,13 +403,30 @@ class SnowflakeConnection(Closeable):
402
403
  def __init__(self, connection: NativeSnowflakeConnection):
403
404
  self._connection = connection
404
405
 
406
+ self._query_num_lock = threading.Lock()
407
+ self._query_num = 1
408
+
405
409
  def native_connection(self) -> NativeSnowflakeConnection:
406
410
  return self._connection
407
411
 
412
+ def get_query_no(self) -> int:
413
+ with self._query_num_lock:
414
+ no = self._query_num
415
+ self._query_num += 1
416
+ return no
417
+
408
418
  def query(self, query: str) -> Any:
409
419
  try:
410
- logger.info(f"Query: {query}", stacklevel=2)
420
+ # We often run multiple queries in parallel across multiple threads,
421
+ # so we need to number them to help with log readability.
422
+ query_num = self.get_query_no()
423
+ logger.info(f"Query #{query_num}: {query}", stacklevel=2)
411
424
  resp = self._connection.cursor(DictCursor).execute(query)
425
+ if resp is not None and resp.rowcount is not None:
426
+ logger.info(
427
+ f"Query #{query_num} got {resp.rowcount} row(s) back from Snowflake",
428
+ stacklevel=2,
429
+ )
412
430
  return resp
413
431
 
414
432
  except Exception as e:
datahub/specific/dataset.py CHANGED
@@ -292,3 +292,15 @@ class DatasetPatchBuilder(
292
292
  value=timestamp,
293
293
  )
294
294
  return self
295
+
296
+ def set_external_url(
297
+ self, external_url: Optional[str] = None
298
+ ) -> "DatasetPatchBuilder":
299
+ if external_url is not None:
300
+ self._add_patch(
301
+ DatasetProperties.ASPECT_NAME,
302
+ "add",
303
+ path=("externalUrl",),
304
+ value=external_url,
305
+ )
306
+ return self