acryl-datahub 1.0.0.3rc8__py3-none-any.whl → 1.0.0.3rc9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0.3rc8.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/METADATA +2462 -2462
- {acryl_datahub-1.0.0.3rc8.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/RECORD +13 -13
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +9 -11
- datahub/configuration/common.py +5 -0
- datahub/ingestion/api/source_helpers.py +1 -0
- datahub/ingestion/source/slack/slack.py +4 -52
- datahub/ingestion/source/snowflake/snowflake_connection.py +19 -1
- datahub/specific/dataset.py +12 -0
- {acryl_datahub-1.0.0.3rc8.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0.3rc8.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.0.0.3rc8.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.0.0.3rc8.dist-info → acryl_datahub-1.0.0.3rc9.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.0.0.3rc8.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
1
|
+
acryl_datahub-1.0.0.3rc9.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=6XeiyYGjXD3cLrKhOmtOz90fhxaal2Ir3lq-m_cgOes,323
|
|
5
5
|
datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -48,7 +48,7 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
|
|
|
48
48
|
datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
|
|
50
50
|
datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
datahub/api/entities/dataset/dataset.py,sha256=
|
|
51
|
+
datahub/api/entities/dataset/dataset.py,sha256=se2tv6jsvS5BmH5b53mKs8lEPF4LNzVRrd9PDovPIQk,49627
|
|
52
52
|
datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
53
|
datahub/api/entities/forms/forms.py,sha256=17GLVVrunUj6hWS7CADhNPrT4gV6il905Ny_Y_5S5Qc,15889
|
|
54
54
|
datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
|
|
@@ -94,7 +94,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Rb06XJKxqda5RSUo188B90Wn
|
|
|
94
94
|
datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
|
|
95
95
|
datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
|
|
96
96
|
datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
|
|
97
|
-
datahub/configuration/common.py,sha256=
|
|
97
|
+
datahub/configuration/common.py,sha256=0OXk_yhyVJroP47Nant5-5fRWOahkVB8YU4G70TR9BI,10692
|
|
98
98
|
datahub/configuration/config_loader.py,sha256=hRzPFxkz-w9IqkpSa5vwCzSra1p49DyfeJNeyqGa8-4,6827
|
|
99
99
|
datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
|
|
100
100
|
datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
|
|
@@ -146,7 +146,7 @@ datahub/ingestion/api/report.py,sha256=eM_TWWz6iJNd-c_S2_4eg2qKLGYP8vSROb_TMiCwB
|
|
|
146
146
|
datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
|
|
147
147
|
datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
|
|
148
148
|
datahub/ingestion/api/source.py,sha256=2h7Tx2As0gu5-6d7PiRuJ8myr_y3MRx2YYgH735Jj18,19494
|
|
149
|
-
datahub/ingestion/api/source_helpers.py,sha256=
|
|
149
|
+
datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
|
|
150
150
|
datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
|
|
151
151
|
datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
|
|
152
152
|
datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -445,14 +445,14 @@ datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiR
|
|
|
445
445
|
datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
|
|
446
446
|
datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
|
|
447
447
|
datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
448
|
-
datahub/ingestion/source/slack/slack.py,sha256=
|
|
448
|
+
datahub/ingestion/source/slack/slack.py,sha256=B_HyRlhY2VWjMHDspQZqqejvVhnTREFx5B2GNa0jqzE,25903
|
|
449
449
|
datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
450
450
|
datahub/ingestion/source/snowflake/constants.py,sha256=XCW3vw4JfLn_s8-oXBX6WFNMPOkX5qAQLFX2KqagojQ,2678
|
|
451
451
|
datahub/ingestion/source/snowflake/oauth_config.py,sha256=ol9D3RmruGStJAeL8PYSQguSqcD2HfkjPkMF2AB_eZs,1277
|
|
452
452
|
datahub/ingestion/source/snowflake/oauth_generator.py,sha256=fu2VnREGuJXeTqIV2jx4TwieVnznf83HQkrE0h2DGGM,3423
|
|
453
453
|
datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81xxdeizJn9nJCZ_nMIXgk9N6pEk5o,4803
|
|
454
454
|
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=SD2agFE64WgEDbQHPXQjAIP4gsHT1G9H8X_r-RvKGas,20804
|
|
455
|
-
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=
|
|
455
|
+
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=50fHPCzOny-3Mh_DxJU83mPTww5m5T48pzqb8m6QYuo,18491
|
|
456
456
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
457
457
|
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
|
|
458
458
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
@@ -924,7 +924,7 @@ datahub/specific/chart.py,sha256=NPdzDHcZkPodthOn9c8QF_aDEo2y4lCJ4t1sI556uZ0,668
|
|
|
924
924
|
datahub/specific/dashboard.py,sha256=3AsXZ1Cp03uaTHsOmJqEiXzJjZUBgDbX-zmgwMw908o,11514
|
|
925
925
|
datahub/specific/datajob.py,sha256=yZXic3CuCGxg-ewnaHphoH9Jjpe-P09XbN7T-rrvkyE,13711
|
|
926
926
|
datahub/specific/dataproduct.py,sha256=SrBNYACDVatuXA64GCHA0Igaes24ajJqTcXmDTT1FPA,2877
|
|
927
|
-
datahub/specific/dataset.py,sha256=
|
|
927
|
+
datahub/specific/dataset.py,sha256=E4XTS4T1c5mjdDn0sTAZsXJ4eK-REV3wwhSxTQkjWJs,10067
|
|
928
928
|
datahub/specific/form.py,sha256=hbxmmBWHma0d4NCZEGR6Nr6R-5A5gYgl1mmkGgnM97o,3834
|
|
929
929
|
datahub/specific/structured_property.py,sha256=NZ2yppDgtqrH04Wn3_m5IupyLeWoBCMygyr3nEi1A3o,4038
|
|
930
930
|
datahub/specific/aspect_helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -1050,8 +1050,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1050
1050
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1051
1051
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1052
1052
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1053
|
-
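acryl_datahub-1.0.0.3rc8.dist-info/METADATA,sha256=
|
|
1054
|
-
acryl_datahub-1.0.0.3rc8.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
1055
|
-
acryl_datahub-1.0.0.3rc8.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1056
|
-
acryl_datahub-1.0.0.3rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1057
|
-
acryl_datahub-1.0.0.3rc8.dist-info/RECORD,,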
|
|
1053
|
+
acryl_datahub-1.0.0.3rc9.dist-info/METADATA,sha256=c2nBayXTv9LuBzQY7HuANB9MbIfnnA0H6z_DUTq4GKE,176989
|
|
1054
|
+
acryl_datahub-1.0.0.3rc9.dist-info/WHEEL,sha256=pxyMxgL8-pra_rKaQ4drOZAegBVuX-G_4nRHjjgWbmo,91
|
|
1055
|
+
acryl_datahub-1.0.0.3rc9.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1056
|
+
acryl_datahub-1.0.0.3rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1057
|
+
acryl_datahub-1.0.0.3rc9.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
datahub/api/entities/dataset/dataset.py
CHANGED
|
@@ -509,16 +509,14 @@ class Dataset(StrictModel):
|
|
|
509
509
|
def generate_mcp(
|
|
510
510
|
self,
|
|
511
511
|
) -> Iterable[Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper]]:
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
)
|
|
521
|
-
yield mcp
|
|
512
|
+
patch_builder = self.patch_builder()
|
|
513
|
+
|
|
514
|
+
patch_builder.set_custom_properties(self.properties or {})
|
|
515
|
+
patch_builder.set_description(self.description)
|
|
516
|
+
patch_builder.set_display_name(self.name)
|
|
517
|
+
patch_builder.set_external_url(self.external_url)
|
|
518
|
+
|
|
519
|
+
yield from patch_builder.build()
|
|
522
520
|
|
|
523
521
|
if self.schema_metadata:
|
|
524
522
|
schema_fields = set()
|
|
@@ -981,7 +979,7 @@ class Dataset(StrictModel):
|
|
|
981
979
|
|
|
982
980
|
def model_dump(self, **kwargs):
|
|
983
981
|
"""Custom model_dump method for Pydantic v2 to handle YAML serialization properly."""
|
|
984
|
-
exclude = kwargs.pop("exclude", set())
|
|
982
|
+
exclude = kwargs.pop("exclude", None) or set()
|
|
985
983
|
|
|
986
984
|
# If id and name are identical, exclude name from the output
|
|
987
985
|
if self.id == self.name and self.id is not None:
|
datahub/configuration/common.py
CHANGED
|
datahub/ingestion/api/source_helpers.py
CHANGED
|
@@ -92,6 +92,7 @@ def create_dataset_props_patch_builder(
|
|
|
92
92
|
patch_builder.set_last_modified(dataset_properties.lastModified)
|
|
93
93
|
patch_builder.set_qualified_name(dataset_properties.qualifiedName)
|
|
94
94
|
patch_builder.add_custom_properties(dataset_properties.customProperties)
|
|
95
|
+
patch_builder.set_external_url(dataset_properties.externalUrl)
|
|
95
96
|
|
|
96
97
|
return patch_builder
|
|
97
98
|
|
|
datahub/ingestion/source/slack/slack.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
import textwrap
|
|
4
3
|
from dataclasses import dataclass
|
|
5
4
|
from typing import Any, Dict, Iterable, List, Optional, Tuple
|
|
6
5
|
|
|
@@ -613,6 +612,10 @@ class SlackSource(StatefulIngestionSourceBase):
|
|
|
613
612
|
),
|
|
614
613
|
)
|
|
615
614
|
|
|
615
|
+
@retry(
|
|
616
|
+
wait=wait_exponential(multiplier=2, min=4, max=60),
|
|
617
|
+
before_sleep=before_sleep_log(logger, logging.ERROR, True),
|
|
618
|
+
)
|
|
616
619
|
def get_user_to_be_updated(
|
|
617
620
|
self,
|
|
618
621
|
) -> Iterable[Tuple[CorpUser, Optional[CorpUserEditableInfoClass]]]:
|
|
@@ -634,56 +637,5 @@ class SlackSource(StatefulIngestionSourceBase):
|
|
|
634
637
|
if user_obj.email is not None:
|
|
635
638
|
yield (user_obj, editable_properties)
|
|
636
639
|
|
|
637
|
-
@retry(
|
|
638
|
-
wait=wait_exponential(multiplier=2, min=4, max=60),
|
|
639
|
-
before_sleep=before_sleep_log(logger, logging.ERROR, True),
|
|
640
|
-
)
|
|
641
|
-
def get_user_to_be_updated_oss(self) -> Iterable[CorpUser]:
|
|
642
|
-
graphql_query = textwrap.dedent(
|
|
643
|
-
"""
|
|
644
|
-
query listUsers($input: ListUsersInput!) {
|
|
645
|
-
listUsers(input: $input) {
|
|
646
|
-
total
|
|
647
|
-
users {
|
|
648
|
-
urn
|
|
649
|
-
editableProperties {
|
|
650
|
-
email
|
|
651
|
-
slack
|
|
652
|
-
}
|
|
653
|
-
}
|
|
654
|
-
}
|
|
655
|
-
}
|
|
656
|
-
"""
|
|
657
|
-
)
|
|
658
|
-
start = 0
|
|
659
|
-
count = 10
|
|
660
|
-
total = count
|
|
661
|
-
|
|
662
|
-
assert self.ctx.graph is not None
|
|
663
|
-
|
|
664
|
-
while start < total:
|
|
665
|
-
variables = {"input": {"start": start, "count": count}}
|
|
666
|
-
response = self.ctx.graph.execute_graphql(
|
|
667
|
-
query=graphql_query, variables=variables
|
|
668
|
-
)
|
|
669
|
-
list_users = response.get("listUsers", {})
|
|
670
|
-
total = list_users.get("total", 0)
|
|
671
|
-
users = list_users.get("users", [])
|
|
672
|
-
for user in users:
|
|
673
|
-
user_obj = CorpUser()
|
|
674
|
-
editable_properties = user.get("editableProperties", {})
|
|
675
|
-
user_obj.urn = user.get("urn")
|
|
676
|
-
if user_obj.urn is None:
|
|
677
|
-
continue
|
|
678
|
-
if editable_properties is not None:
|
|
679
|
-
user_obj.email = editable_properties.get("email")
|
|
680
|
-
if user_obj.email is None:
|
|
681
|
-
urn_id = Urn.from_string(user_obj.urn).get_entity_id_as_string()
|
|
682
|
-
if "@" in urn_id:
|
|
683
|
-
user_obj.email = urn_id
|
|
684
|
-
if user_obj.email is not None:
|
|
685
|
-
yield user_obj
|
|
686
|
-
start += count
|
|
687
|
-
|
|
688
640
|
def get_report(self) -> SourceReport:
|
|
689
641
|
return self.report
|
|
datahub/ingestion/source/snowflake/snowflake_connection.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
+
import threading
|
|
2
3
|
from typing import Any, Dict, Optional
|
|
3
4
|
|
|
4
5
|
import pydantic
|
|
@@ -402,13 +403,30 @@ class SnowflakeConnection(Closeable):
|
|
|
402
403
|
def __init__(self, connection: NativeSnowflakeConnection):
|
|
403
404
|
self._connection = connection
|
|
404
405
|
|
|
406
|
+
self._query_num_lock = threading.Lock()
|
|
407
|
+
self._query_num = 1
|
|
408
|
+
|
|
405
409
|
def native_connection(self) -> NativeSnowflakeConnection:
|
|
406
410
|
return self._connection
|
|
407
411
|
|
|
412
|
+
def get_query_no(self) -> int:
|
|
413
|
+
with self._query_num_lock:
|
|
414
|
+
no = self._query_num
|
|
415
|
+
self._query_num += 1
|
|
416
|
+
return no
|
|
417
|
+
|
|
408
418
|
def query(self, query: str) -> Any:
|
|
409
419
|
try:
|
|
410
|
-
|
|
420
|
+
# We often run multiple queries in parallel across multiple threads,
|
|
421
|
+
# so we need to number them to help with log readability.
|
|
422
|
+
query_num = self.get_query_no()
|
|
423
|
+
logger.info(f"Query #{query_num}: {query}", stacklevel=2)
|
|
411
424
|
resp = self._connection.cursor(DictCursor).execute(query)
|
|
425
|
+
if resp is not None and resp.rowcount is not None:
|
|
426
|
+
logger.info(
|
|
427
|
+
f"Query #{query_num} got {resp.rowcount} row(s) back from Snowflake",
|
|
428
|
+
stacklevel=2,
|
|
429
|
+
)
|
|
412
430
|
return resp
|
|
413
431
|
|
|
414
432
|
except Exception as e:
|
datahub/specific/dataset.py
CHANGED
|
@@ -292,3 +292,15 @@ class DatasetPatchBuilder(
|
|
|
292
292
|
value=timestamp,
|
|
293
293
|
)
|
|
294
294
|
return self
|
|
295
|
+
|
|
296
|
+
def set_external_url(
|
|
297
|
+
self, external_url: Optional[str] = None
|
|
298
|
+
) -> "DatasetPatchBuilder":
|
|
299
|
+
if external_url is not None:
|
|
300
|
+
self._add_patch(
|
|
301
|
+
DatasetProperties.ASPECT_NAME,
|
|
302
|
+
"add",
|
|
303
|
+
path=("externalUrl",),
|
|
304
|
+
value=external_url,
|
|
305
|
+
)
|
|
306
|
+
return self
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|