acryl-datahub 1.1.1rc2__py3-none-any.whl → 1.1.1rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.1.1rc2.dist-info → acryl_datahub-1.1.1rc3.dist-info}/METADATA +2530 -2528
- {acryl_datahub-1.1.1rc2.dist-info → acryl_datahub-1.1.1rc3.dist-info}/RECORD +16 -16
- datahub/_version.py +1 -1
- datahub/api/entities/dataset/dataset.py +8 -8
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/emitter/mce_builder.py +3 -1
- datahub/ingestion/source/dbt/dbt_common.py +30 -11
- datahub/ingestion/source/hex/query_fetcher.py +9 -3
- datahub/ingestion/source/sql/sql_types.py +5 -2
- datahub/metadata/_internal_schema_classes.py +1 -1
- datahub/metadata/schema.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
- {acryl_datahub-1.1.1rc2.dist-info → acryl_datahub-1.1.1rc3.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.1.1rc2.dist-info → acryl_datahub-1.1.1rc3.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.1.1rc2.dist-info → acryl_datahub-1.1.1rc3.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.1.1rc2.dist-info → acryl_datahub-1.1.1rc3.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.1.
|
|
1
|
+
acryl_datahub-1.1.1rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=YKv9NztkHrTZ4_8dkC02c7V9Al8mWiRqMfERfcuZjRA,321
|
|
5
5
|
datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -48,14 +48,14 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
|
|
|
48
48
|
datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
49
|
datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
|
|
50
50
|
datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
|
-
datahub/api/entities/dataset/dataset.py,sha256=
|
|
51
|
+
datahub/api/entities/dataset/dataset.py,sha256=Aa89GZA1R3kY6o5YahFC2YF3pq9B0pipWcKThrhY5l4,49481
|
|
52
52
|
datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
53
|
datahub/api/entities/forms/forms.py,sha256=B1KnoMmaXwznWdbjltoLq3sH9qj-BpzyC7z7FcwwOM4,15812
|
|
54
54
|
datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
|
|
55
55
|
datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
56
56
|
datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
|
|
57
57
|
datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
58
|
-
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=
|
|
58
|
+
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=CUAMxgQXhlTJThkidyfGiMIeVto4aveThaG2DfDIhZI,8547
|
|
59
59
|
datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
|
|
60
60
|
datahub/api/graphql/assertion.py,sha256=o_q6SV7N1rJTVMNKSUBGJnZPk6TcVYoVShgDmPw65dE,2817
|
|
61
61
|
datahub/api/graphql/base.py,sha256=zk724_oYSJ0nK7X7Z80MijnA6ry9JqpxnBsJeYuONKA,1737
|
|
@@ -120,7 +120,7 @@ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP
|
|
|
120
120
|
datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
|
|
121
121
|
datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
|
|
122
122
|
datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
|
|
123
|
-
datahub/emitter/mce_builder.py,sha256=
|
|
123
|
+
datahub/emitter/mce_builder.py,sha256=tAd3_hsGi94RGR0H8gFgAeW8dloKt6TXF45xPh6bu_M,16823
|
|
124
124
|
datahub/emitter/mcp.py,sha256=u6LphyhpbdFqboTAL_9MzXhGjc45o_BePoDFBkEEYWo,10484
|
|
125
125
|
datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
|
|
126
126
|
datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
|
|
@@ -290,7 +290,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
|
|
|
290
290
|
datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
|
|
291
291
|
datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
292
292
|
datahub/ingestion/source/dbt/dbt_cloud.py,sha256=_17ZX6WDzg3lKo0J5XukiaZ8AiJVFsg7y2IcMLMOUAQ,17785
|
|
293
|
-
datahub/ingestion/source/dbt/dbt_common.py,sha256=
|
|
293
|
+
datahub/ingestion/source/dbt/dbt_common.py,sha256=Hgdu6yisAOQ4mn98GjhnhTreUJffskMKD-tvYDMQ0bQ,82045
|
|
294
294
|
datahub/ingestion/source/dbt/dbt_core.py,sha256=qtfNQk28r4_hkf5sIkjfWfrvZbW8Q0NIFPi67NpPeB4,24824
|
|
295
295
|
datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
|
|
296
296
|
datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
|
|
@@ -335,7 +335,7 @@ datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJ
|
|
|
335
335
|
datahub/ingestion/source/hex/hex.py,sha256=hpMCkwH_RWdb0MG1U2-N71wc6vPUVQ3fAkTiudplzrQ,12968
|
|
336
336
|
datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
|
|
337
337
|
datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
|
|
338
|
-
datahub/ingestion/source/hex/query_fetcher.py,sha256=
|
|
338
|
+
datahub/ingestion/source/hex/query_fetcher.py,sha256=0VqDfviyfR14gUHvIBovCXEqwW4ftFehPSB2VzaYk14,13312
|
|
339
339
|
datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
340
340
|
datahub/ingestion/source/iceberg/iceberg.py,sha256=-8uaBerljvonaT7Gn9Evokq6-SSDiMRf8kKo7Hg1qY4,35414
|
|
341
341
|
datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
|
|
@@ -489,7 +489,7 @@ datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fx
|
|
|
489
489
|
datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
|
|
490
490
|
datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
|
|
491
491
|
datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6BpkwekxGrsvNMGYSGDio,2671
|
|
492
|
-
datahub/ingestion/source/sql/sql_types.py,sha256=
|
|
492
|
+
datahub/ingestion/source/sql/sql_types.py,sha256=TZSuK29Y5TPFIjzEXbwTbbkSQAS6Dnzuq8mCbJa5fK4,15119
|
|
493
493
|
datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
|
|
494
494
|
datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
|
|
495
495
|
datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=u0ZvgdJjXZdo_vl7YIQfYuuWbGwpnH6OSozI2e8ZV4I,858
|
|
@@ -598,8 +598,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
598
598
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
599
599
|
datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
|
|
600
600
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
601
|
-
datahub/metadata/_internal_schema_classes.py,sha256=
|
|
602
|
-
datahub/metadata/schema.avsc,sha256=
|
|
601
|
+
datahub/metadata/_internal_schema_classes.py,sha256=zFi0q-OOxdhPXr6ril8nEZhgChzXY_a3BkVwH3h_8bo,1013146
|
|
602
|
+
datahub/metadata/schema.avsc,sha256=dnBmtikhHW7neORZOUoiH21MZXoXRaw5E1gwwfyCnqs,701603
|
|
603
603
|
datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
|
|
604
604
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
605
605
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
@@ -722,7 +722,7 @@ datahub/metadata/schemas/DataHubActionKey.avsc,sha256=bjiKcoyvUPQKaGUi2ICBMJ_ukw
|
|
|
722
722
|
datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0UbXdJNnyt_oTn16XIe5ZlcqGk,1661
|
|
723
723
|
datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
|
|
724
724
|
datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
|
|
725
|
-
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=
|
|
725
|
+
datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
|
|
726
726
|
datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
|
|
727
727
|
datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
|
|
728
728
|
datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
|
|
@@ -1057,8 +1057,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1057
1057
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1058
1058
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1059
1059
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1060
|
-
acryl_datahub-1.1.
|
|
1061
|
-
acryl_datahub-1.1.
|
|
1062
|
-
acryl_datahub-1.1.
|
|
1063
|
-
acryl_datahub-1.1.
|
|
1064
|
-
acryl_datahub-1.1.
|
|
1060
|
+
acryl_datahub-1.1.1rc3.dist-info/METADATA,sha256=EU7551u1heqvgQuRO_U-7JIPE1hnZzewnH-c3EaWkyY,180694
|
|
1061
|
+
acryl_datahub-1.1.1rc3.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
|
|
1062
|
+
acryl_datahub-1.1.1rc3.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
|
|
1063
|
+
acryl_datahub-1.1.1rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1064
|
+
acryl_datahub-1.1.1rc3.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -15,7 +15,13 @@ from typing import (
|
|
|
15
15
|
|
|
16
16
|
import avro
|
|
17
17
|
import yaml
|
|
18
|
-
from pydantic import
|
|
18
|
+
from pydantic import (
|
|
19
|
+
BaseModel,
|
|
20
|
+
Field,
|
|
21
|
+
StrictStr,
|
|
22
|
+
root_validator,
|
|
23
|
+
validator,
|
|
24
|
+
)
|
|
19
25
|
from ruamel.yaml import YAML
|
|
20
26
|
from typing_extensions import TypeAlias
|
|
21
27
|
|
|
@@ -90,7 +96,7 @@ class StrictModel(BaseModel):
|
|
|
90
96
|
|
|
91
97
|
|
|
92
98
|
# Define type aliases for the complex types
|
|
93
|
-
PropertyValue: TypeAlias = Union[
|
|
99
|
+
PropertyValue: TypeAlias = Union[StrictStr, float]
|
|
94
100
|
PropertyValueList: TypeAlias = List[PropertyValue]
|
|
95
101
|
StructuredProperties: TypeAlias = Dict[str, Union[PropertyValue, PropertyValueList]]
|
|
96
102
|
|
|
@@ -366,12 +372,6 @@ class Ownership(ConfigModel):
|
|
|
366
372
|
return v
|
|
367
373
|
|
|
368
374
|
|
|
369
|
-
class StructuredPropertyValue(ConfigModel):
|
|
370
|
-
value: Union[str, int, float, List[str], List[int], List[float]]
|
|
371
|
-
created: Optional[str] = None
|
|
372
|
-
lastModified: Optional[str] = None
|
|
373
|
-
|
|
374
|
-
|
|
375
375
|
class DatasetRetrievalConfig(BaseModel):
|
|
376
376
|
include_downstreams: Optional[bool] = False
|
|
377
377
|
|
|
@@ -4,7 +4,7 @@ from pathlib import Path
|
|
|
4
4
|
from typing import Iterable, List, Optional, Union
|
|
5
5
|
|
|
6
6
|
import yaml
|
|
7
|
-
from pydantic import validator
|
|
7
|
+
from pydantic import StrictStr, validator
|
|
8
8
|
from ruamel.yaml import YAML
|
|
9
9
|
|
|
10
10
|
from datahub.configuration.common import ConfigModel
|
|
@@ -38,7 +38,7 @@ class AllowedTypes(Enum):
|
|
|
38
38
|
|
|
39
39
|
|
|
40
40
|
class AllowedValue(ConfigModel):
|
|
41
|
-
value: Union[
|
|
41
|
+
value: Union[StrictStr, float]
|
|
42
42
|
description: Optional[str] = None
|
|
43
43
|
|
|
44
44
|
|
datahub/emitter/mce_builder.py
CHANGED
|
@@ -59,6 +59,7 @@ from datahub.metadata.urns import (
|
|
|
59
59
|
DataJobUrn,
|
|
60
60
|
DataPlatformUrn,
|
|
61
61
|
DatasetUrn,
|
|
62
|
+
OwnershipTypeUrn,
|
|
62
63
|
TagUrn,
|
|
63
64
|
)
|
|
64
65
|
from datahub.utilities.urn_encoder import UrnEncoder
|
|
@@ -406,7 +407,8 @@ def make_ml_model_group_urn(platform: str, group_name: str, env: str) -> str:
|
|
|
406
407
|
|
|
407
408
|
def validate_ownership_type(ownership_type: str) -> Tuple[str, Optional[str]]:
|
|
408
409
|
if ownership_type.startswith("urn:li:"):
|
|
409
|
-
|
|
410
|
+
ownership_type_urn = OwnershipTypeUrn.from_string(ownership_type)
|
|
411
|
+
return OwnershipTypeClass.CUSTOM, ownership_type_urn.urn()
|
|
410
412
|
ownership_type = ownership_type.upper()
|
|
411
413
|
if ownership_type in get_enum_options(OwnershipTypeClass):
|
|
412
414
|
return ownership_type, None
|
|
@@ -132,6 +132,12 @@ class DBTSourceReport(StaleEntityRemovalSourceReport):
|
|
|
132
132
|
sql_parser_column_errors: int = 0
|
|
133
133
|
sql_parser_successes: int = 0
|
|
134
134
|
|
|
135
|
+
# Details on where column info comes from.
|
|
136
|
+
nodes_with_catalog_columns: int = 0
|
|
137
|
+
nodes_with_inferred_columns: int = 0
|
|
138
|
+
nodes_with_graph_columns: int = 0
|
|
139
|
+
nodes_with_no_columns: int = 0
|
|
140
|
+
|
|
135
141
|
sql_parser_parse_failures_list: LossyList[str] = field(default_factory=LossyList)
|
|
136
142
|
sql_parser_detach_ctes_failures_list: LossyList[str] = field(
|
|
137
143
|
default_factory=LossyList
|
|
@@ -619,14 +625,8 @@ class DBTNode:
|
|
|
619
625
|
def exists_in_target_platform(self):
|
|
620
626
|
return not (self.is_ephemeral_model() or self.node_type == "test")
|
|
621
627
|
|
|
622
|
-
def
|
|
623
|
-
"""
|
|
624
|
-
Update the column list if they are not already set.
|
|
625
|
-
"""
|
|
626
|
-
|
|
627
|
-
if self.columns:
|
|
628
|
-
# If we already have columns, don't overwrite them.
|
|
629
|
-
return
|
|
628
|
+
def set_columns(self, schema_fields: List[SchemaField]) -> None:
|
|
629
|
+
"""Update the column list."""
|
|
630
630
|
|
|
631
631
|
self.columns = [
|
|
632
632
|
DBTColumn(
|
|
@@ -1248,9 +1248,28 @@ class DBTSourceBase(StatefulIngestionSourceBase):
|
|
|
1248
1248
|
target_node_urn, self._to_schema_info(inferred_schema_fields)
|
|
1249
1249
|
)
|
|
1250
1250
|
|
|
1251
|
-
#
|
|
1252
|
-
|
|
1253
|
-
|
|
1251
|
+
# When updating the node's columns, our order of preference is:
|
|
1252
|
+
# 1. Schema from the dbt catalog
|
|
1253
|
+
# 2. Inferred schema
|
|
1254
|
+
# 3. Schema fetched from the graph
|
|
1255
|
+
if node.columns:
|
|
1256
|
+
self.report.nodes_with_catalog_columns += 1
|
|
1257
|
+
pass # we already have columns from the dbt catalog
|
|
1258
|
+
elif inferred_schema_fields:
|
|
1259
|
+
logger.debug(
|
|
1260
|
+
f"Using {len(inferred_schema_fields)} inferred columns for {node.dbt_name}"
|
|
1261
|
+
)
|
|
1262
|
+
self.report.nodes_with_inferred_columns += 1
|
|
1263
|
+
node.set_columns(inferred_schema_fields)
|
|
1264
|
+
elif schema_fields:
|
|
1265
|
+
logger.debug(
|
|
1266
|
+
f"Using {len(schema_fields)} graph columns for {node.dbt_name}"
|
|
1267
|
+
)
|
|
1268
|
+
self.report.nodes_with_graph_columns += 1
|
|
1269
|
+
node.set_columns(schema_fields)
|
|
1270
|
+
else:
|
|
1271
|
+
logger.debug(f"No columns found for {node.dbt_name}")
|
|
1272
|
+
self.report.nodes_with_no_columns += 1
|
|
1254
1273
|
|
|
1255
1274
|
def _parse_cll(
|
|
1256
1275
|
self,
|
|
@@ -18,8 +18,12 @@ from datahub.utilities.time import datetime_to_ts_millis
|
|
|
18
18
|
logger = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
20
|
# Pattern to extract both project_id and workspace_name from Hex metadata in SQL comments
|
|
21
|
-
#
|
|
22
|
-
|
|
21
|
+
# Context values:
|
|
22
|
+
# - SCHEDULED_RUN: The query was executed during a scheduled run of a published Hex app.
|
|
23
|
+
# - LOGIC_VIEW: The query was executed from the Hex project's notebook view. This happens when a user is actively editing a Hex notebook: when they first open and run it, or when they rerun it without cached results.
|
|
24
|
+
# - APP_VIEW: The query was executed during a published app session. This happens when a user opens up a published app or reruns the app without cached results.
|
|
25
|
+
# Only match metadata whose "context" is SCHEDULED_RUN or APP_VIEW. This filters out queries run from the notebook view (LOGIC_VIEW), which tend to bring more development noise than value.
|
|
26
|
+
HEX_METADATA_PATTERN = r'-- Hex query metadata: \{.*?"context": "(?:SCHEDULED_RUN|APP_VIEW)".*?"project_id": "([^"]+)".*?"project_url": "https?://[^/]+/([^/]+)/hex/.*?\}'
|
|
23
27
|
|
|
24
28
|
|
|
25
29
|
@dataclass
|
|
@@ -197,13 +201,15 @@ class HexQueryFetcher:
|
|
|
197
201
|
Example:
|
|
198
202
|
-- Hex query metadata: {"categories": ["Scratchpad"], "cell_type": "SQL", "connection": "Long Tail Companions", "context": "SCHEDULED_RUN", "project_id": "d73da67d-c87b-4dd8-9e7f-b79cb7f822cf", "project_url": "https://app.hex.tech/acryl-partnership/hex/d73da67d-c87b-4dd8-9e7f-b79cb7f822cf/draft/logic?selectedCellId=67c38da0-e631-4005-9750-5bdae2a2ef3f"}
|
|
199
203
|
|
|
200
|
-
|
|
204
|
+
TODO: Consider supporting multiline metadata format in the future:
|
|
201
205
|
# -- Hex query metadata: {
|
|
202
206
|
# -- "categories": ["Scratchpad"],
|
|
203
207
|
# -- "project_id": "d73da67d-c87b-4dd8-9e7f-b79cb7f822cf",
|
|
204
208
|
# -- ...
|
|
205
209
|
# -- }
|
|
206
210
|
|
|
211
|
+
TODO: Extraction based on pattern matching is strict about the order of the keys in the metadata. Consider using a more flexible approach like JSON parsing.
|
|
212
|
+
|
|
207
213
|
Returns:
|
|
208
214
|
A tuple of (project_id, workspace_name) if both are successfully extracted
|
|
209
215
|
None if extraction fails for any reason
|
|
@@ -284,6 +284,8 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
|
|
|
284
284
|
"INTEGER": NumberType,
|
|
285
285
|
"BIGINT": NumberType,
|
|
286
286
|
"SMALLINT": NumberType,
|
|
287
|
+
"TINYINT": NumberType,
|
|
288
|
+
"BYTEINT": NumberType,
|
|
287
289
|
"FLOAT": NumberType,
|
|
288
290
|
"FLOAT4": NumberType,
|
|
289
291
|
"FLOAT8": NumberType,
|
|
@@ -291,6 +293,7 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
|
|
|
291
293
|
"DOUBLE PRECISION": NumberType,
|
|
292
294
|
"REAL": NumberType,
|
|
293
295
|
"VARCHAR": StringType,
|
|
296
|
+
"CHARACTER VARYING": StringType,
|
|
294
297
|
"CHAR": StringType,
|
|
295
298
|
"CHARACTER": StringType,
|
|
296
299
|
"STRING": StringType,
|
|
@@ -313,8 +316,8 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
|
|
|
313
316
|
|
|
314
317
|
|
|
315
318
|
def resolve_snowflake_modified_type(type_string: str) -> Any:
|
|
316
|
-
# Match types with precision and scale, e.g., 'DECIMAL(38,0)'
|
|
317
|
-
match = re.match(r"([a-
|
|
319
|
+
# Match types with a precision and an optional scale, e.g., 'DECIMAL(38,0)' or 'TIME(3)'
|
|
320
|
+
match = re.match(r"([a-z A-Z_]+)\(\d+(,(\s+)?\d+)?\)", type_string)
|
|
318
321
|
if match:
|
|
319
322
|
modified_type_base = match.group(1) # Extract the base type
|
|
320
323
|
return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
|
|
@@ -15431,7 +15431,7 @@ class DataHubIngestionSourceKeyClass(_Aspect):
|
|
|
15431
15431
|
|
|
15432
15432
|
|
|
15433
15433
|
ASPECT_NAME = 'dataHubIngestionSourceKey'
|
|
15434
|
-
ASPECT_INFO = {'keyForEntity': 'dataHubIngestionSource', 'entityCategory': 'internal', 'entityAspects': ['dataHubIngestionSourceInfo']}
|
|
15434
|
+
ASPECT_INFO = {'keyForEntity': 'dataHubIngestionSource', 'entityCategory': 'internal', 'entityAspects': ['dataHubIngestionSourceInfo', 'ownership']}
|
|
15435
15435
|
RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubIngestionSourceKey")
|
|
15436
15436
|
|
|
15437
15437
|
def __init__(self,
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -3768,7 +3768,8 @@
|
|
|
3768
3768
|
"keyForEntity": "dataHubIngestionSource",
|
|
3769
3769
|
"entityCategory": "internal",
|
|
3770
3770
|
"entityAspects": [
|
|
3771
|
-
"dataHubIngestionSourceInfo"
|
|
3771
|
+
"dataHubIngestionSourceInfo",
|
|
3772
|
+
"ownership"
|
|
3772
3773
|
]
|
|
3773
3774
|
},
|
|
3774
3775
|
"name": "DataHubIngestionSourceKey",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|