acryl-datahub 1.1.1rc2__py3-none-any.whl → 1.1.1rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.1.1rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.1.1rc3.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=DKcjhwmDecfgLuBvtigWjIpy1_XPd7kbxrjvQhUCCoE,321
4
+ datahub/_version.py,sha256=YKv9NztkHrTZ4_8dkC02c7V9Al8mWiRqMfERfcuZjRA,321
5
5
  datahub/entrypoints.py,sha256=H-YFTvxTJOgpWsFBVlxyb1opjkq-hjTzNmjy5Fq3RHg,8992
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -48,14 +48,14 @@ datahub/api/entities/dataprocess/dataprocess_instance.py,sha256=IhY-rcXs-r8EatwW
48
48
  datahub/api/entities/dataproduct/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
49
  datahub/api/entities/dataproduct/dataproduct.py,sha256=148TmItxDDyGNzfZdL8aDreSEtyAw79IN8N8oSmNOPE,21461
50
50
  datahub/api/entities/dataset/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
- datahub/api/entities/dataset/dataset.py,sha256=se2tv6jsvS5BmH5b53mKs8lEPF4LNzVRrd9PDovPIQk,49627
51
+ datahub/api/entities/dataset/dataset.py,sha256=Aa89GZA1R3kY6o5YahFC2YF3pq9B0pipWcKThrhY5l4,49481
52
52
  datahub/api/entities/forms/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  datahub/api/entities/forms/forms.py,sha256=B1KnoMmaXwznWdbjltoLq3sH9qj-BpzyC7z7FcwwOM4,15812
54
54
  datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp63V4LX4THGTAMq3ep8THrSGP4,537
55
55
  datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
56
  datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
57
57
  datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- datahub/api/entities/structuredproperties/structuredproperties.py,sha256=FU50bB1bgGDbitdzK9gHxWMj83KUuxK80mhpK1KmGZQ,8535
58
+ datahub/api/entities/structuredproperties/structuredproperties.py,sha256=CUAMxgQXhlTJThkidyfGiMIeVto4aveThaG2DfDIhZI,8547
59
59
  datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
60
60
  datahub/api/graphql/assertion.py,sha256=o_q6SV7N1rJTVMNKSUBGJnZPk6TcVYoVShgDmPw65dE,2817
61
61
  datahub/api/graphql/base.py,sha256=zk724_oYSJ0nK7X7Z80MijnA6ry9JqpxnBsJeYuONKA,1737
@@ -120,7 +120,7 @@ datahub/emitter/composite_emitter.py,sha256=ZU-IdlAXKGPtmyT0JJgYC09vRn-TmeNaA6VP
120
120
  datahub/emitter/enum_helpers.py,sha256=QBOEUu_hDCvyL_v4ayNQV8XwJbf5zKyu0Xat0mI1Kgo,376
121
121
  datahub/emitter/generic_emitter.py,sha256=i37ZFm9VR_tmiZm9kIypEkQEB_cLKbzj_tJvViN-fm8,828
122
122
  datahub/emitter/kafka_emitter.py,sha256=Uix1W1WaXF8VqUTUfzdRZKca2XrR1w50Anx2LVkROlc,5822
123
- datahub/emitter/mce_builder.py,sha256=i-iLLdnuy7h1JrzwC2sCtQthbso-cNj1uijOQZKHbeA,16717
123
+ datahub/emitter/mce_builder.py,sha256=tAd3_hsGi94RGR0H8gFgAeW8dloKt6TXF45xPh6bu_M,16823
124
124
  datahub/emitter/mcp.py,sha256=u6LphyhpbdFqboTAL_9MzXhGjc45o_BePoDFBkEEYWo,10484
125
125
  datahub/emitter/mcp_builder.py,sha256=8IwJAlolQkPpMqQJPLtGrsUqAcuFNs98nrI5iYUxgaU,11920
126
126
  datahub/emitter/mcp_patch_builder.py,sha256=u7cpW6DkiN7KpLapmMaXgL_FneoN69boxiANbVgMdSI,4564
@@ -290,7 +290,7 @@ datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vB
290
290
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
291
291
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
292
292
  datahub/ingestion/source/dbt/dbt_cloud.py,sha256=_17ZX6WDzg3lKo0J5XukiaZ8AiJVFsg7y2IcMLMOUAQ,17785
293
- datahub/ingestion/source/dbt/dbt_common.py,sha256=yk5iF671kFI3BQ9pY2HvMq6mkiEKPT5ARkf13Ydr2TE,81098
293
+ datahub/ingestion/source/dbt/dbt_common.py,sha256=Hgdu6yisAOQ4mn98GjhnhTreUJffskMKD-tvYDMQ0bQ,82045
294
294
  datahub/ingestion/source/dbt/dbt_core.py,sha256=qtfNQk28r4_hkf5sIkjfWfrvZbW8Q0NIFPi67NpPeB4,24824
295
295
  datahub/ingestion/source/dbt/dbt_tests.py,sha256=pOZJaP4VsbaE5j4qVlE_E3ifno_KQpidfGTvOi5fr6I,9839
296
296
  datahub/ingestion/source/delta_lake/__init__.py,sha256=u5oqUeus81ONAtdl6o9Puw33ODSMun-0wLIamrZ4BUM,71
@@ -335,7 +335,7 @@ datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJ
335
335
  datahub/ingestion/source/hex/hex.py,sha256=hpMCkwH_RWdb0MG1U2-N71wc6vPUVQ3fAkTiudplzrQ,12968
336
336
  datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
337
337
  datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
338
- datahub/ingestion/source/hex/query_fetcher.py,sha256=m9A9xzxJipGD-nJU_lItyLF7Q5VYOm11wuM3fMyvm4I,12594
338
+ datahub/ingestion/source/hex/query_fetcher.py,sha256=0VqDfviyfR14gUHvIBovCXEqwW4ftFehPSB2VzaYk14,13312
339
339
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
340
340
  datahub/ingestion/source/iceberg/iceberg.py,sha256=-8uaBerljvonaT7Gn9Evokq6-SSDiMRf8kKo7Hg1qY4,35414
341
341
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
@@ -489,7 +489,7 @@ datahub/ingestion/source/sql/sql_config.py,sha256=u3nGZYYl1WtaxfNsDU5bglgZ5Jq3Fx
489
489
  datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
490
490
  datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=8cDmNpT_UXzYmP8-RWoDCnewmVGCj2cYCzH9_gSsF3o,11590
491
491
  datahub/ingestion/source/sql/sql_report.py,sha256=gw-OPHSExp_b6DRjvwqE1U6BpkwekxGrsvNMGYSGDio,2671
492
- datahub/ingestion/source/sql/sql_types.py,sha256=TYz6N2-3j3Rj8gBVZxNuQAGbDBb0hdNfhhIj0q5UIQs,15009
492
+ datahub/ingestion/source/sql/sql_types.py,sha256=TZSuK29Y5TPFIjzEXbwTbbkSQAS6Dnzuq8mCbJa5fK4,15119
493
493
  datahub/ingestion/source/sql/sql_utils.py,sha256=q-Bsk6WxlsRtrw9RXBxvqI3zuaMTC_F25T2VrCziR9I,8418
494
494
  datahub/ingestion/source/sql/sqlalchemy_data_reader.py,sha256=FvHZ4JEK3aR2DYOBZiT_ZsAy12RjTu4t_KIR_92B11k,2644
495
495
  datahub/ingestion/source/sql/sqlalchemy_uri.py,sha256=u0ZvgdJjXZdo_vl7YIQfYuuWbGwpnH6OSozI2e8ZV4I,858
@@ -598,8 +598,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
598
598
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
599
599
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
600
600
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
601
- datahub/metadata/_internal_schema_classes.py,sha256=tl0FUKLebvTk766QFVUerWKFf0OpaSSY0o-w-vqPNsA,1013133
602
- datahub/metadata/schema.avsc,sha256=F4cl2V81M75_GVEu-zHL8a8KoM_dI62irB-1lAI147w,701582
601
+ datahub/metadata/_internal_schema_classes.py,sha256=zFi0q-OOxdhPXr6ril8nEZhgChzXY_a3BkVwH3h_8bo,1013146
602
+ datahub/metadata/schema.avsc,sha256=dnBmtikhHW7neORZOUoiH21MZXoXRaw5E1gwwfyCnqs,701603
603
603
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
604
604
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
605
605
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -722,7 +722,7 @@ datahub/metadata/schemas/DataHubActionKey.avsc,sha256=bjiKcoyvUPQKaGUi2ICBMJ_ukw
722
722
  datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0UbXdJNnyt_oTn16XIe5ZlcqGk,1661
723
723
  datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
724
724
  datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
725
- datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=CSmoOx_Eqa1-he5dRaVOUQWIv1l2e2lraEPIixKK-lo,526
725
+ datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=TGmm9WEGTaABs7kt5Uc-N-kbc5Sd-2sQwx-JpfAptvw,545
726
726
  datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
727
727
  datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
728
728
  datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
@@ -1057,8 +1057,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1057
1057
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1058
1058
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1059
1059
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1060
- acryl_datahub-1.1.1rc2.dist-info/METADATA,sha256=6YaQUpDtFdwJ2Ih1wopQxjfHZ-h7lTjJNNixDR2kp3o,180529
1061
- acryl_datahub-1.1.1rc2.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
1062
- acryl_datahub-1.1.1rc2.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1063
- acryl_datahub-1.1.1rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1064
- acryl_datahub-1.1.1rc2.dist-info/RECORD,,
1060
+ acryl_datahub-1.1.1rc3.dist-info/METADATA,sha256=EU7551u1heqvgQuRO_U-7JIPE1hnZzewnH-c3EaWkyY,180694
1061
+ acryl_datahub-1.1.1rc3.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
1062
+ acryl_datahub-1.1.1rc3.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1063
+ acryl_datahub-1.1.1rc3.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1064
+ acryl_datahub-1.1.1rc3.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.1.1rc2"
3
+ __version__ = "1.1.1rc3"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -15,7 +15,13 @@ from typing import (
15
15
 
16
16
  import avro
17
17
  import yaml
18
- from pydantic import BaseModel, Field, root_validator, validator
18
+ from pydantic import (
19
+ BaseModel,
20
+ Field,
21
+ StrictStr,
22
+ root_validator,
23
+ validator,
24
+ )
19
25
  from ruamel.yaml import YAML
20
26
  from typing_extensions import TypeAlias
21
27
 
@@ -90,7 +96,7 @@ class StrictModel(BaseModel):
90
96
 
91
97
 
92
98
  # Define type aliases for the complex types
93
- PropertyValue: TypeAlias = Union[float, str]
99
+ PropertyValue: TypeAlias = Union[StrictStr, float]
94
100
  PropertyValueList: TypeAlias = List[PropertyValue]
95
101
  StructuredProperties: TypeAlias = Dict[str, Union[PropertyValue, PropertyValueList]]
96
102
 
@@ -366,12 +372,6 @@ class Ownership(ConfigModel):
366
372
  return v
367
373
 
368
374
 
369
- class StructuredPropertyValue(ConfigModel):
370
- value: Union[str, int, float, List[str], List[int], List[float]]
371
- created: Optional[str] = None
372
- lastModified: Optional[str] = None
373
-
374
-
375
375
  class DatasetRetrievalConfig(BaseModel):
376
376
  include_downstreams: Optional[bool] = False
377
377
 
@@ -4,7 +4,7 @@ from pathlib import Path
4
4
  from typing import Iterable, List, Optional, Union
5
5
 
6
6
  import yaml
7
- from pydantic import validator
7
+ from pydantic import StrictStr, validator
8
8
  from ruamel.yaml import YAML
9
9
 
10
10
  from datahub.configuration.common import ConfigModel
@@ -38,7 +38,7 @@ class AllowedTypes(Enum):
38
38
 
39
39
 
40
40
  class AllowedValue(ConfigModel):
41
- value: Union[int, float, str]
41
+ value: Union[StrictStr, float]
42
42
  description: Optional[str] = None
43
43
 
44
44
 
@@ -59,6 +59,7 @@ from datahub.metadata.urns import (
59
59
  DataJobUrn,
60
60
  DataPlatformUrn,
61
61
  DatasetUrn,
62
+ OwnershipTypeUrn,
62
63
  TagUrn,
63
64
  )
64
65
  from datahub.utilities.urn_encoder import UrnEncoder
@@ -406,7 +407,8 @@ def make_ml_model_group_urn(platform: str, group_name: str, env: str) -> str:
406
407
 
407
408
  def validate_ownership_type(ownership_type: str) -> Tuple[str, Optional[str]]:
408
409
  if ownership_type.startswith("urn:li:"):
409
- return OwnershipTypeClass.CUSTOM, ownership_type
410
+ ownership_type_urn = OwnershipTypeUrn.from_string(ownership_type)
411
+ return OwnershipTypeClass.CUSTOM, ownership_type_urn.urn()
410
412
  ownership_type = ownership_type.upper()
411
413
  if ownership_type in get_enum_options(OwnershipTypeClass):
412
414
  return ownership_type, None
@@ -132,6 +132,12 @@ class DBTSourceReport(StaleEntityRemovalSourceReport):
132
132
  sql_parser_column_errors: int = 0
133
133
  sql_parser_successes: int = 0
134
134
 
135
+ # Details on where column info comes from.
136
+ nodes_with_catalog_columns: int = 0
137
+ nodes_with_inferred_columns: int = 0
138
+ nodes_with_graph_columns: int = 0
139
+ nodes_with_no_columns: int = 0
140
+
135
141
  sql_parser_parse_failures_list: LossyList[str] = field(default_factory=LossyList)
136
142
  sql_parser_detach_ctes_failures_list: LossyList[str] = field(
137
143
  default_factory=LossyList
@@ -619,14 +625,8 @@ class DBTNode:
619
625
  def exists_in_target_platform(self):
620
626
  return not (self.is_ephemeral_model() or self.node_type == "test")
621
627
 
622
- def columns_setdefault(self, schema_fields: List[SchemaField]) -> None:
623
- """
624
- Update the column list if they are not already set.
625
- """
626
-
627
- if self.columns:
628
- # If we already have columns, don't overwrite them.
629
- return
628
+ def set_columns(self, schema_fields: List[SchemaField]) -> None:
629
+ """Update the column list."""
630
630
 
631
631
  self.columns = [
632
632
  DBTColumn(
@@ -1248,9 +1248,28 @@ class DBTSourceBase(StatefulIngestionSourceBase):
1248
1248
  target_node_urn, self._to_schema_info(inferred_schema_fields)
1249
1249
  )
1250
1250
 
1251
- # Save the inferred schema fields into the dbt node.
1252
- if inferred_schema_fields:
1253
- node.columns_setdefault(inferred_schema_fields)
1251
+ # When updating the node's columns, our order of preference is:
1252
+ # 1. Schema from the dbt catalog
1253
+ # 2. Inferred schema
1254
+ # 3. Schema fetched from the graph
1255
+ if node.columns:
1256
+ self.report.nodes_with_catalog_columns += 1
1257
+ pass # we already have columns from the dbt catalog
1258
+ elif inferred_schema_fields:
1259
+ logger.debug(
1260
+ f"Using {len(inferred_schema_fields)} inferred columns for {node.dbt_name}"
1261
+ )
1262
+ self.report.nodes_with_inferred_columns += 1
1263
+ node.set_columns(inferred_schema_fields)
1264
+ elif schema_fields:
1265
+ logger.debug(
1266
+ f"Using {len(schema_fields)} graph columns for {node.dbt_name}"
1267
+ )
1268
+ self.report.nodes_with_graph_columns += 1
1269
+ node.set_columns(schema_fields)
1270
+ else:
1271
+ logger.debug(f"No columns found for {node.dbt_name}")
1272
+ self.report.nodes_with_no_columns += 1
1254
1273
 
1255
1274
  def _parse_cll(
1256
1275
  self,
@@ -18,8 +18,12 @@ from datahub.utilities.time import datetime_to_ts_millis
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
20
  # Pattern to extract both project_id and workspace_name from Hex metadata in SQL comments
21
- # Only match metadata with "context": "SCHEDULED_RUN" to filter out non-scheduled runs
22
- HEX_METADATA_PATTERN = r'-- Hex query metadata: \{.*?"context": "SCHEDULED_RUN".*?"project_id": "([^"]+)".*?"project_url": "https?://[^/]+/([^/]+)/hex/.*?\}'
21
+ # Context values:
22
+ # - SCHEDULED_RUN: The query was executed during a scheduled run of a published Hex app.
23
+ # - LOGIC_VIEW: The query was executed from the Hex project's notebook view. This happens when a user is actively editing a Hex notebook: When they first open and run it or when they rerun without cached results.
24
+ # - APP_VIEW: The query was executed during a published app session. This happens when a user opens up a published app or reruns the app without cached results.
25
+ # Only match metadata with "context": "SCHEDULED_RUN|APP_VIEW" to filter out those from notebook, which may bring more noise from development than value
26
+ HEX_METADATA_PATTERN = r'-- Hex query metadata: \{.*?"context": "(?:SCHEDULED_RUN|APP_VIEW)".*?"project_id": "([^"]+)".*?"project_url": "https?://[^/]+/([^/]+)/hex/.*?\}'
23
27
 
24
28
 
25
29
  @dataclass
@@ -197,13 +201,15 @@ class HexQueryFetcher:
197
201
  Example:
198
202
  -- Hex query metadata: {"categories": ["Scratchpad"], "cell_type": "SQL", "connection": "Long Tail Companions", "context": "SCHEDULED_RUN", "project_id": "d73da67d-c87b-4dd8-9e7f-b79cb7f822cf", "project_url": "https://app.hex.tech/acryl-partnership/hex/d73da67d-c87b-4dd8-9e7f-b79cb7f822cf/draft/logic?selectedCellId=67c38da0-e631-4005-9750-5bdae2a2ef3f"}
199
203
 
200
- # TODO: Consider supporting multiline metadata format in the future:
204
+ TODO: Consider supporting multiline metadata format in the future:
201
205
  # -- Hex query metadata: {
202
206
  # -- "categories": ["Scratchpad"],
203
207
  # -- "project_id": "d73da67d-c87b-4dd8-9e7f-b79cb7f822cf",
204
208
  # -- ...
205
209
  # -- }
206
210
 
211
+ TODO: Extract based on pattern matching is strict on the order of the keys in the metadata. Consider using a more flexible approach like JSON parsing.
212
+
207
213
  Returns:
208
214
  A tuple of (project_id, workspace_name) if both are successfully extracted
209
215
  None if extraction fails for any reason
@@ -284,6 +284,8 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
284
284
  "INTEGER": NumberType,
285
285
  "BIGINT": NumberType,
286
286
  "SMALLINT": NumberType,
287
+ "TINYINT": NumberType,
288
+ "BYTEINT": NumberType,
287
289
  "FLOAT": NumberType,
288
290
  "FLOAT4": NumberType,
289
291
  "FLOAT8": NumberType,
@@ -291,6 +293,7 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
291
293
  "DOUBLE PRECISION": NumberType,
292
294
  "REAL": NumberType,
293
295
  "VARCHAR": StringType,
296
+ "CHARACTER VARYING": StringType,
294
297
  "CHAR": StringType,
295
298
  "CHARACTER": StringType,
296
299
  "STRING": StringType,
@@ -313,8 +316,8 @@ SNOWFLAKE_TYPES_MAP: Dict[str, Any] = {
313
316
 
314
317
 
315
318
  def resolve_snowflake_modified_type(type_string: str) -> Any:
316
- # Match types with precision and scale, e.g., 'DECIMAL(38,0)'
317
- match = re.match(r"([a-zA-Z_]+)\(\d+,\s\d+\)", type_string)
319
+ # Match types with precision and scale, e.g., 'DECIMAL(38,0)' or TIME(3)
320
+ match = re.match(r"([a-z A-Z_]+)\(\d+(,(\s+)?\d+)?\)", type_string)
318
321
  if match:
319
322
  modified_type_base = match.group(1) # Extract the base type
320
323
  return SNOWFLAKE_TYPES_MAP.get(modified_type_base)
@@ -15431,7 +15431,7 @@ class DataHubIngestionSourceKeyClass(_Aspect):
15431
15431
 
15432
15432
 
15433
15433
  ASPECT_NAME = 'dataHubIngestionSourceKey'
15434
- ASPECT_INFO = {'keyForEntity': 'dataHubIngestionSource', 'entityCategory': 'internal', 'entityAspects': ['dataHubIngestionSourceInfo']}
15434
+ ASPECT_INFO = {'keyForEntity': 'dataHubIngestionSource', 'entityCategory': 'internal', 'entityAspects': ['dataHubIngestionSourceInfo', 'ownership']}
15435
15435
  RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.metadata.key.DataHubIngestionSourceKey")
15436
15436
 
15437
15437
  def __init__(self,
@@ -3768,7 +3768,8 @@
3768
3768
  "keyForEntity": "dataHubIngestionSource",
3769
3769
  "entityCategory": "internal",
3770
3770
  "entityAspects": [
3771
- "dataHubIngestionSourceInfo"
3771
+ "dataHubIngestionSourceInfo",
3772
+ "ownership"
3772
3773
  ]
3773
3774
  },
3774
3775
  "name": "DataHubIngestionSourceKey",
@@ -5,7 +5,8 @@
5
5
  "keyForEntity": "dataHubIngestionSource",
6
6
  "entityCategory": "internal",
7
7
  "entityAspects": [
8
- "dataHubIngestionSourceInfo"
8
+ "dataHubIngestionSourceInfo",
9
+ "ownership"
9
10
  ]
10
11
  },
11
12
  "name": "DataHubIngestionSourceKey",