acryl-datahub 0.14.1.13rc2__py3-none-any.whl → 0.14.1.13rc4__py3-none-any.whl
This diff reflects the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of acryl-datahub might be problematic.
- {acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/METADATA +2396 -2396
- {acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/RECORD +18 -16
- {acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/entry_points.txt +2 -2
- datahub/__init__.py +1 -1
- datahub/configuration/kafka.py +12 -1
- datahub/configuration/kafka_consumer_config.py +35 -0
- datahub/ingestion/source/confluent_schema_registry.py +4 -2
- datahub/ingestion/source/kafka/__init__.py +0 -0
- datahub/ingestion/source/{kafka.py → kafka/kafka.py} +12 -2
- datahub/ingestion/source/sql/sql_common.py +3 -2
- datahub/sql_parsing/sql_parsing_common.py +6 -0
- datahub/sql_parsing/sqlglot_lineage.py +49 -7
- datahub/sql_parsing/sqlglot_utils.py +1 -1
- datahub/utilities/file_backed_collections.py +6 -0
- {acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/top_level.txt +0 -0
- /datahub/ingestion/source/{kafka_connect.py → kafka/kafka_connect.py} +0 -0
- /datahub/ingestion/source/{kafka_schema_registry_base.py → kafka/kafka_schema_registry_base.py} +0 -0
{acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/RECORD
RENAMED

@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=
+datahub/__init__.py,sha256=WMIoTkprsOt2SjEA2gbvdoJ3VkTIeWCVPilQAqNdCQw,577
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -96,7 +96,8 @@ datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvr
 datahub/configuration/git.py,sha256=s55eUHxKqVZgtVsISaDyS-1F4iZBiybbjYsjbp5LU5o,6135
 datahub/configuration/import_resolver.py,sha256=b4Ie9L7knN1LALEVMxTcNFSklDD6CVE-4Ipy4ZYhNYA,369
 datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
-datahub/configuration/kafka.py,sha256=
+datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVgI,2579
+datahub/configuration/kafka_consumer_config.py,sha256=DSwUU4HoqNyK4CNk9eIbX3eYsJMGQvORDiy1ZxkjlRc,1022
 datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
 datahub/configuration/pydantic_migration_helpers.py,sha256=4C_COAVZ5iJ8yxcWNgXZNWsY7ULogICNZ368oNF7zWg,1462
 datahub/configuration/source_common.py,sha256=68LZOuB23zSEcfgQJE1wZQnyYQHVVnEZK3Sniv_nEQs,2107
@@ -182,7 +183,7 @@ datahub/ingestion/sink/datahub_rest.py,sha256=17MjPW4F6LwLjZyATIWdDdIYvC74VH6g1j
 datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
 datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
 datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/confluent_schema_registry.py,sha256=
+datahub/ingestion/source/confluent_schema_registry.py,sha256=_h9D8bUXoaGcwgwB94dX6aTyLY5ve7XGdcVFSJHGSJc,18804
 datahub/ingestion/source/csv_enricher.py,sha256=xjCbcsSMM8l_ASCRAnNsUGKuYMrD1lec19Waixub1EM,29498
 datahub/ingestion/source/demo_data.py,sha256=yzA_R-wfSX2WPz0i5ukYlscpmpb0Pt8D7EkhtKfftvo,1286
 datahub/ingestion/source/elastic_search.py,sha256=qFUVNzynTVJTabASTjGMu8Qhf9UpNbEtSBFjaPQjBJE,22641
@@ -191,9 +192,6 @@ datahub/ingestion/source/file.py,sha256=pH-Qkjh5FQ2XvyYPE7Z8XEY4vUk_SUHxm8p8IxG1
 datahub/ingestion/source/ge_data_profiler.py,sha256=7oUvvADX4t1WiXwOruCURh3sNEY_7I41wbwXFvKaKWM,63587
 datahub/ingestion/source/ge_profiling_config.py,sha256=WusEMGFPu17y99jTT-1tTOiN87Q1sY7nyxXvXnPs1-E,10489
 datahub/ingestion/source/glue_profiling_config.py,sha256=vpMJH4Lf_qgR32BZy58suabri1yV5geaAPjzg2eORDc,2559
-datahub/ingestion/source/kafka.py,sha256=lGTkbwPvi8ysa0HaPaoWVHYblsPLB1js92cHbj2keKY,24876
-datahub/ingestion/source/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
-datahub/ingestion/source/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/ldap.py,sha256=Vnzg8tpwBYeyM-KBVVsUJvGZGBMJiCJ_i_FhxaFRQ9A,18627
 datahub/ingestion/source/metabase.py,sha256=oemiMdzjfr82Hx6rdwTNBzFM8962LDkosYh7SD_I5cY,31717
 datahub/ingestion/source/mlflow.py,sha256=SxCt4jtxQcpPWEI2rRNagCiE_6TWr2RroqmxRd_td1Y,11565
@@ -320,6 +318,10 @@ datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrR
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
 datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
+datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+datahub/ingestion/source/kafka/kafka.py,sha256=wC5tZ31fvXWLjse9qH3YONPhMxXIZ9QV9vbBcWDAP_w,25352
+datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
+datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
 datahub/ingestion/source/looker/looker_common.py,sha256=cIrsc0nDEsODxx9tjvSm2A3-OlCkPKCu4W2L8PxzLWs,59926
@@ -447,7 +449,7 @@ datahub/ingestion/source/sql/mysql.py,sha256=nDWK4YbqomcJgnit9b8geUGrp_3eix4bt0_
 datahub/ingestion/source/sql/oracle.py,sha256=exJJvMSC42Oj0IiFtpAPFatWkRu9mVu8sd7ofOdanqU,22460
 datahub/ingestion/source/sql/postgres.py,sha256=uC1kYEI8VdxiZ1Y9IxMWzwmg11wtMqYN0e2fkok1rxo,11972
 datahub/ingestion/source/sql/presto.py,sha256=PB-CS5MX2dSRFRHjlxfkLHGXLZXFNCsVAAyRBtY6HMg,3611
-datahub/ingestion/source/sql/sql_common.py,sha256=
+datahub/ingestion/source/sql/sql_common.py,sha256=SZkPK7aUiTdMHf5alG5724i1pyAn1jFUjcuDkKUoeL0,51813
 datahub/ingestion/source/sql/sql_config.py,sha256=M-l_uXau0ODolLZHBzAXhy-Rq5yYxvJ6cLbCIea7Mww,9449
 datahub/ingestion/source/sql/sql_generic.py,sha256=9AERvkK8kdJUeDOzCYJDb93xdv6Z4DGho0NfeHj5Uyg,2740
 datahub/ingestion/source/sql/sql_generic_profiler.py,sha256=vWna43rv9ClfK9MBNTx8tdoUr35nD5d8ppgeamEEhSQ,12528
@@ -859,10 +861,10 @@ datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPi
 datahub/sql_parsing/query_types.py,sha256=AxoWZiLuXM6iZ2AXdSlD82gqPNr1ZFh78wn2MSTu2wY,2479
 datahub/sql_parsing/schema_resolver.py,sha256=9wbJT80K4nsIHHOuQLso9QoFQAYwfSJZnqHwsaU3UTY,10197
 datahub/sql_parsing/sql_parsing_aggregator.py,sha256=YzYmWNQjXuQYcfh91ZnqjuzCDWdmZpE-ZX14Kz6XFMs,69938
-datahub/sql_parsing/sql_parsing_common.py,sha256=
+datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
 datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
-datahub/sql_parsing/sqlglot_lineage.py,sha256=
-datahub/sql_parsing/sqlglot_utils.py,sha256=
+datahub/sql_parsing/sqlglot_lineage.py,sha256=K532_zvj7vEt2Ci2d3kxYZLZkq4S0ECBRZ2nHuY11a4,45927
+datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG8iVfFRY,14431
 datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
 datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/telemetry/stats.py,sha256=YltbtC3fe6rl1kcxn1A-mSnVpECTPm5k-brrUt7QxTI,967
@@ -888,7 +890,7 @@ datahub/utilities/dedup_list.py,sha256=dUSpe1AajfuwlHVJKNv-CzDXSCkaw0HgSMOsxqUkQ
 datahub/utilities/delayed_iter.py,sha256=XlsI0DCXkVVejFKOW_uMT0E8DTqqOHQN3Ooak4EcULE,645
 datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,1128
 datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
-datahub/utilities/file_backed_collections.py,sha256=
+datahub/utilities/file_backed_collections.py,sha256=tS6hN0kxMaaTb8rB-vDqAd973mTKshERSC55JIe_3Cw,20557
 datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
 datahub/utilities/hive_schema_to_avro.py,sha256=2-7NI9haCAYbyUmHTb-QPxjn4WbmnDDKAIGmHJ11-1E,11622
 datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
@@ -965,8 +967,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
-acryl_datahub-0.14.1.
+acryl_datahub-0.14.1.13rc4.dist-info/METADATA,sha256=pKvm7N_prnqdEylMVyCtB6aReDQm2T-0LmvkhiwUdrU,171138
+acryl_datahub-0.14.1.13rc4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.14.1.13rc4.dist-info/entry_points.txt,sha256=VcQx0dnqaYLyeY_L5OaX7bLmmE-Il7TAXkxCKvEn2bA,9432
+acryl_datahub-0.14.1.13rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.14.1.13rc4.dist-info/RECORD,,
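Each RECORD row has the form path,sha256=<digest>,<size>, where the digest is the URL-safe base64 SHA-256 of the file contents with padding stripped. A minimal sketch for checking a row; the 47DEQpj8… value that recurs above is simply the hash of an empty file:

import base64
import hashlib

def record_hash(data: bytes) -> str:
    # RECORD digests are URL-safe base64 with trailing "=" padding removed.
    digest = hashlib.sha256(data).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode()

print(record_hash(b""))  # 47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
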
{acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/entry_points.txt
RENAMED
@@ -56,8 +56,8 @@ hive = datahub.ingestion.source.sql.hive:HiveSource
 hive-metastore = datahub.ingestion.source.sql.hive_metastore:HiveMetastoreSource
 iceberg = datahub.ingestion.source.iceberg.iceberg:IcebergSource
 json-schema = datahub.ingestion.source.schema.json_schema:JsonSchemaSource
-kafka = datahub.ingestion.source.kafka:KafkaSource
-kafka-connect = datahub.ingestion.source.kafka_connect:KafkaConnectSource
+kafka = datahub.ingestion.source.kafka.kafka:KafkaSource
+kafka-connect = datahub.ingestion.source.kafka.kafka_connect:KafkaConnectSource
 ldap = datahub.ingestion.source.ldap:LDAPSource
 looker = datahub.ingestion.source.looker.looker_source:LookerDashboardSource
 lookml = datahub.ingestion.source.looker.lookml_source:LookMLSource
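The only change here is the plugin module path: both Kafka entry points now point into the new datahub.ingestion.source.kafka package. Any code importing these classes directly needs the new path; a before/after sketch:

# rc2 and earlier:
#   from datahub.ingestion.source.kafka import KafkaSource
#   from datahub.ingestion.source.kafka_connect import KafkaConnectSource
# rc4:
from datahub.ingestion.source.kafka.kafka import KafkaSource
from datahub.ingestion.source.kafka.kafka_connect import KafkaConnectSource
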
datahub/__init__.py
CHANGED
datahub/configuration/kafka.py
CHANGED
@@ -1,6 +1,7 @@
 from pydantic import Field, validator
 
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import ConfigModel, ConfigurationError
+from datahub.configuration.kafka_consumer_config import CallableConsumerConfig
 from datahub.configuration.validate_host_port import validate_host_port
 
 
@@ -36,6 +37,16 @@ class KafkaConsumerConnectionConfig(_KafkaConnectionConfig):
         description="Extra consumer config serialized as JSON. These options will be passed into Kafka's DeserializingConsumer. See https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#deserializingconsumer and https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md .",
     )
 
+    @validator("consumer_config")
+    @classmethod
+    def resolve_callback(cls, value: dict) -> dict:
+        if CallableConsumerConfig.is_callable_config(value):
+            try:
+                value = CallableConsumerConfig(value).callable_config()
+            except Exception as e:
+                raise ConfigurationError(e)
+        return value
+
 
 class KafkaProducerConnectionConfig(_KafkaConnectionConfig):
     """Configuration class for holding connectivity information for Kafka producers"""
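The net effect of the new validator: when consumer_config contains an oauth_cb key, the string value is resolved to a callable at config-parse time, and any resolution failure is re-raised as a ConfigurationError. A minimal sketch, where my_pkg.auth.get_token is a hypothetical dotted path:

from datahub.configuration.kafka import KafkaConsumerConnectionConfig

config = KafkaConsumerConnectionConfig(
    bootstrap="broker:9092",
    consumer_config={"oauth_cb": "my_pkg.auth.get_token"},  # hypothetical module path
)
# After validation, the string has been replaced by the imported callable.
assert callable(config.consumer_config["oauth_cb"])
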
datahub/configuration/kafka_consumer_config.py
ADDED

@@ -0,0 +1,35 @@
+import logging
+from typing import Any, Dict, Optional
+
+from datahub.ingestion.api.registry import import_path
+
+logger = logging.getLogger(__name__)
+
+
+class CallableConsumerConfig:
+    CALLBACK_ATTRIBUTE: str = "oauth_cb"
+
+    def __init__(self, config: Dict[str, Any]):
+        self._config = config
+
+        self._resolve_oauth_callback()
+
+    def callable_config(self) -> Dict[str, Any]:
+        return self._config
+
+    @staticmethod
+    def is_callable_config(config: Dict[str, Any]) -> bool:
+        return CallableConsumerConfig.CALLBACK_ATTRIBUTE in config
+
+    def get_call_back_attribute(self) -> Optional[str]:
+        return self._config.get(CallableConsumerConfig.CALLBACK_ATTRIBUTE)
+
+    def _resolve_oauth_callback(self) -> None:
+        if not self.get_call_back_attribute():
+            return
+
+        call_back = self.get_call_back_attribute()
+
+        assert call_back  # to silent lint
+        # Set the callback
+        self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = import_path(call_back)
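import_path (from datahub.ingestion.api.registry) turns the dotted string into the object it names, so a recipe can reference the callback as plain text. The callable it resolves to should follow confluent-kafka's oauth_cb contract: it receives the sasl.oauthbearer.config string and returns a (token, expiry) pair. A hypothetical provider module:

# my_pkg/auth.py (hypothetical target of the "oauth_cb" setting)
import time

def get_token(oauth_config: str):
    # Fetch a real OAuth token from your identity provider here; confluent-kafka
    # expects a (token, absolute-expiry-in-unix-seconds) tuple in return.
    return "placeholder-token", time.time() + 3600
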
|
@@ -16,8 +16,10 @@ from confluent_kafka.schema_registry.schema_registry_client import (
|
|
|
16
16
|
from datahub.ingestion.extractor import protobuf_util, schema_util
|
|
17
17
|
from datahub.ingestion.extractor.json_schema_util import JsonSchemaTranslator
|
|
18
18
|
from datahub.ingestion.extractor.protobuf_util import ProtobufSchema
|
|
19
|
-
from datahub.ingestion.source.kafka import KafkaSourceConfig, KafkaSourceReport
|
|
20
|
-
from datahub.ingestion.source.kafka_schema_registry_base import
|
|
19
|
+
from datahub.ingestion.source.kafka.kafka import KafkaSourceConfig, KafkaSourceReport
|
|
20
|
+
from datahub.ingestion.source.kafka.kafka_schema_registry_base import (
|
|
21
|
+
KafkaSchemaRegistryBase,
|
|
22
|
+
)
|
|
21
23
|
from datahub.metadata.com.linkedin.pegasus2avro.schema import (
|
|
22
24
|
KafkaSchema,
|
|
23
25
|
SchemaField,
|
|
datahub/ingestion/source/kafka/__init__.py
ADDED

File without changes
datahub/ingestion/source/{kafka.py → kafka/kafka.py}
RENAMED

@@ -18,6 +18,7 @@ from confluent_kafka.schema_registry.schema_registry_client import SchemaRegistr
 
 from datahub.configuration.common import AllowDenyPattern
 from datahub.configuration.kafka import KafkaConsumerConnectionConfig
+from datahub.configuration.kafka_consumer_config import CallableConsumerConfig
 from datahub.configuration.source_common import (
     DatasetSourceConfigMixin,
     LowerCaseDatasetUrnConfigMixin,
@@ -49,7 +50,9 @@ from datahub.ingestion.api.source import (
 )
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
-from datahub.ingestion.source.kafka_schema_registry_base import KafkaSchemaRegistryBase
+from datahub.ingestion.source.kafka.kafka_schema_registry_base import (
+    KafkaSchemaRegistryBase,
+)
 from datahub.ingestion.source.state.stale_entity_removal_handler import (
     StaleEntityRemovalHandler,
     StaleEntityRemovalSourceReport,
@@ -143,7 +146,7 @@ class KafkaSourceConfig(
 def get_kafka_consumer(
     connection: KafkaConsumerConnectionConfig,
 ) -> confluent_kafka.Consumer:
-    return confluent_kafka.Consumer(
+    consumer = confluent_kafka.Consumer(
         {
             "group.id": "test",
             "bootstrap.servers": connection.bootstrap,
@@ -151,6 +154,13 @@ def get_kafka_consumer(
         }
     )
 
+    if CallableConsumerConfig.is_callable_config(connection.consumer_config):
+        # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
+        # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
+        consumer.poll(timeout=30)
+
+    return consumer
+
 
 @dataclass
 class KafkaSourceReport(StaleEntityRemovalSourceReport):
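Constructing a confluent_kafka.Consumer does not invoke the OAuth callback; it only fires on the first poll, which is why get_kafka_consumer() now polls once (with a generous 30-second timeout) before returning. Roughly equivalent, assuming the hypothetical get_token callback sketched earlier:

import confluent_kafka

consumer = confluent_kafka.Consumer(
    {
        "group.id": "test",
        "bootstrap.servers": "broker:9092",  # placeholder address
        "oauth_cb": get_token,  # already resolved to a callable by the validator
    }
)
consumer.poll(timeout=30)  # triggers the OAuth callback before the consumer is used
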
datahub/ingestion/source/sql/sql_common.py
CHANGED

@@ -32,6 +32,7 @@ from datahub.emitter.mce_builder import (
     make_data_platform_urn,
     make_dataplatform_instance_urn,
     make_dataset_urn_with_platform_instance,
+    make_schema_field_urn,
     make_tag_urn,
 )
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
@@ -669,7 +670,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )
 
         source_fields = [
-            f"urn:li:schemaField:({dataset_urn},{f})"
+            make_schema_field_urn(dataset_urn, f)
             for f in fk_dict["constrained_columns"]
         ]
         foreign_dataset = make_dataset_urn_with_platform_instance(
@@ -679,7 +680,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             env=self.config.env,
         )
         foreign_fields = [
-            f"urn:li:schemaField:({foreign_dataset},{f})"
+            make_schema_field_urn(foreign_dataset, f)
             for f in fk_dict["referred_columns"]
         ]
 
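make_schema_field_urn builds a schemaField URN from the parent dataset URN and the column name, so both ends of each foreign key are emitted in the canonical URN format. A quick illustration:

from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn

dataset_urn = make_dataset_urn(platform="postgres", name="public.orders", env="PROD")
print(make_schema_field_urn(dataset_urn, "customer_id"))
# urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,public.orders,PROD),customer_id)
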
datahub/sql_parsing/sql_parsing_common.py
CHANGED

@@ -21,6 +21,9 @@ DIALECTS_WITH_CASE_INSENSITIVE_COLS = {
     # See more below:
     # https://documentation.sas.com/doc/en/pgmsascdc/9.4_3.5/acreldb/n0ejgx4895bofnn14rlguktfx5r3.htm
     "teradata",
+    # For SQL server, the default collation rules mean that all identifiers (schema, table, column names)
+    # are case preserving but case insensitive.
+    "mssql",
 }
 DIALECTS_WITH_DEFAULT_UPPERCASE_COLS = {
     # In some dialects, column identifiers are effectively case insensitive
@@ -28,6 +31,9 @@ DIALECTS_WITH_DEFAULT_UPPERCASE_COLS = {
     # automatically lowercase unquoted identifiers.
     "snowflake",
 }
+assert DIALECTS_WITH_DEFAULT_UPPERCASE_COLS.issubset(
+    DIALECTS_WITH_CASE_INSENSITIVE_COLS
+)
 
 
 class QueryType(enum.Enum):
datahub/sql_parsing/sqlglot_lineage.py
CHANGED

@@ -5,7 +5,7 @@ import functools
 import logging
 import traceback
 from collections import defaultdict
-from typing import Any, Dict, List, Optional, Set, Tuple, Union
+from typing import Any, Dict, List, Optional, Set, Tuple, TypeVar, Union
 
 import pydantic.dataclasses
 import sqlglot
@@ -873,6 +873,49 @@ def _translate_internal_column_lineage(
     )
 
 
+_StrOrNone = TypeVar("_StrOrNone", str, Optional[str])
+
+
+def _normalize_db_or_schema(
+    db_or_schema: _StrOrNone,
+    dialect: sqlglot.Dialect,
+) -> _StrOrNone:
+    if db_or_schema is None:
+        return None
+
+    # In snowflake, table identifiers must be uppercased to match sqlglot's behavior.
+    if is_dialect_instance(dialect, "snowflake"):
+        return db_or_schema.upper()
+
+    # In mssql, table identifiers must be lowercased.
+    elif is_dialect_instance(dialect, "mssql"):
+        return db_or_schema.lower()
+
+    return db_or_schema
+
+
+def _simplify_select_into(statement: sqlglot.exp.Expression) -> sqlglot.exp.Expression:
+    """
+    Check if the expression is a SELECT INTO statement. If so, converts it into a CTAS.
+    Other expressions are returned as-is.
+    """
+
+    if not (isinstance(statement, sqlglot.exp.Select) and statement.args.get("into")):
+        return statement
+
+    # Convert from SELECT <cols> INTO <out> <expr>
+    # to CREATE TABLE <out> AS SELECT <cols> <expr>
+    into_expr: sqlglot.exp.Into = statement.args["into"].pop()
+    into_table = into_expr.this
+
+    create = sqlglot.exp.Create(
+        this=into_table,
+        kind="TABLE",
+        expression=statement,
+    )
+    return create
+
+
 def _sqlglot_lineage_inner(
     sql: sqlglot.exp.ExpOrStr,
     schema_resolver: SchemaResolverInterface,
@@ -885,12 +928,9 @@ def _sqlglot_lineage_inner(
     else:
         dialect = get_dialect(default_dialect)
 
-    if is_dialect_instance(dialect, "snowflake"):
-        # In snowflake, table identifiers must be uppercased to match sqlglot's behavior.
-        if default_db:
-            default_db = default_db.upper()
-        if default_schema:
-            default_schema = default_schema.upper()
+    default_db = _normalize_db_or_schema(default_db, dialect)
+    default_schema = _normalize_db_or_schema(default_schema, dialect)
+
     if is_dialect_instance(dialect, "redshift") and not default_schema:
         # On Redshift, there's no "USE SCHEMA <schema>" command. The default schema
         # is public, and "current schema" is the one at the front of the search path.
@@ -918,6 +958,8 @@ def _sqlglot_lineage_inner(
     #     original_statement.sql(pretty=True, dialect=dialect),
     # )
 
+    statement = _simplify_select_into(statement)
+
     # Make sure the tables are resolved with the default db / schema.
     # This only works for Unionable statements. For other types of statements,
     # we have to do it manually afterwards, but that's slightly lower accuracy
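The SELECT INTO rewrite lets the rest of the lineage pipeline treat T-SQL's SELECT … INTO as an ordinary CTAS. A standalone sketch of the same transformation, with made-up table and column names:

import sqlglot
import sqlglot.exp

stmt = sqlglot.parse_one("SELECT id, name INTO staging FROM users", dialect="tsql")

# Detach the INTO clause and wrap the remaining SELECT in a CREATE TABLE ... AS node.
into_expr = stmt.args["into"].pop()
ctas = sqlglot.exp.Create(this=into_expr.this, kind="TABLE", expression=stmt)

print(ctas.sql())  # e.g. CREATE TABLE staging AS SELECT id, name FROM users
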
datahub/sql_parsing/sqlglot_utils.py
CHANGED

@@ -61,7 +61,7 @@ def is_dialect_instance(
     else:
         platforms = list(platforms)
 
-    dialects = [
+    dialects = [get_dialect(platform) for platform in platforms]
 
     if any(isinstance(dialect, dialect_class.__class__) for dialect_class in dialects):
         return True
datahub/utilities/file_backed_collections.py
CHANGED

@@ -228,6 +228,12 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
         else:
             self._conn = ConnectionWrapper()
 
+        if sqlite3.sqlite_version_info < (3, 24, 0):
+            # We use the ON CONFLICT clause to implement UPSERTs with sqlite.
+            # This was added in 3.24.0 from 2018-06-04.
+            # See https://www.sqlite.org/lang_conflict.html
+            raise RuntimeError("SQLite version 3.24.0 or later is required")
+
         # We keep a small cache in memory to avoid having to serialize/deserialize
         # data from the database too often. We use an OrderedDict to build
         # a poor-man's LRU cache.
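FileBackedDict depends on SQLite's ON CONFLICT upsert clause, so the new guard fails fast on interpreters linked against a pre-3.24.0 SQLite rather than failing later with a syntax error. A generic example of the clause in question (FileBackedDict's actual statement differs, but needs the same feature):

import sqlite3

assert sqlite3.sqlite_version_info >= (3, 24, 0)

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE kv (key TEXT PRIMARY KEY, value TEXT)")
conn.execute(
    "INSERT INTO kv (key, value) VALUES (?, ?) "
    "ON CONFLICT (key) DO UPDATE SET value = excluded.value",
    ("k", "v"),
)
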
{acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/WHEEL
RENAMED

File without changes

{acryl_datahub-0.14.1.13rc2.dist-info → acryl_datahub-0.14.1.13rc4.dist-info}/top_level.txt
RENAMED

File without changes

/datahub/ingestion/source/{kafka_connect.py → kafka/kafka_connect.py}
RENAMED

File without changes
/datahub/ingestion/source/{kafka_schema_registry_base.py → kafka/kafka_schema_registry_base.py}
RENAMED

File without changes