acryl-datahub 0.15.0rc4__py3-none-any.whl → 0.15.0rc5__py3-none-any.whl
This diff compares the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as published.
Potentially problematic release: this version of acryl-datahub has been flagged for review.
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/METADATA +2437 -2437
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/RECORD +8 -8
- datahub/__init__.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +12 -5
- datahub/ingestion/source/kafka/kafka.py +21 -8
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/RECORD CHANGED

@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=
+datahub/__init__.py,sha256=c5YiGS9ajJPufFiwc_4_Bv9DF1Ha6s0H9dd-rtKRF3Y,574
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -312,14 +312,14 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
 datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
 datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/iceberg/iceberg.py,sha256=
+datahub/ingestion/source/iceberg/iceberg.py,sha256=fjqp3VBW5W5-54X_-ubkRZiAmdHvuMbxRbC4UYzEr4U,25900
 datahub/ingestion/source/iceberg/iceberg_common.py,sha256=TS3_ZYZ47Fe02CmzEo1z0pvy7yjXuG1VlwqNxa0U6pc,8506
 datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrRB1qbTiTe-YVGCof5TFHMyd8,9908
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
 datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
 datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/kafka/kafka.py,sha256=
+datahub/ingestion/source/kafka/kafka.py,sha256=QUw8VCmqIhZJvUiFJmFmekFmy4nXCLD4EKJNC6jk6Y4,26092
 datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
 datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -972,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
-acryl_datahub-0.15.
+acryl_datahub-0.15.0rc5.dist-info/METADATA,sha256=SiiSRUUBz-MJZHnnGpn6342w8hkW9COEktKjlJDQQuw,171117
+acryl_datahub-0.15.0rc5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.15.0rc5.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
+acryl_datahub-0.15.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0rc5.dist-info/RECORD,,
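The hash column in RECORD is the URL-safe base64 encoding of the file's SHA-256 digest with trailing `=` padding stripped, and the final column is the file size in bytes (per PEP 376 and the wheel spec). A minimal sketch for recomputing an entry, with an illustrative path:

import base64
import hashlib

def record_hash(path: str) -> str:
    # Wheel RECORD format: "sha256=" + urlsafe-base64(sha256 digest), no padding.
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# After installing 0.15.0rc5, the updated entry above should reproduce
# (the site-packages path is illustrative):
# record_hash(".../site-packages/datahub/__init__.py")
# -> "sha256=c5YiGS9ajJPufFiwc_4_Bv9DF1Ha6s0H9dd-rtKRF3Y"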
datahub/__init__.py CHANGED
datahub/ingestion/source/iceberg/iceberg.py CHANGED

@@ -9,6 +9,7 @@ from pyiceberg.exceptions import (
     NoSuchIcebergTableError,
     NoSuchNamespaceError,
     NoSuchPropertyException,
+    NoSuchTableError,
 )
 from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
 from pyiceberg.table import Table

@@ -104,7 +105,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default.")
 @capability(
     SourceCapability.OWNERSHIP,
-    "
+    "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
 @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class IcebergSource(StatefulIngestionSourceBase):

@@ -192,9 +193,7 @@ class IcebergSource(StatefulIngestionSourceBase):
                 table = thread_local.local_catalog.load_table(dataset_path)
                 time_taken = timer.elapsed_seconds()
                 self.report.report_table_load_time(time_taken)
-                LOGGER.debug(
-                    f"Loaded table: {table.identifier}, time taken: {time_taken}"
-                )
+                LOGGER.debug(f"Loaded table: {table.name()}, time taken: {time_taken}")
             yield from self._create_iceberg_workunit(dataset_name, table)
         except NoSuchPropertyException as e:
             self.report.report_warning(

@@ -206,12 +205,20 @@ class IcebergSource(StatefulIngestionSourceBase):
             )
         except NoSuchIcebergTableError as e:
             self.report.report_warning(
-                "
+                "not-an-iceberg-table",
                 f"Failed to create workunit for {dataset_name}. {e}",
             )
             LOGGER.warning(
                 f"NoSuchIcebergTableError while processing table {dataset_path}, skipping it.",
             )
+        except NoSuchTableError as e:
+            self.report.report_warning(
+                "no-such-table",
+                f"Failed to create workunit for {dataset_name}. {e}",
+            )
+            LOGGER.warning(
+                f"NoSuchTableError while processing table {dataset_path}, skipping it.",
+            )
         except Exception as e:
             self.report.report_failure("general", f"Failed to create workunit: {e}")
             LOGGER.exception(
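The new `except NoSuchTableError` branch mirrors the existing `NoSuchIcebergTableError` handling: pyiceberg raises `NoSuchTableError` from `Catalog.load_table` when a table found during enumeration no longer exists, which previously fell through to the generic `except Exception` failure path. A minimal standalone sketch of the same skip-on-missing-table pattern (not DataHub's code), assuming a configured pyiceberg catalog named `default` and hypothetical table identifiers:

from pyiceberg.catalog import load_catalog
from pyiceberg.exceptions import NoSuchTableError

# Assumed: a catalog named "default" configured via ~/.pyiceberg.yaml;
# both table identifiers below are hypothetical.
catalog = load_catalog("default")
for identifier in [("db", "present_table"), ("db", "dropped_table")]:
    try:
        table = catalog.load_table(identifier)
    except NoSuchTableError as e:
        # Table was dropped between listing and loading: warn and skip it
        # instead of failing the whole run.
        print(f"Skipping {identifier}: {e}")
        continue
    print(f"Loaded {table.name()}")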
datahub/ingestion/source/kafka/kafka.py CHANGED

@@ -148,7 +148,7 @@ def get_kafka_consumer(
 ) -> confluent_kafka.Consumer:
     consumer = confluent_kafka.Consumer(
         {
-            "group.id": "
+            "group.id": "datahub-kafka-ingestion",
             "bootstrap.servers": connection.bootstrap,
             **connection.consumer_config,
         }

@@ -164,6 +164,25 @@ def get_kafka_consumer(
     return consumer


+def get_kafka_admin_client(
+    connection: KafkaConsumerConnectionConfig,
+) -> AdminClient:
+    client = AdminClient(
+        {
+            "group.id": "datahub-kafka-ingestion",
+            "bootstrap.servers": connection.bootstrap,
+            **connection.consumer_config,
+        }
+    )
+    if CallableConsumerConfig.is_callable_config(connection.consumer_config):
+        # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
+        # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
+        logger.debug("Initiating polling for kafka admin client")
+        client.poll(timeout=30)
+        logger.debug("Initiated polling for kafka admin client")
+    return client
+
+
 @dataclass
 class KafkaSourceReport(StaleEntityRemovalSourceReport):
     topics_scanned: int = 0

@@ -278,13 +297,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     def init_kafka_admin_client(self) -> None:
         try:
             # TODO: Do we require separate config than existing consumer_config ?
-            self.admin_client =
-                {
-                    "group.id": "test",
-                    "bootstrap.servers": self.source_config.connection.bootstrap,
-                    **self.source_config.connection.consumer_config,
-                }
-            )
+            self.admin_client = get_kafka_admin_client(self.source_config.connection)
         except Exception as e:
             logger.debug(e, exc_info=e)
             self.report.report_warning(
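The extracted `get_kafka_admin_client` helper returns a plain `confluent_kafka.admin.AdminClient`, so the inline construction in `init_kafka_admin_client` (with its leftover `"group.id": "test"`) collapses to a single call. A minimal sketch of the same construction outside DataHub, assuming a local broker at `localhost:9092`; `list_topics` is a standard AdminClient call, shown here only to exercise the client:

from confluent_kafka.admin import AdminClient

# Broker address is an assumption for illustration; the config mirrors
# the helper in the diff above.
client = AdminClient(
    {
        "group.id": "datahub-kafka-ingestion",
        "bootstrap.servers": "localhost:9092",
    }
)
# list_topics() returns ClusterMetadata; a short timeout keeps an
# unreachable broker from hanging the caller.
metadata = client.list_topics(timeout=10)
for topic_name in metadata.topics:
    print(topic_name)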
{acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/WHEEL: file without changes
{acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/entry_points.txt: file without changes
{acryl_datahub-0.15.0rc4.dist-info → acryl_datahub-0.15.0rc5.dist-info}/top_level.txt: file without changes