acryl-datahub 0.15.0rc4-py3-none-any.whl → 0.15.0rc5-py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

@@ -1,4 +1,4 @@
-datahub/__init__.py,sha256=s_nIg7mDSc39CXTxls5vnfHHXg9rzRp55gtGWfhtJWM,574
+datahub/__init__.py,sha256=c5YiGS9ajJPufFiwc_4_Bv9DF1Ha6s0H9dd-rtKRF3Y,574
 datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
 datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
 datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -312,14 +312,14 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
 datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
 datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/iceberg/iceberg.py,sha256=lfCettf4F_BjWt7Wh65AhXHerXPP5KLFEo_LCbEZZnI,25537
+datahub/ingestion/source/iceberg/iceberg.py,sha256=fjqp3VBW5W5-54X_-ubkRZiAmdHvuMbxRbC4UYzEr4U,25900
 datahub/ingestion/source/iceberg/iceberg_common.py,sha256=TS3_ZYZ47Fe02CmzEo1z0pvy7yjXuG1VlwqNxa0U6pc,8506
 datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=hLT1Le_TEUoFXvsJSlrRB1qbTiTe-YVGCof5TFHMyd8,9908
 datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
 datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
 datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datahub/ingestion/source/kafka/kafka.py,sha256=D54QXYoEQJYVb7oWyvLVCGbJunS8ZTGYlMMdaBqpmMs,25475
+datahub/ingestion/source/kafka/kafka.py,sha256=QUw8VCmqIhZJvUiFJmFmekFmy4nXCLD4EKJNC6jk6Y4,26092
 datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
 datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
 datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -972,8 +972,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
 datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
 datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
 datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
-acryl_datahub-0.15.0rc4.dist-info/METADATA,sha256=c65oplVIArqkkCm4xgv3OOJqeiK_9XKwiN5O-UPJ6Ss,171129
-acryl_datahub-0.15.0rc4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-acryl_datahub-0.15.0rc4.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
-acryl_datahub-0.15.0rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
-acryl_datahub-0.15.0rc4.dist-info/RECORD,,
+acryl_datahub-0.15.0rc5.dist-info/METADATA,sha256=SiiSRUUBz-MJZHnnGpn6342w8hkW9COEktKjlJDQQuw,171117
+acryl_datahub-0.15.0rc5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+acryl_datahub-0.15.0rc5.dist-info/entry_points.txt,sha256=3jOfMXB66r8zRDaqzRYpNc0tK-oUO-3tXlnGYDdVAmg,9440
+acryl_datahub-0.15.0rc5.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+acryl_datahub-0.15.0rc5.dist-info/RECORD,,
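Each RECORD row is a comma-separated triple of file path, hash, and size in bytes, so the rows above show exactly which files changed between rc4 and rc5. As a minimal sketch of how an entry can be verified (the path below is one of the changed files), PEP 427 defines the hash as the urlsafe-base64-encoded sha256 digest with the trailing "=" padding stripped:

import base64
import hashlib

def record_digest(path: str) -> str:
    # Wheel RECORD-style hash (PEP 427): urlsafe base64 of the sha256
    # digest, with the trailing "=" padding removed.
    with open(path, "rb") as f:
        digest = hashlib.sha256(f.read()).digest()
    return "sha256=" + base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# For the rc5 wheel, record_digest("datahub/__init__.py") should return
# "sha256=c5YiGS9ajJPufFiwc_4_Bv9DF1Ha6s0H9dd-rtKRF3Y".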
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings

 # Published at https://pypi.org/project/acryl-datahub/.
 __package_name__ = "acryl-datahub"
-__version__ = "0.15.0rc4"
+__version__ = "0.15.0rc5"


 def is_dev_mode() -> bool:
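The only change in this file is the version bump. A quick way to confirm which build is installed (the expected values are taken from this diff):

import datahub

print(datahub.__package_name__)  # "acryl-datahub"
print(datahub.__version__)       # "0.15.0rc5" after upgrading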
datahub/ingestion/source/iceberg/iceberg.py CHANGED
@@ -9,6 +9,7 @@ from pyiceberg.exceptions import (
     NoSuchIcebergTableError,
     NoSuchNamespaceError,
     NoSuchPropertyException,
+    NoSuchTableError,
 )
 from pyiceberg.schema import Schema, SchemaVisitorPerPrimitiveType, visit
 from pyiceberg.table import Table
@@ -104,7 +105,7 @@ logging.getLogger("azure.core.pipeline.policies.http_logging_policy").setLevel(
 @capability(SourceCapability.DESCRIPTIONS, "Enabled by default.")
 @capability(
     SourceCapability.OWNERSHIP,
-    "Optionally enabled via configuration by specifying which Iceberg table property holds user or group ownership.",
+    "Automatically ingests ownership information from table properties based on `user_ownership_property` and `group_ownership_property`",
 )
 @capability(SourceCapability.DELETION_DETECTION, "Enabled via stateful ingestion")
 class IcebergSource(StatefulIngestionSourceBase):
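The reworded capability text names the two config options that drive ownership extraction. A hypothetical recipe fragment, written here as a Python dict; the catalog block is elided and the property names "table_owner" and "owning_team" are illustrative, not defaults:

iceberg_source_config = {
    "catalog": {
        # ... pyiceberg catalog connection settings elided ...
    },
    # Iceberg table properties the source reads user/group ownership from:
    "user_ownership_property": "table_owner",
    "group_ownership_property": "owning_team",
}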
@@ -192,9 +193,7 @@ class IcebergSource(StatefulIngestionSourceBase):
             table = thread_local.local_catalog.load_table(dataset_path)
             time_taken = timer.elapsed_seconds()
             self.report.report_table_load_time(time_taken)
-            LOGGER.debug(
-                f"Loaded table: {table.identifier}, time taken: {time_taken}"
-            )
+            LOGGER.debug(f"Loaded table: {table.name()}, time taken: {time_taken}")
             yield from self._create_iceberg_workunit(dataset_name, table)
         except NoSuchPropertyException as e:
             self.report.report_warning(
@@ -206,12 +205,20 @@ class IcebergSource(StatefulIngestionSourceBase):
             )
         except NoSuchIcebergTableError as e:
             self.report.report_warning(
-                "no-iceberg-table",
+                "not-an-iceberg-table",
                 f"Failed to create workunit for {dataset_name}. {e}",
             )
             LOGGER.warning(
                 f"NoSuchIcebergTableError while processing table {dataset_path}, skipping it.",
             )
+        except NoSuchTableError as e:
+            self.report.report_warning(
+                "no-such-table",
+                f"Failed to create workunit for {dataset_name}. {e}",
+            )
+            LOGGER.warning(
+                f"NoSuchTableError while processing table {dataset_path}, skipping it.",
+            )
         except Exception as e:
             self.report.report_failure("general", f"Failed to create workunit: {e}")
             LOGGER.exception(
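The new branch matters because pyiceberg raises a plain NoSuchTableError when a table is missing from the catalog, while NoSuchIcebergTableError covers entries that exist but are not Iceberg tables; previously the former fell through to the generic failure handler. A minimal sketch of the same distinction, assuming a locally configured pyiceberg catalog named "demo" and illustrative table identifiers:

from pyiceberg.catalog import load_catalog
from pyiceberg.exceptions import NoSuchIcebergTableError, NoSuchTableError

catalog = load_catalog("demo")  # assumes a catalog named "demo" is configured
for identifier in [("db", "present"), ("db", "missing")]:
    try:
        table = catalog.load_table(identifier)
        print(f"loaded {table.name()}")
    except NoSuchIcebergTableError as e:
        # The more specific error is caught first, as in the handler above:
        # the entry exists but is not an Iceberg table.
        print(f"{identifier} is not an Iceberg table, skipping: {e}")
    except NoSuchTableError as e:
        # The table is missing from the catalog entirely; skip it rather
        # than failing the whole run.
        print(f"{identifier} not found, skipping: {e}")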
datahub/ingestion/source/kafka/kafka.py CHANGED
@@ -148,7 +148,7 @@ def get_kafka_consumer(
 ) -> confluent_kafka.Consumer:
     consumer = confluent_kafka.Consumer(
         {
-            "group.id": "test",
+            "group.id": "datahub-kafka-ingestion",
             "bootstrap.servers": connection.bootstrap,
             **connection.consumer_config,
         }
@@ -164,6 +164,25 @@ def get_kafka_consumer(
     return consumer


+def get_kafka_admin_client(
+    connection: KafkaConsumerConnectionConfig,
+) -> AdminClient:
+    client = AdminClient(
+        {
+            "group.id": "datahub-kafka-ingestion",
+            "bootstrap.servers": connection.bootstrap,
+            **connection.consumer_config,
+        }
+    )
+    if CallableConsumerConfig.is_callable_config(connection.consumer_config):
+        # As per documentation, we need to explicitly call the poll method to make sure OAuth callback gets executed
+        # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/html/index.html#kafka-client-configuration
+        logger.debug("Initiating polling for kafka admin client")
+        client.poll(timeout=30)
+        logger.debug("Initiated polling for kafka admin client")
+    return client
+
+
 @dataclass
 class KafkaSourceReport(StaleEntityRemovalSourceReport):
     topics_scanned: int = 0
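The new helper mirrors get_kafka_consumer so the admin client is built from the same connection settings, including any callable OAuth consumer_config. A minimal sketch of the same pattern outside DataHub (the broker address is illustrative):

from confluent_kafka.admin import AdminClient

conf = {
    "group.id": "datahub-kafka-ingestion",
    "bootstrap.servers": "localhost:9092",  # illustrative broker
}
admin = AdminClient(conf)
admin.poll(timeout=5)  # drives any configured callbacks before first use
metadata = admin.list_topics(timeout=10)  # ClusterMetadata with a .topics dict
print(sorted(metadata.topics))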
@@ -278,13 +297,7 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
     def init_kafka_admin_client(self) -> None:
         try:
             # TODO: Do we require separate config than existing consumer_config ?
-            self.admin_client = AdminClient(
-                {
-                    "group.id": "test",
-                    "bootstrap.servers": self.source_config.connection.bootstrap,
-                    **self.source_config.connection.consumer_config,
-                }
-            )
+            self.admin_client = get_kafka_admin_client(self.source_config.connection)
         except Exception as e:
             logger.debug(e, exc_info=e)
             self.report.report_warning(
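With construction centralized in get_kafka_admin_client, the admin client now also gets the explicit poll that fires OAuth callbacks, which the old inline AdminClient construction never did. A hedged sketch of a callable-token config that both client types can share; the token fetcher and broker address are hypothetical:

import time
from confluent_kafka.admin import AdminClient

def fetch_token(oauth_config):
    # Hypothetical oauth_cb: librdkafka expects (token, expiry epoch seconds).
    return ("example-token", time.time() + 3600)

shared_conf = {
    "group.id": "datahub-kafka-ingestion",
    "bootstrap.servers": "localhost:9092",  # illustrative broker
    "security.protocol": "SASL_SSL",
    "sasl.mechanism": "OAUTHBEARER",
    "oauth_cb": fetch_token,
}
AdminClient(shared_conf).poll(timeout=5)  # poll() triggers fetch_token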