acryl-datahub 1.0.0.2rc2__py3-none-any.whl → 1.0.0.2rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

The registry has flagged this version of acryl-datahub as a potentially problematic release.
Files changed (49)
  1. {acryl_datahub-1.0.0.2rc2.dist-info → acryl_datahub-1.0.0.2rc4.dist-info}/METADATA +2499 -2499
  2. {acryl_datahub-1.0.0.2rc2.dist-info → acryl_datahub-1.0.0.2rc4.dist-info}/RECORD +48 -49
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/datajob/dataflow.py +15 -0
  5. datahub/api/entities/datajob/datajob.py +17 -0
  6. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  7. datahub/api/entities/dataset/dataset.py +2 -2
  8. datahub/api/entities/structuredproperties/structuredproperties.py +1 -1
  9. datahub/cli/migrate.py +6 -6
  10. datahub/configuration/common.py +1 -1
  11. datahub/ingestion/api/common.py +9 -0
  12. datahub/ingestion/api/source.py +4 -1
  13. datahub/ingestion/api/source_helpers.py +26 -1
  14. datahub/ingestion/run/pipeline.py +0 -6
  15. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  16. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  17. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  18. datahub/ingestion/source/fivetran/fivetran.py +1 -0
  19. datahub/ingestion/source/fivetran/fivetran_log_api.py +1 -1
  20. datahub/ingestion/source/iceberg/iceberg.py +97 -9
  21. datahub/ingestion/source/kafka/kafka.py +1 -4
  22. datahub/ingestion/source/kafka_connect/sink_connectors.py +1 -1
  23. datahub/ingestion/source/kafka_connect/source_connectors.py +1 -1
  24. datahub/ingestion/source/looker/looker_source.py +2 -3
  25. datahub/ingestion/source/mlflow.py +3 -0
  26. datahub/ingestion/source/mode.py +2 -2
  27. datahub/ingestion/source/nifi.py +3 -3
  28. datahub/ingestion/source/openapi.py +3 -3
  29. datahub/ingestion/source/openapi_parser.py +8 -8
  30. datahub/ingestion/source/powerbi/config.py +1 -1
  31. datahub/ingestion/source/powerbi/powerbi.py +2 -2
  32. datahub/ingestion/source/redshift/profile.py +2 -2
  33. datahub/ingestion/source/sigma/sigma.py +6 -2
  34. datahub/ingestion/source/snowflake/snowflake_utils.py +1 -1
  35. datahub/ingestion/source/tableau/tableau.py +4 -4
  36. datahub/ingestion/source/tableau/tableau_common.py +2 -2
  37. datahub/ingestion/source/unity/source.py +1 -1
  38. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  39. datahub/ingestion/transformer/add_dataset_ownership.py +1 -1
  40. datahub/ingestion/transformer/dataset_domain.py +1 -1
  41. datahub/lite/lite_util.py +2 -2
  42. datahub/testing/mcp_diff.py +1 -1
  43. datahub/utilities/file_backed_collections.py +6 -6
  44. datahub/utilities/hive_schema_to_avro.py +2 -2
  45. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  46. {acryl_datahub-1.0.0.2rc2.dist-info → acryl_datahub-1.0.0.2rc4.dist-info}/WHEEL +0 -0
  47. {acryl_datahub-1.0.0.2rc2.dist-info → acryl_datahub-1.0.0.2rc4.dist-info}/entry_points.txt +0 -0
  48. {acryl_datahub-1.0.0.2rc2.dist-info → acryl_datahub-1.0.0.2rc4.dist-info}/licenses/LICENSE +0 -0
  49. {acryl_datahub-1.0.0.2rc2.dist-info → acryl_datahub-1.0.0.2rc4.dist-info}/top_level.txt +0 -0
datahub/utilities/file_backed_collections.py
@@ -250,7 +250,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
                 rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                 key TEXT UNIQUE,
                 value BLOB
-                {"".join(f", {column_name} BLOB" for column_name in self.extra_columns.keys())}
+                {"".join(f", {column_name} BLOB" for column_name in self.extra_columns)}
             )"""
         )
@@ -267,7 +267,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
         if self.indexes_created:
             return
         # The key column will automatically be indexed, but we need indexes for the extra columns.
-        for column_name in self.extra_columns.keys():
+        for column_name in self.extra_columns:
             self._conn.execute(
                 f"CREATE INDEX {self.tablename}_{column_name} ON {self.tablename} ({column_name})"
             )
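These two hunks only drop redundant .keys() calls: iterating over a dict (or joining over it) already yields its keys, so the generated DDL is unchanged. A minimal standalone sketch of the column-clause construction, using a hypothetical extra_columns dict rather than a real FileBackedDict instance:

    # Hypothetical stand-in for FileBackedDict.extra_columns (column name -> extractor).
    extra_columns = {"urn": repr, "platform": repr}

    # Iterating a dict yields its keys, so both forms build the same column clause.
    with_keys = "".join(f", {column_name} BLOB" for column_name in extra_columns.keys())
    without_keys = "".join(f", {column_name} BLOB" for column_name in extra_columns)

    assert with_keys == without_keys == ", urn BLOB, platform BLOB"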
@@ -305,12 +305,12 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
             f"""INSERT INTO {self.tablename} (
                 key,
                 value
-                {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
+                {"".join(f", {column_name}" for column_name in self.extra_columns)}
             )
             VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})
             ON CONFLICT (key) DO UPDATE SET
                 value = excluded.value
-                {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns.keys())}
+                {"".join(f", {column_name} = excluded.{column_name}" for column_name in self.extra_columns)}
             """,
             items_to_write,
         )
@@ -321,7 +321,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
             f"""INSERT INTO {self.tablename} (
                 key,
                 value
-                {"".join(f", {column_name}" for column_name in self.extra_columns.keys())}
+                {"".join(f", {column_name}" for column_name in self.extra_columns)}
             )
             VALUES ({", ".join(["?"] * (2 + len(self.extra_columns)))})""",
             item,
@@ -330,7 +330,7 @@ class FileBackedDict(MutableMapping[str, _VT], Closeable, Generic[_VT]):
         self._conn.execute(
             f"""UPDATE {self.tablename} SET
                 value = ?
-                {"".join(f", {column_name} = ?" for column_name in self.extra_columns.keys())}
+                {"".join(f", {column_name} = ?" for column_name in self.extra_columns)}
             WHERE key = ?""",
             (*item[1:], item[0]),
         )
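The flush hunks above touch the batched upsert that FileBackedDict issues via executemany. A minimal sketch of that SQLite pattern in isolation; the table and column names here ("data", "urn") are illustrative, not the values DataHub generates:

    import sqlite3

    # Illustrative table mirroring the shape the CREATE TABLE hunk produces.
    conn = sqlite3.connect(":memory:")
    conn.execute(
        "CREATE TABLE data (rowid INTEGER PRIMARY KEY AUTOINCREMENT, key TEXT UNIQUE, value BLOB, urn BLOB)"
    )

    # Batched rows; the second row conflicts on key and triggers the DO UPDATE branch.
    rows = [("a", b"v1", b"urn:1"), ("a", b"v2", b"urn:2")]
    conn.executemany(
        "INSERT INTO data (key, value, urn) VALUES (?, ?, ?) "
        "ON CONFLICT (key) DO UPDATE SET value = excluded.value, urn = excluded.urn",
        rows,
    )

    assert conn.execute("SELECT value FROM data WHERE key = 'a'").fetchone() == (b"v2",)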
datahub/utilities/hive_schema_to_avro.py
@@ -155,7 +155,7 @@ class HiveColumnToAvroConverter:

     @staticmethod
     def _parse_basic_datatype_string(s: str) -> Dict[str, object]:
-        if s in HiveColumnToAvroConverter._PRIVIMITE_HIVE_TYPE_TO_AVRO_TYPE.keys():
+        if s in HiveColumnToAvroConverter._PRIVIMITE_HIVE_TYPE_TO_AVRO_TYPE:
             return {
                 "type": HiveColumnToAvroConverter._PRIVIMITE_HIVE_TYPE_TO_AVRO_TYPE[s],
                 "native_data_type": s,
@@ -218,7 +218,7 @@ class HiveColumnToAvroConverter:
         buf = ""
         level = 0
         for c in s:
-            if c in HiveColumnToAvroConverter._BRACKETS.keys():
+            if c in HiveColumnToAvroConverter._BRACKETS:
                 level += 1
                 buf += c
             elif c in HiveColumnToAvroConverter._BRACKETS.values():
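Same idiom cleanup here: a membership test on a dict already checks its keys, so `.keys()` is redundant, while the closing-bracket check still needs `.values()`. A short sketch with an assumed bracket map (the exact contents of _BRACKETS are not copied from the source):

    # Illustrative bracket map in the spirit of HiveColumnToAvroConverter._BRACKETS.
    BRACKETS = {"<": ">", "(": ")", "[": "]", "{": "}"}

    # Membership on a dict tests its keys, so both forms are equivalent.
    assert ("<" in BRACKETS) == ("<" in BRACKETS.keys())

    # The closing-bracket branch genuinely needs .values(), which is why the diff keeps it.
    assert ">" in BRACKETS.values()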
datahub/ingestion/transformer/system_metadata_transformer.py (deleted)
@@ -1,45 +0,0 @@
-import functools
-from typing import Iterable
-
-from datahub.emitter.mce_builder import get_sys_time
-from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
-from datahub.ingestion.api.transform import Transformer
-from datahub.ingestion.api.workunit import MetadataWorkUnit
-from datahub.ingestion.transformer.auto_helper_transformer import AutoHelperTransformer
-from datahub.metadata.schema_classes import SystemMetadataClass
-
-
-def auto_system_metadata(
-    ctx: PipelineContext,
-    stream: Iterable[MetadataWorkUnit],
-) -> Iterable[MetadataWorkUnit]:
-    if not ctx.pipeline_config:
-        raise ValueError("Pipeline config is required for system metadata")
-    set_system_metadata = ctx.pipeline_config.flags.set_system_metadata
-    set_pipeline_name = ctx.pipeline_config.flags.set_system_metadata_pipeline_name
-
-    for workunit in stream:
-        if set_system_metadata:
-            workunit.metadata.systemMetadata = SystemMetadataClass(
-                lastObserved=get_sys_time(), runId=ctx.run_id
-            )
-        if set_pipeline_name:
-            workunit.metadata.systemMetadata.pipelineName = ctx.pipeline_name
-
-        yield workunit
-
-
-class SystemMetadataTransformer(Transformer):
-    def __init__(self, ctx: PipelineContext):
-        self._inner_transformer = AutoHelperTransformer(
-            functools.partial(auto_system_metadata, ctx)
-        )
-
-    def transform(
-        self, record_envelopes: Iterable[RecordEnvelope]
-    ) -> Iterable[RecordEnvelope]:
-        yield from self._inner_transformer.transform(record_envelopes)
-
-    @classmethod
-    def create(cls, config_dict: dict, ctx: PipelineContext) -> Transformer:
-        raise NotImplementedError(f"{cls.__name__} cannot be created from config")
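The removed SystemMetadataTransformer was a thin wrapper: it bound the pipeline context into auto_system_metadata with functools.partial and delegated to AutoHelperTransformer. A minimal sketch of that wrapping pattern only; Ctx and stamp_run_id are illustrative stand-ins, not DataHub APIs:

    import functools
    from typing import Dict, Iterable, Iterator

    class Ctx:
        # Stand-in for the pieces of PipelineContext the helper read.
        run_id = "demo-run"

    def stamp_run_id(ctx: Ctx, stream: Iterable[Dict[str, object]]) -> Iterator[Dict[str, object]]:
        # Mirrors the shape of auto_system_metadata: annotate each record with run info as it streams through.
        for record in stream:
            record["runId"] = ctx.run_id
            yield record

    # Bind the context up front, as SystemMetadataTransformer.__init__ did with auto_system_metadata.
    helper = functools.partial(stamp_run_id, Ctx())
    assert list(helper([{"value": 1}])) == [{"value": 1, "runId": "demo-run"}]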