acryl-datahub 1.2.0.9rc1__py3-none-any.whl → 1.2.0.10rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; see the package registry's advisory page for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.9rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.10rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=_H3eOOLcAfSj64l30V-SpArSKBd8L7socBTn4sC-zB0,323
4
+ datahub/_version.py,sha256=gbUD5kbOI-y4DH5wBU0PRgKeHjSTCNQ6fgZcMvqwDpg,324
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -267,8 +267,8 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
267
267
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
268
268
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
269
269
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
270
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=DHw5Z_rxj_fR09p7SO0UmDdvYEa_ViIRnLE9CFxPAAk,32525
271
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=29E_25jLpMIgWcYRC0ZcYd1fvaFtSi2T8S6hSwiTDTY,51090
270
+ datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
271
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c9a-SlZDOYNiS__vC5ezVVNM0UHasXWxWNRZkkP_aOo,51552
272
272
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
273
273
  datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
274
274
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
@@ -967,7 +967,7 @@ datahub/sdk/datajob.py,sha256=5kU0txTDcn2ce3AhNry83TazPVhoYZ2rAPPNWM1_FP8,13677
967
967
  datahub/sdk/dataset.py,sha256=-C4TCJAs1PFkLAgkUZEU1JOg3orm7AAIkqjw7oo_4PQ,31400
968
968
  datahub/sdk/entity.py,sha256=Q29AbpS58L4gD8ETwoNIwG-ouytz4c0MSSFi6-jLl_4,6742
969
969
  datahub/sdk/entity_client.py,sha256=NGVA2CwLqK16EgOPrPiIFodjPD6sM7eQ5E3w5Yl89cM,9428
970
- datahub/sdk/lineage_client.py,sha256=DRwUCyi-dDCrH5r_ktqvrryCJNPZ5Tr91y6G7dGaKBk,33754
970
+ datahub/sdk/lineage_client.py,sha256=qSe2TEt4HKRVytAsDokkfzqErZiL46c0TMe6g2C5hAg,33766
971
971
  datahub/sdk/main_client.py,sha256=LAymeMOkrjjJjQQ8Nc7G3hvF3P8Y0k0AXrDEGDGt4iU,5706
972
972
  datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
973
973
  datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1114
1114
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1115
1115
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1116
1116
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1117
- acryl_datahub-1.2.0.9rc1.dist-info/METADATA,sha256=b0i8DTQvhz0wjm4LlnQ4cAEB3Fzj4VWzmznolB87FoU,186907
1118
- acryl_datahub-1.2.0.9rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
- acryl_datahub-1.2.0.9rc1.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
- acryl_datahub-1.2.0.9rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
- acryl_datahub-1.2.0.9rc1.dist-info/RECORD,,
1117
+ acryl_datahub-1.2.0.10rc1.dist-info/METADATA,sha256=RZTwZLgqey2EHL-O4LEZfy5CEJc5WYKTwMzdOl6aoFU,186878
1118
+ acryl_datahub-1.2.0.10rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
+ acryl_datahub-1.2.0.10rc1.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
+ acryl_datahub-1.2.0.10rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
+ acryl_datahub-1.2.0.10rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.9rc1"
3
+ __version__ = "1.2.0.10rc1"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -283,23 +283,30 @@ class BigQuerySchemaApi:
283
283
  with self.report.list_datasets_timer:
284
284
  self.report.num_list_datasets_api_requests += 1
285
285
  datasets = self.bq_client.list_datasets(project_id, max_results=maxResults)
286
- return [
287
- BigqueryDataset(
288
- name=d.dataset_id,
289
- labels=d.labels,
290
- location=(
291
- d._properties.get("location")
292
- if hasattr(d, "_properties") and isinstance(d._properties, dict)
293
- else None
294
- ),
295
- # TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
296
- # TODO: Given we are calling get_dataset for each dataset, we may consume and publish other fields too, such as created, modified, etc...
297
- # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
298
- # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
299
- comment=self.bq_client.get_dataset(d.reference).description,
286
+ result = []
287
+ for d in datasets:
288
+ # TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
289
+ # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
290
+ # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
291
+ dataset = self.bq_client.get_dataset(d.reference)
292
+
293
+ location = (
294
+ d._properties.get("location")
295
+ if hasattr(d, "_properties") and isinstance(d._properties, dict)
296
+ else None
297
+ )
298
+
299
+ result.append(
300
+ BigqueryDataset(
301
+ name=d.dataset_id,
302
+ labels=d.labels,
303
+ location=location,
304
+ comment=dataset.description,
305
+ created=dataset.created,
306
+ last_altered=dataset.modified,
307
+ )
300
308
  )
301
- for d in datasets
302
- ]
309
+ return result
303
310
 
304
311
  # This is not used anywhere
305
312
  def get_datasets_for_project_id_with_information_schema(
@@ -12,6 +12,7 @@ from datahub.emitter.mce_builder import (
12
12
  make_dataset_urn_with_platform_instance,
13
13
  make_schema_field_urn,
14
14
  make_tag_urn,
15
+ make_ts_millis,
15
16
  )
16
17
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
17
18
  from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey
@@ -300,6 +301,8 @@ class BigQuerySchemaGenerator:
300
301
  description: Optional[str] = None,
301
302
  tags: Optional[Dict[str, str]] = None,
302
303
  extra_properties: Optional[Dict[str, str]] = None,
304
+ created: Optional[int] = None,
305
+ last_modified: Optional[int] = None,
303
306
  ) -> Iterable[MetadataWorkUnit]:
304
307
  schema_container_key = self.gen_dataset_key(project_id, dataset)
305
308
 
@@ -349,6 +352,8 @@ class BigQuerySchemaGenerator:
349
352
  ),
350
353
  tags=tags_joined,
351
354
  extra_properties=extra_properties,
355
+ created=created,
356
+ last_modified=last_modified,
352
357
  )
353
358
 
354
359
  def _process_project(
@@ -484,6 +489,12 @@ class BigQuerySchemaGenerator:
484
489
  else None
485
490
  ),
486
491
  description=bigquery_dataset.comment,
492
+ created=make_ts_millis(bigquery_dataset.created)
493
+ if bigquery_dataset.created
494
+ else None,
495
+ last_modified=make_ts_millis(bigquery_dataset.last_altered)
496
+ if bigquery_dataset.last_altered
497
+ else None,
487
498
  )
488
499
 
489
500
  columns = None
@@ -905,9 +905,9 @@ class LineageClient:
905
905
  direction: Literal["upstream", "downstream"],
906
906
  ) -> LineageResult:
907
907
  """Create a LineageResult from entity and entry data."""
908
- platform = entity.get("platform", {}).get("name") or entity.get(
909
- "dataPlatformInstance", {}
910
- ).get("platform", {}).get("name")
908
+ platform = (entity.get("platform") or {}).get("name") or (
909
+ (entity.get("dataPlatformInstance") or {}).get("platform") or {}
910
+ ).get("name")
911
911
 
912
912
  result = LineageResult(
913
913
  urn=entity["urn"],