acryl-datahub 1.2.0.9rc1__py3-none-any.whl → 1.2.0.9rc2__py3-none-any.whl

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of acryl-datahub might be problematic.

@@ -1,7 +1,7 @@
- acryl_datahub-1.2.0.9rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
+ acryl_datahub-1.2.0.9rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
- datahub/_version.py,sha256=_H3eOOLcAfSj64l30V-SpArSKBd8L7socBTn4sC-zB0,323
+ datahub/_version.py,sha256=czL9t4OxwP0dn-BD3TMnolCh7B9D-oNG5I9JL0ME8FI,323
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -267,8 +267,8 @@ datahub/ingestion/source/bigquery_v2/bigquery_helper.py,sha256=QER3gY8e_k1_eNVj7
  datahub/ingestion/source/bigquery_v2/bigquery_platform_resource_helper.py,sha256=9_sfX8BE2vt9RjBMyq27UxCxBaSlD5o3L4gQxrwlPvA,4961
  datahub/ingestion/source/bigquery_v2/bigquery_queries.py,sha256=2syDMaRpYEbtGUVejVAK5d6g8HqM54ZyEM908uLJ55o,3393
  datahub/ingestion/source/bigquery_v2/bigquery_report.py,sha256=v7_zkZzymKPmZKWAxnxmvmHC-8TQVGHUT-pBQFNehqc,7962
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=DHw5Z_rxj_fR09p7SO0UmDdvYEa_ViIRnLE9CFxPAAk,32525
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=29E_25jLpMIgWcYRC0ZcYd1fvaFtSi2T8S6hSwiTDTY,51090
+ datahub/ingestion/source/bigquery_v2/bigquery_schema.py,sha256=zbYb1EYnCJxgvsU8oT_76l0q_BW1exVjMWM1GAgd1nc,32600
+ datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py,sha256=c9a-SlZDOYNiS__vC5ezVVNM0UHasXWxWNRZkkP_aOo,51552
  datahub/ingestion/source/bigquery_v2/bigquery_test_connection.py,sha256=cATxwi5IPzj3BldRRAVcLqzSFmmYEPvqa7U0RFJbaAc,7645
  datahub/ingestion/source/bigquery_v2/common.py,sha256=IinOy-RO4UZGxSf5scaN02672BzZuNsjJZ56axti6iI,4016
  datahub/ingestion/source/bigquery_v2/lineage.py,sha256=jju14mJbAUMA_K3j2yq-TdZV202cjd5rBAsDPJGEVno,44900
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-1.2.0.9rc1.dist-info/METADATA,sha256=b0i8DTQvhz0wjm4LlnQ4cAEB3Fzj4VWzmznolB87FoU,186907
- acryl_datahub-1.2.0.9rc1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- acryl_datahub-1.2.0.9rc1.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
- acryl_datahub-1.2.0.9rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-1.2.0.9rc1.dist-info/RECORD,,
+ acryl_datahub-1.2.0.9rc2.dist-info/METADATA,sha256=gQoHdltvvyFIK3SgD-5zF_BTJGyiAKWM7rcfqN4ZTy4,186875
+ acryl_datahub-1.2.0.9rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ acryl_datahub-1.2.0.9rc2.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
+ acryl_datahub-1.2.0.9rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-1.2.0.9rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "1.2.0.9rc1"
+ __version__ = "1.2.0.9rc2"


  def is_dev_mode() -> bool:
datahub/ingestion/source/bigquery_v2/bigquery_schema.py CHANGED
@@ -283,23 +283,30 @@ class BigQuerySchemaApi:
          with self.report.list_datasets_timer:
              self.report.num_list_datasets_api_requests += 1
              datasets = self.bq_client.list_datasets(project_id, max_results=maxResults)
-         return [
-             BigqueryDataset(
-                 name=d.dataset_id,
-                 labels=d.labels,
-                 location=(
-                     d._properties.get("location")
-                     if hasattr(d, "_properties") and isinstance(d._properties, dict)
-                     else None
-                 ),
-                 # TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
-                 # TODO: Given we are calling get_dataset for each dataset, we may consume and publish other fields too, such as created, modified, etc...
-                 # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
-                 # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
-                 comment=self.bq_client.get_dataset(d.reference).description,
+         result = []
+         for d in datasets:
+             # TODO: Fetch dataset description individually impacts overall performance if the number of datasets is high (hundreds); instead we should fetch in batch for all datasets.
+             # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.client.Client#google_cloud_bigquery_client_Client_get_dataset
+             # https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.dataset.Dataset
+             dataset = self.bq_client.get_dataset(d.reference)
+
+             location = (
+                 d._properties.get("location")
+                 if hasattr(d, "_properties") and isinstance(d._properties, dict)
+                 else None
+             )
+
+             result.append(
+                 BigqueryDataset(
+                     name=d.dataset_id,
+                     labels=d.labels,
+                     location=location,
+                     comment=dataset.description,
+                     created=dataset.created,
+                     last_altered=dataset.modified,
+                 )
              )
-             for d in datasets
-         ]
+         return result

      # This is not used anywhere
      def get_datasets_for_project_id_with_information_schema(
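The refactor above replaces the list comprehension with an explicit loop so that the google.cloud.bigquery.Dataset returned by get_dataset can be reused for the description as well as the new created and modified fields. A minimal standalone sketch of the same access pattern, assuming google-cloud-bigquery is installed and credentials are configured ("my-project" is a placeholder, not from the package):

from google.cloud import bigquery

client = bigquery.Client()
for item in client.list_datasets("my-project", max_results=10):  # "my-project" is a placeholder
    # One get_dataset call per dataset; the TODO above notes this gets slow
    # when a project has hundreds of datasets.
    dataset = client.get_dataset(item.reference)
    print(item.dataset_id, dataset.description, dataset.created, dataset.modified)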
datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py CHANGED
@@ -12,6 +12,7 @@ from datahub.emitter.mce_builder import (
      make_dataset_urn_with_platform_instance,
      make_schema_field_urn,
      make_tag_urn,
+     make_ts_millis,
  )
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey
@@ -300,6 +301,8 @@ class BigQuerySchemaGenerator:
          description: Optional[str] = None,
          tags: Optional[Dict[str, str]] = None,
          extra_properties: Optional[Dict[str, str]] = None,
+         created: Optional[int] = None,
+         last_modified: Optional[int] = None,
      ) -> Iterable[MetadataWorkUnit]:
          schema_container_key = self.gen_dataset_key(project_id, dataset)

@@ -349,6 +352,8 @@ class BigQuerySchemaGenerator:
              ),
              tags=tags_joined,
              extra_properties=extra_properties,
+             created=created,
+             last_modified=last_modified,
          )

      def _process_project(
@@ -484,6 +489,12 @@ class BigQuerySchemaGenerator:
                  else None
              ),
              description=bigquery_dataset.comment,
+             created=make_ts_millis(bigquery_dataset.created)
+             if bigquery_dataset.created
+             else None,
+             last_modified=make_ts_millis(bigquery_dataset.last_altered)
+             if bigquery_dataset.last_altered
+             else None,
          )

          columns = None
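Here the generator converts the new dataset timestamps to epoch milliseconds before emitting them, via the newly imported make_ts_millis helper. A short sketch of that conversion, assuming make_ts_millis maps a UTC datetime to milliseconds since the epoch:

from datetime import datetime, timezone
from datahub.emitter.mce_builder import make_ts_millis

# Stand-in value for bigquery_dataset.created / bigquery_dataset.last_altered.
created = datetime(2024, 1, 2, 3, 4, 5, tzinfo=timezone.utc)
print(make_ts_millis(created))  # 1704164645000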