acryl-datahub: acryl_datahub-0.15.0rc22-py3-none-any.whl → acryl_datahub-0.15.0rc23-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=T0tNQ0v5Y2QyvLqZg1tU0kxvIjYvmZ8eZdrD_d8Uwe4,575
1
+ datahub/__init__.py,sha256=eOmo10Qg3UHdXM-mhXsProWUviox9Ng9kfUMS-B8xpo,575
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -93,7 +93,7 @@ datahub/configuration/common.py,sha256=Ngj2-HKPEhCMbcx3phUqyoOHayhqWNt1t0e2hO3GQ
93
93
  datahub/configuration/config_loader.py,sha256=4V8rrbKvCbfEys2Tlw2uZXb3yC9Hpoubn2O8GXhGe3A,5785
94
94
  datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
95
95
  datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
96
- datahub/configuration/git.py,sha256=s55eUHxKqVZgtVsISaDyS-1F4iZBiybbjYsjbp5LU5o,6135
96
+ datahub/configuration/git.py,sha256=q9iac6cc6oZ3RVSPTyuR2VMsmt2wr-uVaCLWohdKVV0,6461
97
97
  datahub/configuration/import_resolver.py,sha256=b4Ie9L7knN1LALEVMxTcNFSklDD6CVE-4Ipy4ZYhNYA,369
98
98
  datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
99
99
  datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVgI,2579
@@ -427,7 +427,7 @@ datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81
427
427
  datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
428
428
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
429
429
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
430
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=EnTJoRIQKcZOIYfb_NUff_YA8IdIroaFD1JHUn-M6ok,23346
430
+ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=suMICPFPvoV6shkjD_14JunLc8jAZBINzlFk2mYldkU,23676
431
431
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
432
432
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
433
433
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=yDu_1aTAG7eLEh1w1FGmn2-c6NJZURdslnI6fC_4B_0,38723
@@ -976,8 +976,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
976
976
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
977
977
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
978
978
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
979
- acryl_datahub-0.15.0rc22.dist-info/METADATA,sha256=48jbXm5fKitlO7rhjtNA1FcJT9Y7ypQ25EtatHbSeqY,173559
980
- acryl_datahub-0.15.0rc22.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
981
- acryl_datahub-0.15.0rc22.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
982
- acryl_datahub-0.15.0rc22.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
983
- acryl_datahub-0.15.0rc22.dist-info/RECORD,,
979
+ acryl_datahub-0.15.0rc23.dist-info/METADATA,sha256=cPlJko8JF1pZEIihXKsAct2ai4okUHAMu8e3sAha7mU,173559
980
+ acryl_datahub-0.15.0rc23.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
981
+ acryl_datahub-0.15.0rc23.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
982
+ acryl_datahub-0.15.0rc23.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
983
+ acryl_datahub-0.15.0rc23.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc22"
6
+ __version__ = "0.15.0rc23"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
datahub/configuration/git.py CHANGED
@@ -24,7 +24,11 @@ class GitReference(ConfigModel):
24
24
  "main",
25
25
  description="Branch on which your files live by default. Typically main or master. This can also be a commit hash.",
26
26
  )
27
-
27
+ url_subdir: Optional[str] = Field(
28
+ default=None,
29
+ description="Prefix to prepend when generating URLs for files - useful when files are in a subdirectory. "
30
+ "Only affects URL generation, not git operations.",
31
+ )
28
32
  url_template: Optional[str] = Field(
29
33
  None,
30
34
  description=f"Template for generating a URL to a file in the repo e.g. '{_GITHUB_URL_TEMPLATE}'. We can infer this for GitHub and GitLab repos, and it is otherwise required."
@@ -68,6 +72,8 @@ class GitReference(ConfigModel):
68
72
 
69
73
  def get_url_for_file_path(self, file_path: str) -> str:
70
74
  assert self.url_template
75
+ if self.url_subdir:
76
+ file_path = f"{self.url_subdir}/{file_path}"
71
77
  return self.url_template.format(
72
78
  repo_url=self.repo, branch=self.branch, file_path=file_path
73
79
  )
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py CHANGED
@@ -414,9 +414,13 @@ class SnowflakeLineageExtractor(SnowflakeCommonMixin, Closeable):
414
414
  except Exception as e:
415
415
  self.report.num_upstream_lineage_edge_parsing_failed += 1
416
416
  upstream_tables = db_row.get("UPSTREAM_TABLES")
417
+ downstream_table = db_row.get("DOWNSTREAM_TABLE_NAME")
417
418
  self.structured_reporter.warning(
418
419
  "Failed to parse lineage edge",
419
- context=f"Upstreams: {upstream_tables} Downstreams: {db_row.get('DOWNSTREAM_TABLE_NAME')}",
420
+ # Tricky: sometimes the full row data is too large, and so the context
421
+ # message gets truncated. By pulling out the upstreams and downstream
422
+ # list, we can at least get the important fields if truncation does occur.
423
+ context=f"Upstreams: {upstream_tables} Downstream: {downstream_table} Full row: {db_row}",
420
424
  exc=e,
421
425
  )
422
426
  return None