acryl-datahub 0.15.0.1rc4__py3-none-any.whl → 0.15.0.1rc6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=xj1tZWzMrO5870oYWLdzPRRtS9Tvh00hceW9--1K46o,576
1
+ datahub/__init__.py,sha256=lThsP3KmoFgrvSScMvSH0uLu7H2JJshAHDrzuIjHRjs,576
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -164,7 +164,7 @@ datahub/ingestion/glossary/classifier.py,sha256=daLxnVv_JlfB_jBOxH5LrU_xQRndrsGo
164
164
  datahub/ingestion/glossary/classifier_registry.py,sha256=yFOYLQhDgCLqXYMG3L1BquXafeLcZDcmp8meyw6k9ts,307
165
165
  datahub/ingestion/glossary/datahub_classifier.py,sha256=8VhwuLDhyOqqOr0jqAPIgorb4eAOnvTr4m13Y2Wy1-E,7515
166
166
  datahub/ingestion/graph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
167
- datahub/ingestion/graph/client.py,sha256=oBlM6RSo0SPFJ-yit2eFFOB3rOpnjKtQ83YNiWGd334,64584
167
+ datahub/ingestion/graph/client.py,sha256=AYDFwP9a_M-fCZv-PcWMSr5tc53XWJl372SWKwdu37E,64651
168
168
  datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4Cw8k,749
169
169
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
170
170
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
@@ -265,7 +265,7 @@ datahub/ingestion/source/data_lake_common/config.py,sha256=qUk83B01hjuBKHvVz8SmX
265
265
  datahub/ingestion/source/data_lake_common/data_lake_utils.py,sha256=nxu7osuzqxScPFc-1ODA2M1c_xPNPpRH_SMMU7zKOIE,6212
266
266
  datahub/ingestion/source/data_lake_common/path_spec.py,sha256=u3u2eMe70V5vur-j8mYtupZdoeA2hSeK262Whdsc2YU,23506
267
267
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
268
- datahub/ingestion/source/datahub/config.py,sha256=pOXt0b1PX6D7dtD4RuKwdmr6sQKnXSf6LHxfPUMhP8s,3658
268
+ datahub/ingestion/source/datahub/config.py,sha256=rqZFvEmjxjBcW2cTEPYDVTAk3OLzuGIjEFghXPNeZNY,3955
269
269
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
270
270
  datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=F8JrOjSrmJ2B6m1MWh83A1EYFDcGMla749HUeQWMnL0,9464
271
271
  datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=8x9_u5kRjgSmu7c295ZIZjxP6bgoZZbWsKRicuLStRQ,4145
@@ -432,7 +432,7 @@ datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81
432
432
  datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
433
433
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
434
434
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
435
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=uMGmMEl4hWEmN7GxMyDBdwlIPAW7WmOnu41kZ0dvCG4,21551
435
+ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=kpZvGbWmLMEUBslGToeA3oBO__8V2FzPvUKJSNw_3VM,21598
436
436
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
437
437
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=8QEihOfivalVR9vLo6vCUL-vnZfAGgMio0uhPYX0jTo,25883
438
438
  datahub/ingestion/source/snowflake/snowflake_query.py,sha256=yDu_1aTAG7eLEh1w1FGmn2-c6NJZURdslnI6fC_4B_0,38723
@@ -863,7 +863,7 @@ datahub/specific/chart.py,sha256=DsLA5qHBIMNc1pIZ1AC5kLvwpRDd79Q56N4SANOofps,113
863
863
  datahub/specific/custom_properties.py,sha256=Ob8L9b9QIbUvHfzWo4L-SNY1QSRhgRy30kLRDdenGEs,1024
864
864
  datahub/specific/dashboard.py,sha256=kRfyJsm7piugxBg0IfIbLmvv6Smk3D44IGVw8THLqPE,15100
865
865
  datahub/specific/datajob.py,sha256=5pEBrN6llpgS7jWYEfrvqpbT2vMVVpepH71jIUJUo4U,18480
866
- datahub/specific/dataproduct.py,sha256=Mt-QlndY4Die87XwakYTAcvyDzaB5fmyn1NpQGGcZyI,5235
866
+ datahub/specific/dataproduct.py,sha256=lVv3TGkZyZ0t9CUXLnkwMhr8GK1HB-fiyRyjxTdvb7s,5259
867
867
  datahub/specific/dataset.py,sha256=TAI8SRhhhsv1zEi3lGv24NX6PTJDrEyt5v0Sdg-uFY8,13568
868
868
  datahub/specific/form.py,sha256=jVI0JD-o2-XkD1suW_ITnTZUF0GNbGjaNb9-PXdfdkA,4549
869
869
  datahub/specific/ownership.py,sha256=KlYnk7o0Tq2EVugW7qRWR9D3v0C8PuqIdwgUzYwlkDM,1446
@@ -879,7 +879,7 @@ datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgn
879
879
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=LBs1RjRqh3natrx4WfgRQGNpI56o12jtbABO5ipEBWA,69889
880
880
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
881
881
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
882
- datahub/sql_parsing/sqlglot_lineage.py,sha256=CLDOc0HNqL_539eahOP3QOoldIYC6CF29id4Xe3TlEM,47018
882
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=gUVq3NwZUzQByJs43JZXz8lZf0ZVzVt0FzaW5wZOwK4,47460
883
883
  datahub/sql_parsing/sqlglot_utils.py,sha256=n6yufzEGwSlFeCSU540hEldIuab0q8KGqm9x0vSawkc,14699
884
884
  datahub/sql_parsing/tool_meta_extractor.py,sha256=7tY4FAClhFcqwc23lGVlnT6Dequ_5Xcpbt0hDvnlLzM,6670
885
885
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -981,8 +981,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
981
981
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
982
982
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
983
983
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
984
- acryl_datahub-0.15.0.1rc4.dist-info/METADATA,sha256=i_9pmgefT8nmwwsz_l6hvTGQHT3yUBsFF8kImMd9aco,173642
985
- acryl_datahub-0.15.0.1rc4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
986
- acryl_datahub-0.15.0.1rc4.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
987
- acryl_datahub-0.15.0.1rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
988
- acryl_datahub-0.15.0.1rc4.dist-info/RECORD,,
984
+ acryl_datahub-0.15.0.1rc6.dist-info/METADATA,sha256=NwalxQqxf_XeT9QBcWc4wsNKE11Hv59p6ZZ8HRGVyGc,173642
985
+ acryl_datahub-0.15.0.1rc6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
986
+ acryl_datahub-0.15.0.1rc6.dist-info/entry_points.txt,sha256=xnPSPLK3bJGADxe4TDS4wL4u0FT_PGlahDa-ENYdYCQ,9512
987
+ acryl_datahub-0.15.0.1rc6.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
988
+ acryl_datahub-0.15.0.1rc6.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0.1rc4"
6
+ __version__ = "0.15.0.1rc6"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
datahub/ingestion/graph/client.py CHANGED
@@ -188,9 +188,12 @@ class DataHubGraph(DatahubRestEmitter):
188
188
  retry_max_times=emitter._retry_max_times,
189
189
  extra_headers=emitter._session.headers,
190
190
  disable_ssl_verification=emitter._session.verify is False,
191
- # TODO: Support these headers.
192
- # ca_certificate_path=emitter._ca_certificate_path,
193
- # client_certificate_path=emitter._client_certificate_path,
191
+ ca_certificate_path=(
192
+ emitter._session.verify
193
+ if isinstance(emitter._session.verify, str)
194
+ else None
195
+ ),
196
+ client_certificate_path=emitter._session.cert,
194
197
  )
195
198
  )
196
199
 
datahub/ingestion/source/datahub/config.py CHANGED
@@ -14,6 +14,17 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
14
14
  DEFAULT_DATABASE_TABLE_NAME = "metadata_aspect_v2"
15
15
  DEFAULT_KAFKA_TOPIC_NAME = "MetadataChangeLog_Timeseries_v1"
16
16
  DEFAULT_DATABASE_BATCH_SIZE = 10_000
17
+ DEFAULT_EXCLUDE_ASPECTS = {
18
+ "dataHubIngestionSourceKey",
19
+ "dataHubIngestionSourceInfo",
20
+ "datahubIngestionRunSummary",
21
+ "datahubIngestionCheckpoint",
22
+ "dataHubSecretKey",
23
+ "dataHubSecretValue",
24
+ "globalSettingsKey",
25
+ "globalSettingsInfo",
26
+ "testResults",
27
+ }
17
28
 
18
29
 
19
30
  class DataHubSourceConfig(StatefulIngestionConfigBase):
@@ -44,7 +55,7 @@ class DataHubSourceConfig(StatefulIngestionConfigBase):
44
55
  )
45
56
 
46
57
  exclude_aspects: Set[str] = Field(
47
- default_factory=set,
58
+ default=DEFAULT_EXCLUDE_ASPECTS,
48
59
  description="Set of aspect names to exclude from ingestion",
49
60
  )
50
61
 
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py CHANGED
@@ -4,7 +4,7 @@ from dataclasses import dataclass
4
4
  from datetime import datetime
5
5
  from typing import Any, Collection, Iterable, List, Optional, Set, Tuple, Type
6
6
 
7
- from pydantic import BaseModel, validator
7
+ from pydantic import BaseModel, Field, validator
8
8
 
9
9
  from datahub.configuration.datetimes import parse_absolute_time
10
10
  from datahub.ingestion.api.closeable import Closeable
@@ -72,8 +72,8 @@ class ColumnUpstreamJob(BaseModel):
72
72
 
73
73
 
74
74
  class ColumnUpstreamLineage(BaseModel):
75
- column_name: str
76
- upstreams: List[ColumnUpstreamJob]
75
+ column_name: Optional[str]
76
+ upstreams: List[ColumnUpstreamJob] = Field(default_factory=list)
77
77
 
78
78
 
79
79
  class UpstreamTableNode(BaseModel):
datahub/specific/dataproduct.py CHANGED
@@ -131,7 +131,7 @@ class DataProductPatchBuilder(MetadataPatchProposal):
131
131
  self._add_patch(
132
132
  DataProductProperties.ASPECT_NAME,
133
133
  "add",
134
- path=f"/assets/{asset_urn}",
134
+ path=f"/assets/{self.quote(asset_urn)}",
135
135
  value=DataProductAssociation(destinationUrn=asset_urn),
136
136
  )
137
137
  return self
@@ -140,7 +140,7 @@ class DataProductPatchBuilder(MetadataPatchProposal):
140
140
  self._add_patch(
141
141
  DataProductProperties.ASPECT_NAME,
142
142
  "remove",
143
- path=f"/assets/{asset_urn}",
143
+ path=f"/assets/{self.quote(asset_urn)}",
144
144
  value={},
145
145
  )
146
146
  return self
datahub/sql_parsing/sqlglot_lineage.py CHANGED
@@ -66,6 +66,7 @@ SQL_LINEAGE_TIMEOUT_ENABLED = get_boolean_env_variable(
66
66
  "SQL_LINEAGE_TIMEOUT_ENABLED", True
67
67
  )
68
68
  SQL_LINEAGE_TIMEOUT_SECONDS = 10
69
+ SQL_PARSER_TRACE = get_boolean_env_variable("DATAHUB_SQL_PARSER_TRACE", False)
69
70
 
70
71
 
71
72
  # These rules are a subset of the rules in sqlglot.optimizer.optimizer.RULES.
@@ -365,10 +366,11 @@ def _prepare_query_columns(
365
366
 
366
367
  return node
367
368
 
368
- # logger.debug(
369
- # "Prior to case normalization sql %s",
370
- # statement.sql(pretty=True, dialect=dialect),
371
- # )
369
+ if SQL_PARSER_TRACE:
370
+ logger.debug(
371
+ "Prior to case normalization sql %s",
372
+ statement.sql(pretty=True, dialect=dialect),
373
+ )
372
374
  statement = statement.transform(_sqlglot_force_column_normalizer, copy=False)
373
375
  # logger.debug(
374
376
  # "Sql after casing normalization %s",
@@ -562,7 +564,7 @@ def _select_statement_cll( # noqa: C901
562
564
  )
563
565
  )
564
566
 
565
- # TODO: Also extract referenced columns (aka auxillary / non-SELECT lineage)
567
+ # TODO: Also extract referenced columns (aka auxiliary / non-SELECT lineage)
566
568
  except (sqlglot.errors.OptimizeError, ValueError, IndexError) as e:
567
569
  raise SqlUnderstandingError(
568
570
  f"sqlglot failed to compute some lineage: {e}"
@@ -1022,6 +1024,14 @@ def _sqlglot_lineage_inner(
1022
1024
  logger.debug(
1023
1025
  f"Resolved {total_schemas_resolved} of {total_tables_discovered} table schemas"
1024
1026
  )
1027
+ if SQL_PARSER_TRACE:
1028
+ for qualified_table, schema_info in table_name_schema_mapping.items():
1029
+ logger.debug(
1030
+ "Table name %s resolved to %s with schema %s",
1031
+ qualified_table,
1032
+ table_name_urn_mapping[qualified_table],
1033
+ schema_info,
1034
+ )
1025
1035
 
1026
1036
  column_lineage: Optional[List[_ColumnLineageInfo]] = None
1027
1037
  try: