acryl-datahub 1.1.0.5rc6__py3-none-any.whl → 1.1.0.5rc7__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (38)
  1. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc7.dist-info}/METADATA +2547 -2547
  2. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc7.dist-info}/RECORD +38 -35
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/api/report.py +183 -35
  5. datahub/ingestion/autogenerated/capability_summary.json +3366 -0
  6. datahub/ingestion/autogenerated/lineage.json +401 -0
  7. datahub/ingestion/autogenerated/lineage_helper.py +30 -128
  8. datahub/ingestion/run/pipeline.py +4 -1
  9. datahub/ingestion/source/bigquery_v2/bigquery.py +23 -22
  10. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  11. datahub/ingestion/source/common/subtypes.py +1 -1
  12. datahub/ingestion/source/data_lake_common/object_store.py +40 -0
  13. datahub/ingestion/source/dremio/dremio_source.py +6 -3
  14. datahub/ingestion/source/gcs/gcs_source.py +4 -1
  15. datahub/ingestion/source/ge_data_profiler.py +28 -20
  16. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  17. datahub/ingestion/source/mock_data/datahub_mock_data.py +45 -0
  18. datahub/ingestion/source/redshift/usage.py +4 -3
  19. datahub/ingestion/source/s3/source.py +19 -3
  20. datahub/ingestion/source/snowflake/snowflake_queries.py +47 -3
  21. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  22. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  23. datahub/ingestion/source/unity/proxy.py +4 -3
  24. datahub/ingestion/source/unity/source.py +10 -8
  25. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  26. datahub/metadata/_internal_schema_classes.py +85 -4
  27. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +2 -0
  28. datahub/metadata/schema.avsc +54 -1
  29. datahub/metadata/schemas/CorpUserSettings.avsc +17 -1
  30. datahub/metadata/schemas/GlobalSettingsInfo.avsc +37 -0
  31. datahub/sdk/lineage_client.py +2 -0
  32. datahub/sql_parsing/sql_parsing_aggregator.py +3 -3
  33. datahub/sql_parsing/sqlglot_lineage.py +2 -0
  34. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  35. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc7.dist-info}/WHEEL +0 -0
  36. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc7.dist-info}/entry_points.txt +0 -0
  37. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc7.dist-info}/licenses/LICENSE +0 -0
  38. {acryl_datahub-1.1.0.5rc6.dist-info → acryl_datahub-1.1.0.5rc7.dist-info}/top_level.txt +0 -0
@@ -768,10 +768,11 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
768
768
 
769
769
  def gen_schema_containers(self, schema: Schema) -> Iterable[MetadataWorkUnit]:
770
770
  domain_urn = self._gen_domain_urn(f"{schema.catalog.name}.{schema.name}")
771
- schema_tags = self.unity_catalog_api_proxy.get_schema_tags(
772
- schema.catalog.name
773
- ).get(f"{schema.catalog.name}.{schema.name}", [])
774
- if schema_tags:
771
+ schema_tags = []
772
+ if self.config.include_tags:
773
+ schema_tags = self.unity_catalog_api_proxy.get_schema_tags(
774
+ schema.catalog.name
775
+ ).get(f"{schema.catalog.name}.{schema.name}", [])
775
776
  logger.debug(f"Schema tags for {schema.name}: {schema_tags}")
776
777
  # Generate platform resources for schema tags
777
778
  yield from self.gen_platform_resources(schema_tags)
@@ -809,10 +810,11 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
809
810
 
810
811
  def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit]:
811
812
  domain_urn = self._gen_domain_urn(catalog.name)
812
- catalog_tags = self.unity_catalog_api_proxy.get_catalog_tags(catalog.name).get(
813
- catalog.name, []
814
- )
815
- if catalog_tags:
813
+ catalog_tags = []
814
+ if self.config.include_tags:
815
+ catalog_tags = self.unity_catalog_api_proxy.get_catalog_tags(
816
+ catalog.name
817
+ ).get(catalog.name, [])
816
818
  logger.debug(f"Schema tags for {catalog.name}: {catalog_tags}")
817
819
  # Generate platform resources for schema tags
818
820
  yield from self.gen_platform_resources(catalog_tags)
@@ -84,9 +84,10 @@ class SnowflakeAssertionCompiler(AssertionCompiler):
84
84
 
85
85
  dmf_definitions_path = self.output_dir / DMF_DEFINITIONS_FILE_NAME
86
86
  dmf_associations_path = self.output_dir / DMF_ASSOCIATIONS_FILE_NAME
87
- with (dmf_definitions_path).open("w") as definitions, (
88
- dmf_associations_path
89
- ).open("w") as associations:
87
+ with (
88
+ (dmf_definitions_path).open("w") as definitions,
89
+ (dmf_associations_path).open("w") as associations,
90
+ ):
90
91
  for assertion_spec in assertion_config_spec.assertions:
91
92
  result.report.num_processed += 1
92
93
  try:
@@ -14127,26 +14127,39 @@ class CorpUserHomePageSettingsClass(DictWrapper):
14127
14127
 
14128
14128
  RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.identity.CorpUserHomePageSettings")
14129
14129
  def __init__(self,
14130
- pageTemplate: str,
14130
+ pageTemplate: Union[None, str]=None,
14131
+ dismissedAnnouncements: Union[None, List[str]]=None,
14131
14132
  ):
14132
14133
  super().__init__()
14133
14134
 
14134
14135
  self.pageTemplate = pageTemplate
14136
+ self.dismissedAnnouncements = dismissedAnnouncements
14135
14137
 
14136
14138
  def _restore_defaults(self) -> None:
14137
- self.pageTemplate = str()
14139
+ self.pageTemplate = self.RECORD_SCHEMA.fields_dict["pageTemplate"].default
14140
+ self.dismissedAnnouncements = self.RECORD_SCHEMA.fields_dict["dismissedAnnouncements"].default
14138
14141
 
14139
14142
 
14140
14143
  @property
14141
- def pageTemplate(self) -> str:
14144
+ def pageTemplate(self) -> Union[None, str]:
14142
14145
  """The page template that will be rendered in the UI by default for this user"""
14143
14146
  return self._inner_dict.get('pageTemplate') # type: ignore
14144
14147
 
14145
14148
  @pageTemplate.setter
14146
- def pageTemplate(self, value: str) -> None:
14149
+ def pageTemplate(self, value: Union[None, str]) -> None:
14147
14150
  self._inner_dict['pageTemplate'] = value
14148
14151
 
14149
14152
 
14153
+ @property
14154
+ def dismissedAnnouncements(self) -> Union[None, List[str]]:
14155
+ """The list of announcement urns that have been dismissed by the user"""
14156
+ return self._inner_dict.get('dismissedAnnouncements') # type: ignore
14157
+
14158
+ @dismissedAnnouncements.setter
14159
+ def dismissedAnnouncements(self, value: Union[None, List[str]]) -> None:
14160
+ self._inner_dict['dismissedAnnouncements'] = value
14161
+
14162
+
14150
14163
  class CorpUserInfoClass(_Aspect):
14151
14164
  """Linkedin corp user information"""
14152
14165
 
@@ -24394,6 +24407,59 @@ class DataHubSecretValueClass(_Aspect):
24394
24407
  self._inner_dict['created'] = value
24395
24408
 
24396
24409
 
24410
+ class ApplicationsSettingsClass(DictWrapper):
24411
+ # No docs available.
24412
+
24413
+ RECORD_SCHEMA = get_schema_type("com.linkedin.pegasus2avro.settings.global.ApplicationsSettings")
24414
+ def __init__(self,
24415
+ enabled: bool,
24416
+ config: Union[None, str]=None,
24417
+ configVersion: Union[None, str]=None,
24418
+ ):
24419
+ super().__init__()
24420
+
24421
+ self.enabled = enabled
24422
+ self.config = config
24423
+ self.configVersion = configVersion
24424
+
24425
+ def _restore_defaults(self) -> None:
24426
+ self.enabled = bool()
24427
+ self.config = self.RECORD_SCHEMA.fields_dict["config"].default
24428
+ self.configVersion = self.RECORD_SCHEMA.fields_dict["configVersion"].default
24429
+
24430
+
24431
+ @property
24432
+ def enabled(self) -> bool:
24433
+ # No docs available.
24434
+ return self._inner_dict.get('enabled') # type: ignore
24435
+
24436
+ @enabled.setter
24437
+ def enabled(self, value: bool) -> None:
24438
+ self._inner_dict['enabled'] = value
24439
+
24440
+
24441
+ @property
24442
+ def config(self) -> Union[None, str]:
24443
+ """The configuration for the feature, in JSON format."""
24444
+ return self._inner_dict.get('config') # type: ignore
24445
+
24446
+ @config.setter
24447
+ def config(self, value: Union[None, str]) -> None:
24448
+ self._inner_dict['config'] = value
24449
+
24450
+
24451
+ @property
24452
+ def configVersion(self) -> Union[None, str]:
24453
+ """The version of the configuration schema that has been used to serialize
24454
+ the config.
24455
+ If not provided, the version is assumed to be the latest version."""
24456
+ return self._inner_dict.get('configVersion') # type: ignore
24457
+
24458
+ @configVersion.setter
24459
+ def configVersion(self, value: Union[None, str]) -> None:
24460
+ self._inner_dict['configVersion'] = value
24461
+
24462
+
24397
24463
  class DocPropagationFeatureSettingsClass(DictWrapper):
24398
24464
  # No docs available.
24399
24465
 
@@ -24502,6 +24568,7 @@ class GlobalSettingsInfoClass(_Aspect):
24502
24568
  views: Union[None, "GlobalViewsSettingsClass"]=None,
24503
24569
  docPropagation: Optional[Union["DocPropagationFeatureSettingsClass", None]]=None,
24504
24570
  homePage: Union[None, "GlobalHomePageSettingsClass"]=None,
24571
+ applications: Union[None, "ApplicationsSettingsClass"]=None,
24505
24572
  ):
24506
24573
  super().__init__()
24507
24574
 
@@ -24513,12 +24580,14 @@ class GlobalSettingsInfoClass(_Aspect):
24513
24580
  else:
24514
24581
  self.docPropagation = docPropagation
24515
24582
  self.homePage = homePage
24583
+ self.applications = applications
24516
24584
 
24517
24585
  def _restore_defaults(self) -> None:
24518
24586
  self.sso = self.RECORD_SCHEMA.fields_dict["sso"].default
24519
24587
  self.views = self.RECORD_SCHEMA.fields_dict["views"].default
24520
24588
  self.docPropagation = _json_converter.from_json_object(self.RECORD_SCHEMA.fields_dict["docPropagation"].default, writers_schema=self.RECORD_SCHEMA.fields_dict["docPropagation"].type)
24521
24589
  self.homePage = self.RECORD_SCHEMA.fields_dict["homePage"].default
24590
+ self.applications = self.RECORD_SCHEMA.fields_dict["applications"].default
24522
24591
 
24523
24592
 
24524
24593
  @property
@@ -24561,6 +24630,16 @@ class GlobalSettingsInfoClass(_Aspect):
24561
24630
  self._inner_dict['homePage'] = value
24562
24631
 
24563
24632
 
24633
+ @property
24634
+ def applications(self) -> Union[None, "ApplicationsSettingsClass"]:
24635
+ """Settings related to applications. If not enabled, applications won't show up in navigation"""
24636
+ return self._inner_dict.get('applications') # type: ignore
24637
+
24638
+ @applications.setter
24639
+ def applications(self, value: Union[None, "ApplicationsSettingsClass"]) -> None:
24640
+ self._inner_dict['applications'] = value
24641
+
24642
+
24564
24643
  class GlobalViewsSettingsClass(DictWrapper):
24565
24644
  """Settings for DataHub Views feature."""
24566
24645
 
@@ -27086,6 +27165,7 @@ __SCHEMA_TYPES = {
27086
27165
  'com.linkedin.pegasus2avro.schemafield.SchemaFieldAliases': SchemaFieldAliasesClass,
27087
27166
  'com.linkedin.pegasus2avro.schemafield.SchemaFieldInfo': SchemaFieldInfoClass,
27088
27167
  'com.linkedin.pegasus2avro.secret.DataHubSecretValue': DataHubSecretValueClass,
27168
+ 'com.linkedin.pegasus2avro.settings.global.ApplicationsSettings': ApplicationsSettingsClass,
27089
27169
  'com.linkedin.pegasus2avro.settings.global.DocPropagationFeatureSettings': DocPropagationFeatureSettingsClass,
27090
27170
  'com.linkedin.pegasus2avro.settings.global.GlobalHomePageSettings': GlobalHomePageSettingsClass,
27091
27171
  'com.linkedin.pegasus2avro.settings.global.GlobalSettingsInfo': GlobalSettingsInfoClass,
@@ -27592,6 +27672,7 @@ __SCHEMA_TYPES = {
27592
27672
  'SchemaFieldAliases': SchemaFieldAliasesClass,
27593
27673
  'SchemaFieldInfo': SchemaFieldInfoClass,
27594
27674
  'DataHubSecretValue': DataHubSecretValueClass,
27675
+ 'ApplicationsSettings': ApplicationsSettingsClass,
27595
27676
  'DocPropagationFeatureSettings': DocPropagationFeatureSettingsClass,
27596
27677
  'GlobalHomePageSettings': GlobalHomePageSettingsClass,
27597
27678
  'GlobalSettingsInfo': GlobalSettingsInfoClass,
@@ -7,6 +7,7 @@
7
7
  # pylint: skip-file
8
8
  # fmt: off
9
9
  # isort: skip_file
10
+ from ......schema_classes import ApplicationsSettingsClass
10
11
  from ......schema_classes import DocPropagationFeatureSettingsClass
11
12
  from ......schema_classes import GlobalHomePageSettingsClass
12
13
  from ......schema_classes import GlobalSettingsInfoClass
@@ -15,6 +16,7 @@ from ......schema_classes import OidcSettingsClass
15
16
  from ......schema_classes import SsoSettingsClass
16
17
 
17
18
 
19
+ ApplicationsSettings = ApplicationsSettingsClass
18
20
  DocPropagationFeatureSettings = DocPropagationFeatureSettingsClass
19
21
  GlobalHomePageSettings = GlobalHomePageSettingsClass
20
22
  GlobalSettingsInfo = GlobalSettingsInfoClass
@@ -751,9 +751,25 @@
751
751
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
752
752
  },
753
753
  "Urn": "Urn",
754
- "type": "string",
754
+ "type": [
755
+ "null",
756
+ "string"
757
+ ],
755
758
  "name": "pageTemplate",
759
+ "default": null,
756
760
  "doc": "The page template that will be rendered in the UI by default for this user"
761
+ },
762
+ {
763
+ "type": [
764
+ "null",
765
+ {
766
+ "type": "array",
767
+ "items": "string"
768
+ }
769
+ ],
770
+ "name": "dismissedAnnouncements",
771
+ "default": null,
772
+ "doc": "The list of announcement urns that have been dismissed by the user"
757
773
  }
758
774
  ],
759
775
  "doc": "Settings related to the home page for a user"
@@ -18531,6 +18547,43 @@
18531
18547
  "name": "homePage",
18532
18548
  "default": null,
18533
18549
  "doc": "Global settings related to the home page for an instance"
18550
+ },
18551
+ {
18552
+ "type": [
18553
+ "null",
18554
+ {
18555
+ "type": "record",
18556
+ "name": "ApplicationsSettings",
18557
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
18558
+ "fields": [
18559
+ {
18560
+ "type": "boolean",
18561
+ "name": "enabled"
18562
+ },
18563
+ {
18564
+ "type": [
18565
+ "null",
18566
+ "string"
18567
+ ],
18568
+ "name": "config",
18569
+ "default": null,
18570
+ "doc": "The configuration for the feature, in JSON format."
18571
+ },
18572
+ {
18573
+ "type": [
18574
+ "null",
18575
+ "string"
18576
+ ],
18577
+ "name": "configVersion",
18578
+ "default": null,
18579
+ "doc": "The version of the configuration schema that has been used to serialize\n the config.\nIf not provided, the version is assumed to be the latest version."
18580
+ }
18581
+ ]
18582
+ }
18583
+ ],
18584
+ "name": "applications",
18585
+ "default": null,
18586
+ "doc": "Settings related to applications. If not enabled, applications won't show up in navigation"
18534
18587
  }
18535
18588
  ],
18536
18589
  "doc": "DataHub Global platform settings. Careful - these should not be modified by the outside world!"
@@ -172,10 +172,26 @@
172
172
  "java": {
173
173
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
174
174
  },
175
- "type": "string",
175
+ "type": [
176
+ "null",
177
+ "string"
178
+ ],
176
179
  "name": "pageTemplate",
180
+ "default": null,
177
181
  "doc": "The page template that will be rendered in the UI by default for this user",
178
182
  "Urn": "Urn"
183
+ },
184
+ {
185
+ "type": [
186
+ "null",
187
+ {
188
+ "type": "array",
189
+ "items": "string"
190
+ }
191
+ ],
192
+ "name": "dismissedAnnouncements",
193
+ "default": null,
194
+ "doc": "The list of announcement urns that have been dismissed by the user"
179
195
  }
180
196
  ],
181
197
  "doc": "Settings related to the home page for a user"
@@ -307,6 +307,43 @@
307
307
  "name": "homePage",
308
308
  "default": null,
309
309
  "doc": "Global settings related to the home page for an instance"
310
+ },
311
+ {
312
+ "type": [
313
+ "null",
314
+ {
315
+ "type": "record",
316
+ "name": "ApplicationsSettings",
317
+ "namespace": "com.linkedin.pegasus2avro.settings.global",
318
+ "fields": [
319
+ {
320
+ "type": "boolean",
321
+ "name": "enabled"
322
+ },
323
+ {
324
+ "type": [
325
+ "null",
326
+ "string"
327
+ ],
328
+ "name": "config",
329
+ "default": null,
330
+ "doc": "The configuration for the feature, in JSON format."
331
+ },
332
+ {
333
+ "type": [
334
+ "null",
335
+ "string"
336
+ ],
337
+ "name": "configVersion",
338
+ "default": null,
339
+ "doc": "The version of the configuration schema that has been used to serialize\n the config.\nIf not provided, the version is assumed to be the latest version."
340
+ }
341
+ ]
342
+ }
343
+ ],
344
+ "name": "applications",
345
+ "default": null,
346
+ "doc": "Settings related to applications. If not enabled, applications won't show up in navigation"
310
347
  }
311
348
  ],
312
349
  "doc": "DataHub Global platform settings. Careful - these should not be modified by the outside world!"
@@ -478,6 +478,7 @@ class LineageClient:
478
478
  env: str = "PROD",
479
479
  default_db: Optional[str] = None,
480
480
  default_schema: Optional[str] = None,
481
+ default_dialect: Optional[str] = None,
481
482
  ) -> None:
482
483
  """Add lineage by parsing a SQL query."""
483
484
  from datahub.sql_parsing.sqlglot_lineage import (
@@ -493,6 +494,7 @@ class LineageClient:
493
494
  platform_instance=platform_instance,
494
495
  env=env,
495
496
  graph=self._client._graph,
497
+ default_dialect=default_dialect,
496
498
  )
497
499
 
498
500
  if parsed_result.debug_info.table_error:
@@ -1494,9 +1494,9 @@ class SqlParsingAggregator(Closeable):
1494
1494
  return
1495
1495
 
1496
1496
  # If a query doesn't involve any allowed tables, skip it.
1497
- if downstream_urn is None and not any(
1498
- self.is_allowed_table(urn) for urn in query.upstreams
1499
- ):
1497
+ if (
1498
+ downstream_urn is None or not self.is_allowed_table(downstream_urn)
1499
+ ) and not any(self.is_allowed_table(urn) for urn in query.upstreams):
1500
1500
  self.report.num_queries_skipped_due_to_filters += 1
1501
1501
  return
1502
1502
 
@@ -1580,6 +1580,7 @@ def create_lineage_sql_parsed_result(
1580
1580
  default_schema: Optional[str] = None,
1581
1581
  graph: Optional[DataHubGraph] = None,
1582
1582
  schema_aware: bool = True,
1583
+ default_dialect: Optional[str] = None,
1583
1584
  ) -> SqlParsingResult:
1584
1585
  schema_resolver = create_schema_resolver(
1585
1586
  platform=platform,
@@ -1599,6 +1600,7 @@ def create_lineage_sql_parsed_result(
1599
1600
  schema_resolver=schema_resolver,
1600
1601
  default_db=default_db,
1601
1602
  default_schema=default_schema,
1603
+ default_dialect=default_dialect,
1602
1604
  )
1603
1605
  except Exception as e:
1604
1606
  return SqlParsingResult.make_from_error(e)
@@ -272,8 +272,11 @@ class SQLAlchemyQueryCombiner:
272
272
  self.report.uncombined_queries_issued += 1
273
273
  return _sa_execute_underlying_method(conn, query, *args, **kwargs)
274
274
 
275
- with _sa_execute_method_patching_lock, unittest.mock.patch(
276
- "sqlalchemy.engine.Connection.execute", _sa_execute_fake
275
+ with (
276
+ _sa_execute_method_patching_lock,
277
+ unittest.mock.patch(
278
+ "sqlalchemy.engine.Connection.execute", _sa_execute_fake
279
+ ),
277
280
  ):
278
281
  yield self
279
282