acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.2.0.1rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (223)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/METADATA +2617 -2590
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/RECORD +223 -189
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/dataset/dataset.py +1 -1
  6. datahub/api/entities/external/__init__.py +0 -0
  7. datahub/api/entities/external/external_entities.py +239 -0
  8. datahub/api/entities/external/external_tag.py +145 -0
  9. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  10. datahub/api/entities/external/restricted_text.py +247 -0
  11. datahub/api/entities/external/unity_catalog_external_entites.py +173 -0
  12. datahub/cli/check_cli.py +88 -7
  13. datahub/cli/cli_utils.py +63 -0
  14. datahub/cli/container_cli.py +5 -0
  15. datahub/cli/delete_cli.py +124 -27
  16. datahub/cli/docker_check.py +107 -12
  17. datahub/cli/docker_cli.py +149 -227
  18. datahub/cli/exists_cli.py +0 -2
  19. datahub/cli/get_cli.py +0 -2
  20. datahub/cli/iceberg_cli.py +5 -0
  21. datahub/cli/ingest_cli.py +3 -15
  22. datahub/cli/migrate.py +2 -0
  23. datahub/cli/put_cli.py +1 -4
  24. datahub/cli/quickstart_versioning.py +50 -7
  25. datahub/cli/specific/assertions_cli.py +0 -4
  26. datahub/cli/specific/datacontract_cli.py +0 -3
  27. datahub/cli/specific/dataproduct_cli.py +0 -11
  28. datahub/cli/specific/dataset_cli.py +1 -8
  29. datahub/cli/specific/forms_cli.py +0 -4
  30. datahub/cli/specific/group_cli.py +0 -2
  31. datahub/cli/specific/structuredproperties_cli.py +1 -4
  32. datahub/cli/specific/user_cli.py +0 -2
  33. datahub/cli/state_cli.py +0 -2
  34. datahub/cli/timeline_cli.py +0 -2
  35. datahub/configuration/pydantic_migration_helpers.py +7 -5
  36. datahub/emitter/rest_emitter.py +70 -12
  37. datahub/entrypoints.py +4 -3
  38. datahub/ingestion/api/decorators.py +15 -3
  39. datahub/ingestion/api/report.py +332 -3
  40. datahub/ingestion/api/sink.py +3 -0
  41. datahub/ingestion/api/source.py +48 -44
  42. datahub/ingestion/autogenerated/__init__.py +0 -0
  43. datahub/ingestion/autogenerated/capability_summary.json +3449 -0
  44. datahub/ingestion/autogenerated/lineage.json +401 -0
  45. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  46. datahub/ingestion/extractor/schema_util.py +13 -4
  47. datahub/ingestion/glossary/classification_mixin.py +5 -0
  48. datahub/ingestion/graph/client.py +100 -15
  49. datahub/ingestion/graph/config.py +1 -0
  50. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  51. datahub/ingestion/run/pipeline.py +54 -2
  52. datahub/ingestion/sink/datahub_rest.py +13 -0
  53. datahub/ingestion/source/abs/source.py +1 -1
  54. datahub/ingestion/source/aws/aws_common.py +4 -0
  55. datahub/ingestion/source/aws/glue.py +489 -244
  56. datahub/ingestion/source/aws/tag_entities.py +292 -0
  57. datahub/ingestion/source/azure/azure_common.py +2 -2
  58. datahub/ingestion/source/bigquery_v2/bigquery.py +50 -23
  59. datahub/ingestion/source/bigquery_v2/bigquery_config.py +1 -1
  60. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +1 -0
  61. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +2 -0
  62. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  63. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  64. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  65. datahub/ingestion/source/cassandra/cassandra.py +1 -1
  66. datahub/ingestion/source/cassandra/cassandra_profiling.py +6 -5
  67. datahub/ingestion/source/common/subtypes.py +45 -0
  68. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  69. datahub/ingestion/source/data_lake_common/path_spec.py +10 -21
  70. datahub/ingestion/source/datahub/datahub_database_reader.py +1 -2
  71. datahub/ingestion/source/dbt/dbt_cloud.py +10 -2
  72. datahub/ingestion/source/dbt/dbt_common.py +6 -2
  73. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  74. datahub/ingestion/source/debug/__init__.py +0 -0
  75. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  76. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  77. datahub/ingestion/source/dremio/dremio_config.py +2 -0
  78. datahub/ingestion/source/dremio/dremio_reporting.py +23 -2
  79. datahub/ingestion/source/dremio/dremio_source.py +94 -81
  80. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  81. datahub/ingestion/source/file.py +3 -0
  82. datahub/ingestion/source/fivetran/fivetran.py +34 -26
  83. datahub/ingestion/source/gcs/gcs_source.py +13 -2
  84. datahub/ingestion/source/ge_data_profiler.py +76 -28
  85. datahub/ingestion/source/ge_profiling_config.py +11 -0
  86. datahub/ingestion/source/hex/api.py +26 -1
  87. datahub/ingestion/source/iceberg/iceberg.py +3 -1
  88. datahub/ingestion/source/identity/azure_ad.py +1 -1
  89. datahub/ingestion/source/identity/okta.py +1 -14
  90. datahub/ingestion/source/kafka/kafka.py +16 -0
  91. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  92. datahub/ingestion/source/kafka_connect/source_connectors.py +59 -4
  93. datahub/ingestion/source/looker/looker_source.py +1 -0
  94. datahub/ingestion/source/mlflow.py +11 -1
  95. datahub/ingestion/source/mock_data/__init__.py +0 -0
  96. datahub/ingestion/source/mock_data/datahub_mock_data.py +507 -0
  97. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  98. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  99. datahub/ingestion/source/nifi.py +1 -1
  100. datahub/ingestion/source/powerbi/powerbi.py +1 -5
  101. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  102. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  103. datahub/ingestion/source/preset.py +2 -2
  104. datahub/ingestion/source/qlik_sense/qlik_sense.py +1 -0
  105. datahub/ingestion/source/redshift/redshift.py +21 -1
  106. datahub/ingestion/source/redshift/usage.py +4 -3
  107. datahub/ingestion/source/s3/report.py +4 -2
  108. datahub/ingestion/source/s3/source.py +367 -115
  109. datahub/ingestion/source/sac/sac.py +3 -1
  110. datahub/ingestion/source/salesforce.py +6 -3
  111. datahub/ingestion/source/sigma/sigma.py +7 -1
  112. datahub/ingestion/source/slack/slack.py +2 -1
  113. datahub/ingestion/source/snowflake/snowflake_config.py +30 -7
  114. datahub/ingestion/source/snowflake/snowflake_queries.py +348 -82
  115. datahub/ingestion/source/snowflake/snowflake_summary.py +5 -0
  116. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  117. datahub/ingestion/source/snowflake/snowflake_utils.py +2 -7
  118. datahub/ingestion/source/snowflake/snowflake_v2.py +16 -2
  119. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  120. datahub/ingestion/source/sql/athena.py +119 -11
  121. datahub/ingestion/source/sql/athena_properties_extractor.py +777 -0
  122. datahub/ingestion/source/sql/clickhouse.py +3 -1
  123. datahub/ingestion/source/sql/cockroachdb.py +0 -1
  124. datahub/ingestion/source/sql/hana.py +3 -1
  125. datahub/ingestion/source/sql/hive_metastore.py +3 -11
  126. datahub/ingestion/source/sql/mariadb.py +0 -1
  127. datahub/ingestion/source/sql/mssql/source.py +239 -34
  128. datahub/ingestion/source/sql/mysql.py +0 -1
  129. datahub/ingestion/source/sql/oracle.py +1 -1
  130. datahub/ingestion/source/sql/postgres.py +0 -1
  131. datahub/ingestion/source/sql/sql_common.py +121 -34
  132. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  133. datahub/ingestion/source/sql/teradata.py +997 -235
  134. datahub/ingestion/source/sql/vertica.py +10 -6
  135. datahub/ingestion/source/sql_queries.py +2 -2
  136. datahub/ingestion/source/state/stateful_ingestion_base.py +1 -1
  137. datahub/ingestion/source/superset.py +58 -3
  138. datahub/ingestion/source/tableau/tableau.py +58 -37
  139. datahub/ingestion/source/tableau/tableau_common.py +4 -2
  140. datahub/ingestion/source/tableau/tableau_constant.py +0 -4
  141. datahub/ingestion/source/unity/config.py +5 -0
  142. datahub/ingestion/source/unity/proxy.py +118 -0
  143. datahub/ingestion/source/unity/source.py +195 -17
  144. datahub/ingestion/source/unity/tag_entities.py +295 -0
  145. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  146. datahub/ingestion/source/usage/starburst_trino_usage.py +3 -0
  147. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  148. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  149. datahub/metadata/_internal_schema_classes.py +1522 -569
  150. datahub/metadata/_urns/urn_defs.py +1826 -1658
  151. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  152. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  153. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  154. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +4 -0
  155. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +29 -0
  156. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
  157. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +25 -0
  158. datahub/metadata/schema.avsc +17758 -17097
  159. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  160. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  161. datahub/metadata/schemas/Applications.avsc +38 -0
  162. datahub/metadata/schemas/ChartKey.avsc +1 -0
  163. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  164. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  165. datahub/metadata/schemas/CorpUserSettings.avsc +41 -0
  166. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  167. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  168. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  169. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  170. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +237 -0
  171. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  172. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +175 -0
  173. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  174. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  175. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  176. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  177. datahub/metadata/schemas/DataProductKey.avsc +1 -0
  178. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  179. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  180. datahub/metadata/schemas/GlobalSettingsInfo.avsc +62 -0
  181. datahub/metadata/schemas/GlossaryTermKey.avsc +1 -0
  182. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  183. datahub/metadata/schemas/LogicalParent.avsc +140 -0
  184. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  185. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  186. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  187. datahub/metadata/schemas/MLModelGroupKey.avsc +9 -0
  188. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  189. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  190. datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -1
  191. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  192. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  193. datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
  194. datahub/metadata/schemas/__init__.py +3 -3
  195. datahub/sdk/__init__.py +2 -0
  196. datahub/sdk/_all_entities.py +7 -0
  197. datahub/sdk/_shared.py +116 -0
  198. datahub/sdk/chart.py +315 -0
  199. datahub/sdk/container.py +7 -0
  200. datahub/sdk/dashboard.py +432 -0
  201. datahub/sdk/dataflow.py +7 -0
  202. datahub/sdk/datajob.py +45 -13
  203. datahub/sdk/dataset.py +8 -2
  204. datahub/sdk/entity_client.py +82 -2
  205. datahub/sdk/lineage_client.py +683 -82
  206. datahub/sdk/main_client.py +46 -16
  207. datahub/sdk/mlmodel.py +101 -38
  208. datahub/sdk/mlmodelgroup.py +7 -0
  209. datahub/sdk/search_client.py +4 -3
  210. datahub/sdk/search_filters.py +95 -27
  211. datahub/specific/chart.py +1 -1
  212. datahub/specific/dataproduct.py +4 -0
  213. datahub/sql_parsing/sql_parsing_aggregator.py +29 -17
  214. datahub/sql_parsing/sqlglot_lineage.py +62 -13
  215. datahub/telemetry/telemetry.py +17 -11
  216. datahub/testing/sdk_v2_helpers.py +7 -1
  217. datahub/upgrade/upgrade.py +56 -14
  218. datahub/utilities/server_config_util.py +8 -0
  219. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  220. datahub/utilities/stats_collections.py +4 -0
  221. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/WHEEL +0 -0
  222. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/licenses/LICENSE +0 -0
  223. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.2.0.1rc1.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/sql_common.py
@@ -54,6 +54,7 @@ from datahub.ingestion.source.common.data_reader import DataReader
 from datahub.ingestion.source.common.subtypes import (
     DatasetContainerSubTypes,
     DatasetSubTypes,
+    SourceCapabilityModifier,
 )
 from datahub.ingestion.source.sql.sql_config import SQLCommonConfig
 from datahub.ingestion.source.sql.sql_report import SQLSourceReport
@@ -76,33 +77,36 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
 from datahub.ingestion.source.state.stateful_ingestion_base import (
     StatefulIngestionSourceBase,
 )
-from datahub.metadata.com.linkedin.pegasus2avro.common import StatusClass
-from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
-from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
-from datahub.metadata.com.linkedin.pegasus2avro.schema import (
+from datahub.metadata.schema_classes import (
     ArrayTypeClass,
     BooleanTypeClass,
     BytesTypeClass,
+    DataPlatformInstanceClass,
+    DatasetLineageTypeClass,
+    DatasetPropertiesClass,
+    DatasetSnapshotClass,
     DateTypeClass,
     EnumTypeClass,
-    ForeignKeyConstraint,
-    MySqlDDL,
+    FineGrainedLineageClass,
+    FineGrainedLineageDownstreamTypeClass,
+    FineGrainedLineageUpstreamTypeClass,
+    ForeignKeyConstraintClass,
+    GlobalTagsClass,
+    MetadataChangeEventClass,
+    MySqlDDLClass,
     NullTypeClass,
     NumberTypeClass,
     RecordTypeClass,
-    SchemaField,
-    SchemaFieldDataType,
-    SchemaMetadata,
+    SchemaFieldClass,
+    SchemaFieldDataTypeClass,
+    SchemaMetadataClass,
+    StatusClass,
     StringTypeClass,
-    TimeTypeClass,
-)
-from datahub.metadata.schema_classes import (
-    DataPlatformInstanceClass,
-    DatasetLineageTypeClass,
-    DatasetPropertiesClass,
-    GlobalTagsClass,
     SubTypesClass,
     TagAssociationClass,
+    TimeTypeClass,
+    UpstreamClass,
+    UpstreamLineageClass,
     ViewPropertiesClass,
 )
 from datahub.sql_parsing.schema_resolver import SchemaResolver
@@ -112,6 +116,7 @@ from datahub.utilities.registries.domain_registry import DomainRegistry
 from datahub.utilities.sqlalchemy_type_converter import (
     get_native_data_type_for_sqlalchemy_type,
 )
+from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path

 if TYPE_CHECKING:
     from datahub.ingestion.source.ge_data_profiler import (
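The import hunks above consolidate everything onto datahub.metadata.schema_classes, swapping the pegasus2avro aliases (SchemaField, SchemaMetadata, ForeignKeyConstraint, MySqlDDL, DatasetSnapshot, MetadataChangeEvent) for their Class-suffixed equivalents. A minimal sketch of building a schema aspect with the Class-suffixed names, mirroring the constructor calls that appear later in this diff; the table and column names are illustrative, not taken from the release:

    # Illustrative only: same constructor shape that sql_common.py now uses.
    from datahub.emitter.mce_builder import make_data_platform_urn
    from datahub.metadata.schema_classes import (
        MySqlDDLClass,
        NumberTypeClass,
        SchemaFieldClass,
        SchemaFieldDataTypeClass,
        SchemaMetadataClass,
    )

    schema_metadata = SchemaMetadataClass(
        schemaName="example_table",  # hypothetical table name
        platform=make_data_platform_urn("mysql"),
        version=0,
        hash="",
        platformSchema=MySqlDDLClass(tableSchema=""),
        fields=[
            SchemaFieldClass(
                fieldPath="id",  # hypothetical column
                type=SchemaFieldDataTypeClass(type=NumberTypeClass()),
                nativeDataType="BIGINT",
            )
        ],
    )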
@@ -198,7 +203,7 @@ def make_sqlalchemy_type(name: str) -> Type[TypeEngine]:

 def get_column_type(
     sql_report: SQLSourceReport, dataset_name: str, column_type: Any
-) -> SchemaFieldDataType:
+) -> SchemaFieldDataTypeClass:
     """
     Maps SQLAlchemy types (https://docs.sqlalchemy.org/en/13/core/type_basics.html) to corresponding schema types
     """
@@ -223,7 +228,7 @@ def get_column_type(
         )
         TypeClass = NullTypeClass

-    return SchemaFieldDataType(type=TypeClass())
+    return SchemaFieldDataTypeClass(type=TypeClass())


 def get_schema_metadata(
@@ -232,10 +237,10 @@ def get_schema_metadata(
     platform: str,
     columns: List[dict],
     pk_constraints: Optional[dict] = None,
-    foreign_keys: Optional[List[ForeignKeyConstraint]] = None,
-    canonical_schema: Optional[List[SchemaField]] = None,
+    foreign_keys: Optional[List[ForeignKeyConstraintClass]] = None,
+    canonical_schema: Optional[List[SchemaFieldClass]] = None,
     simplify_nested_field_paths: bool = False,
-) -> SchemaMetadata:
+) -> SchemaMetadataClass:
     if (
         simplify_nested_field_paths
         and canonical_schema is not None
@@ -243,12 +248,12 @@ def get_schema_metadata(
     ):
         canonical_schema = downgrade_schema_from_v2(canonical_schema)

-    schema_metadata = SchemaMetadata(
+    schema_metadata = SchemaMetadataClass(
         schemaName=dataset_name,
         platform=make_data_platform_urn(platform),
         version=0,
         hash="",
-        platformSchema=MySqlDDL(tableSchema=""),
+        platformSchema=MySqlDDLClass(tableSchema=""),
         fields=canonical_schema or [],
     )
     if foreign_keys is not None and foreign_keys != []:
@@ -287,6 +292,10 @@ class ProfileMetadata:
     SourceCapability.CONTAINERS,
     "Enabled by default",
     supported=True,
+    subtype_modifier=[
+        SourceCapabilityModifier.DATABASE,
+        SourceCapabilityModifier.SCHEMA,
+    ],
 )
 @capability(
     SourceCapability.DESCRIPTIONS,
@@ -298,6 +307,20 @@ class ProfileMetadata:
     "Enabled by default",
     supported=True,
 )
+@capability(
+    SourceCapability.LINEAGE_COARSE,
+    "Enabled by default to get lineage for views via `include_view_lineage`",
+    subtype_modifier=[SourceCapabilityModifier.VIEW],
+)
+@capability(
+    SourceCapability.LINEAGE_FINE,
+    "Enabled by default to get lineage for views via `include_view_column_lineage`",
+    subtype_modifier=[SourceCapabilityModifier.VIEW],
+)
+@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
+@capability(
+    SourceCapability.DELETION_DETECTION, "Enabled by default via stateful ingestion"
+)
 class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
     """A Base class for all SQL Sources that use SQLAlchemy to extend"""

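The new @capability decorators above feed the autogenerated capability summary (capability_summary.json in the file list), and the new subtype_modifier argument scopes a capability to particular subtypes (DATABASE and SCHEMA containers, VIEW datasets). A hedged sketch of how a SQLAlchemy-based connector might declare a capability of its own on top of what SQLAlchemySource now provides; the MyDatabaseSource class and its re-declared capability are illustrative, not part of this release:

    # Illustrative subclass: inherits SQLAlchemySource's capabilities and re-declares
    # one with a connector-specific message, scoped to views via subtype_modifier.
    from datahub.ingestion.api.decorators import capability
    from datahub.ingestion.api.source import SourceCapability
    from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
    from datahub.ingestion.source.sql.sql_common import SQLAlchemySource


    @capability(
        SourceCapability.LINEAGE_COARSE,
        "View lineage via `include_view_lineage` (connector-specific note)",
        subtype_modifier=[SourceCapabilityModifier.VIEW],
    )
    class MyDatabaseSource(SQLAlchemySource):  # hypothetical connector
        pass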
@@ -570,6 +593,10 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         )

         # Generate workunit for aggregated SQL parsing results
+        yield from self._generate_aggregator_workunits()
+
+    def _generate_aggregator_workunits(self) -> Iterable[MetadataWorkUnit]:
+        """Generate work units from SQL parsing aggregator. Can be overridden by subclasses."""
         for mcp in self.aggregator.gen_metadata():
             yield mcp.as_workunit()

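Pulling the aggregator emission out into _generate_aggregator_workunits gives dialect-specific subclasses a single override point for how SqlParsingAggregator output becomes work units. A minimal, hypothetical override; the MyDialectSource class and the temp-table filter are illustrative only:

    # Hypothetical override: filter aggregator output before emitting work units.
    from typing import Iterable

    from datahub.ingestion.api.workunit import MetadataWorkUnit
    from datahub.ingestion.source.sql.sql_common import SQLAlchemySource


    class MyDialectSource(SQLAlchemySource):  # hypothetical connector
        def _generate_aggregator_workunits(self) -> Iterable[MetadataWorkUnit]:
            for mcp in self.aggregator.gen_metadata():
                # Skip aspects for scratch/temporary tables (illustrative rule).
                if mcp.entityUrn and "_tmp_" in mcp.entityUrn:
                    continue
                yield mcp.as_workunit()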
@@ -590,7 +617,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         schema: str,
         fk_dict: Dict[str, str],
         inspector: Inspector,
-    ) -> ForeignKeyConstraint:
+    ) -> ForeignKeyConstraintClass:
         referred_schema: Optional[str] = fk_dict.get("referred_schema")

         if not referred_schema:
@@ -617,7 +644,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             for f in fk_dict["referred_columns"]
         ]

-        return ForeignKeyConstraint(
+        return ForeignKeyConstraintClass(
             fk_dict["name"], foreign_fields, source_fields, foreign_dataset
         )

@@ -714,7 +741,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             self.config.platform_instance,
             self.config.env,
         )
-        dataset_snapshot = DatasetSnapshot(
+        dataset_snapshot = DatasetSnapshotClass(
             urn=dataset_urn,
             aspects=[StatusClass(removed=False)],
         )
@@ -742,6 +769,30 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
             tags=extra_tags,
             partition_keys=partitions,
         )
+
+        if self.config.include_table_location_lineage and location_urn:
+            self.aggregator.add_known_lineage_mapping(
+                upstream_urn=location_urn,
+                downstream_urn=dataset_snapshot.urn,
+                lineage_type=DatasetLineageTypeClass.COPY,
+            )
+            external_upstream_table = UpstreamClass(
+                dataset=location_urn,
+                type=DatasetLineageTypeClass.COPY,
+            )
+
+            yield MetadataChangeProposalWrapper(
+                entityUrn=dataset_snapshot.urn,
+                aspect=UpstreamLineageClass(
+                    upstreams=[external_upstream_table],
+                    fineGrainedLineages=self.get_fine_grained_lineages(
+                        dataset_urn=dataset_snapshot.urn,
+                        upstream_dataset_urn=location_urn,
+                        schema_fields=schema_fields,
+                    ),
+                ),
+            ).as_workunit()
+
         schema_metadata = get_schema_metadata(
             self.report,
             dataset_name,
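The new block above, gated by include_table_location_lineage, registers a table-level COPY mapping with the SQL parsing aggregator and also emits an UpstreamLineage aspect from the table's storage location URN to the dataset, including column-level lineage built by get_fine_grained_lineages (added further down in this diff). A standalone sketch of the same aspect construction; the URNs are made-up examples, not values from the release:

    # Illustrative construction of the UpstreamLineage aspect emitted above.
    from datahub.emitter.mcp import MetadataChangeProposalWrapper
    from datahub.metadata.schema_classes import (
        DatasetLineageTypeClass,
        UpstreamClass,
        UpstreamLineageClass,
    )

    location_urn = "urn:li:dataset:(urn:li:dataPlatform:s3,my-bucket/warehouse/orders,PROD)"
    table_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,db.orders,PROD)"

    workunit = MetadataChangeProposalWrapper(
        entityUrn=table_urn,
        aspect=UpstreamLineageClass(
            upstreams=[
                UpstreamClass(dataset=location_urn, type=DatasetLineageTypeClass.COPY)
            ]
        ),
    ).as_workunit()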
@@ -762,7 +813,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         yield from self.add_table_to_schema_container(
             dataset_urn=dataset_urn, db_name=db_name, schema=schema
         )
-        mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
+        mce = MetadataChangeEventClass(proposedSnapshot=dataset_snapshot)
         yield SqlWorkUnit(id=dataset_name, mce=mce)
         dpi_aspect = self.get_dataplatform_instance_aspect(dataset_urn=dataset_urn)
         if dpi_aspect:
797
848
  schema: str,
798
849
  table: str,
799
850
  data_reader: Optional[DataReader],
800
- schema_metadata: SchemaMetadata,
851
+ schema_metadata: SchemaMetadataClass,
801
852
  ) -> None:
802
853
  try:
803
854
  if (
@@ -908,7 +959,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
908
959
 
909
960
  def _get_foreign_keys(
910
961
  self, dataset_urn: str, inspector: Inspector, schema: str, table: str
911
- ) -> List[ForeignKeyConstraint]:
962
+ ) -> List[ForeignKeyConstraintClass]:
912
963
  try:
913
964
  foreign_keys = [
914
965
  self.get_foreign_key_metadata(dataset_urn, schema, fk_rec, inspector)
@@ -922,6 +973,42 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
922
973
  foreign_keys = []
923
974
  return foreign_keys
924
975
 
976
+ def get_fine_grained_lineages(
977
+ self,
978
+ dataset_urn: str,
979
+ upstream_dataset_urn: str,
980
+ schema_fields: List[SchemaFieldClass],
981
+ ) -> Optional[List[FineGrainedLineageClass]]:
982
+ fine_grained_lineages: List[FineGrainedLineageClass] = []
983
+
984
+ for schema_field in schema_fields:
985
+ try:
986
+ field_path_v1 = get_simple_field_path_from_v2_field_path(
987
+ schema_field.fieldPath
988
+ )
989
+ fine_grained_lineages.append(
990
+ FineGrainedLineageClass(
991
+ downstreamType=FineGrainedLineageDownstreamTypeClass.FIELD,
992
+ downstreams=[make_schema_field_urn(dataset_urn, field_path_v1)],
993
+ upstreamType=FineGrainedLineageUpstreamTypeClass.FIELD_SET,
994
+ upstreams=[
995
+ make_schema_field_urn(
996
+ upstream_dataset_urn,
997
+ get_simple_field_path_from_v2_field_path(
998
+ schema_field.fieldPath
999
+ ),
1000
+ )
1001
+ ],
1002
+ )
1003
+ )
1004
+ except Exception as e:
1005
+ logger.warning(
1006
+ f"Error processing field path for {dataset_urn}: {str(e)}"
1007
+ )
1008
+ continue
1009
+
1010
+ return fine_grained_lineages if fine_grained_lineages else None
1011
+
925
1012
  def get_schema_fields(
926
1013
  self,
927
1014
  dataset_name: str,
@@ -930,7 +1017,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         pk_constraints: Optional[dict] = None,
         partition_keys: Optional[List[str]] = None,
         tags: Optional[Dict[str, List[str]]] = None,
-    ) -> List[SchemaField]:
+    ) -> List[SchemaFieldClass]:
         canonical_schema = []
         for column in columns:
             column_tags: Optional[List[str]] = None
955
1042
  pk_constraints: Optional[dict] = None,
956
1043
  partition_keys: Optional[List[str]] = None,
957
1044
  tags: Optional[List[str]] = None,
958
- ) -> List[SchemaField]:
1045
+ ) -> List[SchemaFieldClass]:
959
1046
  gtc: Optional[GlobalTagsClass] = None
960
1047
  if tags:
961
1048
  tags_str = [make_tag_urn(t) for t in tags]
962
1049
  tags_tac = [TagAssociationClass(t) for t in tags_str]
963
1050
  gtc = GlobalTagsClass(tags_tac)
964
1051
  full_type = column.get("full_type")
965
- field = SchemaField(
1052
+ field = SchemaFieldClass(
966
1053
  fieldPath=column["name"],
967
1054
  type=get_column_type(self.report, dataset_name, column["type"]),
968
1055
  nativeDataType=(
@@ -1092,7 +1179,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
1092
1179
  default_schema=default_schema,
1093
1180
  )
1094
1181
 
1095
- dataset_snapshot = DatasetSnapshot(
1182
+ dataset_snapshot = DatasetSnapshotClass(
1096
1183
  urn=dataset_urn,
1097
1184
  aspects=[StatusClass(removed=False)],
1098
1185
  )
@@ -1111,7 +1198,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):
         dataset_snapshot.aspects.append(dataset_properties)
         if schema_metadata:
             dataset_snapshot.aspects.append(schema_metadata)
-        mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
+        mce = MetadataChangeEventClass(proposedSnapshot=dataset_snapshot)
         yield SqlWorkUnit(id=dataset_name, mce=mce)
         dpi_aspect = self.get_dataplatform_instance_aspect(dataset_urn=dataset_urn)
         if dpi_aspect:
datahub/ingestion/source/sql/sql_generic_profiler.py
@@ -57,10 +57,11 @@ class GenericProfiler:
         platform: Optional[str] = None,
         profiler_args: Optional[Dict] = None,
     ) -> Iterable[MetadataWorkUnit]:
+        # We don't run ge profiling queries if table profiling is enabled or if the row count is 0.
         ge_profile_requests: List[GEProfilerRequest] = [
             cast(GEProfilerRequest, request)
             for request in requests
-            if not request.profile_table_level_only
+            if not request.profile_table_level_only or request.table.rows_count == 0
         ]
         table_level_profile_requests: List[TableProfilerRequest] = [
             request for request in requests if request.profile_table_level_only
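The sql_generic_profiler.py hunk widens the GE profiling request set: a request whose table reports a row count of 0 is now sent to the GE profiler even when profile_table_level_only is set. A self-contained sketch of just the filter condition, using simplified stand-in classes rather than the real GEProfilerRequest/TableProfilerRequest:

    # Simplified stand-ins that only demonstrate the new filter condition.
    from dataclasses import dataclass
    from typing import List, Optional


    @dataclass
    class FakeTable:
        rows_count: Optional[int]


    @dataclass
    class FakeRequest:
        profile_table_level_only: bool
        table: FakeTable


    requests = [
        FakeRequest(profile_table_level_only=False, table=FakeTable(rows_count=100)),
        FakeRequest(profile_table_level_only=True, table=FakeTable(rows_count=0)),
        FakeRequest(profile_table_level_only=True, table=FakeTable(rows_count=100)),
    ]

    # Same condition as the hunk above: run GE profiling unless the request is
    # table-level-only, except that empty tables still go through GE profiling.
    ge_requests: List[FakeRequest] = [
        r for r in requests
        if not r.profile_table_level_only or r.table.rows_count == 0
    ]
    assert len(ge_requests) == 2  # the first two requests qualify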