acryl-datahub: acryl_datahub-1.1.1rc4-py3-none-any.whl → acryl_datahub-1.3.0.1rc9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -13,6 +13,7 @@ from datahub.configuration.common import (
13
13
  from datahub.emitter.aspect import JSON_CONTENT_TYPE
14
14
  from datahub.emitter.mce_builder import datahub_guid, make_data_platform_urn
15
15
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
16
+ from datahub.emitter.rest_emitter import EmitMode
16
17
  from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
17
18
  from datahub.ingestion.api.pipeline_run_listener import PipelineRunListener
18
19
  from datahub.ingestion.api.sink import NoopWriteCallback, Sink
@@ -111,6 +112,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
111
112
  def __init__(self, sink: Sink, report_recipe: bool, ctx: PipelineContext) -> None:
112
113
  assert ctx.pipeline_config is not None
113
114
 
115
+ self.ctx = ctx
114
116
  self.sink: Sink = sink
115
117
  self.report_recipe = report_recipe
116
118
  ingestion_source_key = self.generate_unique_key(ctx.pipeline_config)
@@ -191,18 +193,25 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
191
193
  )
192
194
  return json.dumps(converted_recipe)
193
195
 
194
- def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
195
- self.sink.write_record_async(
196
- RecordEnvelope(
197
- record=MetadataChangeProposalWrapper(
198
- entityUrn=str(entity_urn),
199
- aspect=aspect_value,
200
- ),
201
- metadata={},
202
- ),
203
- NoopWriteCallback(),
196
+ def _emit_aspect(
197
+ self, entity_urn: Urn, aspect_value: _Aspect, try_sync: bool = False
198
+ ) -> None:
199
+ mcp = MetadataChangeProposalWrapper(
200
+ entityUrn=str(entity_urn),
201
+ aspect=aspect_value,
204
202
  )
205
203
 
204
+ if try_sync and self.ctx.graph:
205
+ self.ctx.graph.emit_mcp(mcp, emit_mode=EmitMode.SYNC_PRIMARY)
206
+ else:
207
+ self.sink.write_record_async(
208
+ RecordEnvelope(
209
+ record=mcp,
210
+ metadata={},
211
+ ),
212
+ NoopWriteCallback(),
213
+ )
214
+
206
215
  def on_start(self, ctx: PipelineContext) -> None:
207
216
  assert ctx.pipeline_config is not None
208
217
  # Construct the dataHubExecutionRequestInput aspect
@@ -223,6 +232,7 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
223
232
  self._emit_aspect(
224
233
  entity_urn=self.execution_request_input_urn,
225
234
  aspect_value=execution_input_aspect,
235
+ try_sync=True,
226
236
  )
227
237
 
228
238
  def on_completion(
@@ -258,4 +268,4 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
258
268
  entity_urn=self.execution_request_input_urn,
259
269
  aspect_value=execution_result_aspect,
260
270
  )
261
- self.sink.close()
271
+ # Note: sink.close() is handled by the pipeline's context manager
@@ -44,6 +44,10 @@ from datahub.ingestion.transformer.transform_registry import transform_registry
44
44
  from datahub.sdk._attribution import KnownAttribution, change_default_attribution
45
45
  from datahub.telemetry import stats
46
46
  from datahub.telemetry.telemetry import telemetry_instance
47
+ from datahub.upgrade.upgrade import (
48
+ is_server_default_cli_ahead,
49
+ retrieve_version_stats,
50
+ )
47
51
  from datahub.utilities._custom_package_loader import model_version_name
48
52
  from datahub.utilities.global_warning_util import (
49
53
  clear_global_warnings,
@@ -171,7 +175,10 @@ class Pipeline:
171
175
  self.last_time_printed = int(time.time())
172
176
  self.cli_report = CliReport()
173
177
 
174
- with contextlib.ExitStack() as exit_stack, contextlib.ExitStack() as inner_exit_stack:
178
+ with (
179
+ contextlib.ExitStack() as exit_stack,
180
+ contextlib.ExitStack() as inner_exit_stack,
181
+ ):
175
182
  self.graph: Optional[DataHubGraph] = None
176
183
  with _add_init_error_context("connect to DataHub"):
177
184
  if self.config.datahub_api:
@@ -258,6 +265,11 @@ class Pipeline:
258
265
  with _add_init_error_context("configure transformers"):
259
266
  self._configure_transforms()
260
267
 
268
+ # Register completion callback with sink to handle final reporting
269
+ self.sink.register_pre_shutdown_callback(
270
+ self._notify_reporters_on_ingestion_completion
271
+ )
272
+
261
273
  # If all of the initialization succeeds, we can preserve the exit stack until the pipeline run.
262
274
  # We need to use an exit stack so that if we have an exception during initialization,
263
275
  # things that were already initialized are still cleaned up.
@@ -337,8 +349,48 @@ class Pipeline:
337
349
  for reporter in self.reporters:
338
350
  try:
339
351
  reporter.on_start(ctx=self.ctx)
340
- except Exception as e:
341
- logger.warning("Reporting failed on start", exc_info=e)
352
+ except Exception:
353
+ logger.warning("Reporting failed on start", exc_info=True)
354
+
355
+ def _warn_old_cli_version(self) -> None:
356
+ """
357
+ Check if the server default CLI version is ahead of the CLI version being used.
358
+ If so, add a warning to the report.
359
+ """
360
+
361
+ try:
362
+ version_stats = retrieve_version_stats(timeout=2.0, graph=self.graph)
363
+ except RuntimeError as e:
364
+ # Handle case where there's no event loop available (e.g., in ThreadPoolExecutor)
365
+ if "no current event loop" in str(e):
366
+ logger.debug("Skipping version check - no event loop available")
367
+ return
368
+ raise
369
+
370
+ if not version_stats or not self.graph:
371
+ return
372
+
373
+ if is_server_default_cli_ahead(version_stats):
374
+ server_default_version = (
375
+ version_stats.server.current_server_default_cli_version.version
376
+ if version_stats.server.current_server_default_cli_version
377
+ else None
378
+ )
379
+ current_version = version_stats.client.current.version
380
+
381
+ logger.debug(
382
+ f"""
383
+ client_version: {current_version}
384
+ server_default_version: {server_default_version}
385
+ server_default_cli_ahead: True
386
+ """
387
+ )
388
+
389
+ self.source.get_report().warning(
390
+ title="Server default CLI version is ahead of CLI version",
391
+ message="Please upgrade the CLI version being used",
392
+ context=f"Server Default CLI version: {server_default_version}, Used CLI version: {current_version}",
393
+ )
342
394
 
343
395
  def _notify_reporters_on_ingestion_completion(self) -> None:
344
396
  for reporter in self.reporters:
@@ -360,8 +412,8 @@ class Pipeline:
360
412
  report=self._get_structured_report(),
361
413
  ctx=self.ctx,
362
414
  )
363
- except Exception as e:
364
- logger.warning("Reporting failed on completion", exc_info=e)
415
+ except Exception:
416
+ logger.warning("Reporting failed on completion", exc_info=True)
365
417
 
366
418
  @classmethod
367
419
  def create(
@@ -395,7 +447,20 @@ class Pipeline:
395
447
  return True
396
448
  return False
397
449
 
450
+ def _set_platform(self) -> None:
451
+ platform = self.source.infer_platform()
452
+ if platform:
453
+ self.source.get_report().set_platform(platform)
454
+ else:
455
+ self.source.get_report().warning(
456
+ message="Platform not found",
457
+ title="Platform not found",
458
+ context="Platform not found",
459
+ )
460
+
398
461
  def run(self) -> None:
462
+ self._set_platform()
463
+ self._warn_old_cli_version()
399
464
  with self.exit_stack, self.inner_exit_stack:
400
465
  if self.config.flags.generate_memory_profiles:
401
466
  import memray
@@ -461,10 +526,10 @@ class Pipeline:
461
526
 
462
527
  except (RuntimeError, SystemExit):
463
528
  raise
464
- except Exception as e:
529
+ except Exception:
465
530
  logger.error(
466
531
  "Failed to process some records. Continuing.",
467
- exc_info=e,
532
+ exc_info=True,
468
533
  )
469
534
  # TODO: Transformer errors should be reported more loudly / as part of the pipeline report.
470
535
 
@@ -493,9 +558,9 @@ class Pipeline:
493
558
 
494
559
  self.process_commits()
495
560
  self.final_status = PipelineStatus.COMPLETED
496
- except (SystemExit, KeyboardInterrupt) as e:
561
+ except (SystemExit, KeyboardInterrupt):
497
562
  self.final_status = PipelineStatus.CANCELLED
498
- logger.error("Caught error", exc_info=e)
563
+ logger.error("Caught error", exc_info=True)
499
564
  raise
500
565
  except Exception as exc:
501
566
  self.final_status = PipelineStatus.ERROR
@@ -503,8 +568,6 @@ class Pipeline:
503
568
  finally:
504
569
  clear_global_warnings()
505
570
 
506
- self._notify_reporters_on_ingestion_completion()
507
-
508
571
  def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
509
572
  """
510
573
  Transforms the given sequence of records by passing the records through the transformers
@@ -578,15 +641,22 @@ class Pipeline:
578
641
  sink_failures = len(self.sink.get_report().failures)
579
642
  sink_warnings = len(self.sink.get_report().warnings)
580
643
  global_warnings = len(get_global_warnings())
644
+ source_aspects = self.source.get_report().get_aspects_dict()
645
+ source_aspects_by_subtype = (
646
+ self.source.get_report().get_aspects_by_subtypes_dict()
647
+ )
581
648
 
582
649
  telemetry_instance.ping(
583
650
  "ingest_stats",
584
651
  {
585
652
  "source_type": self.source_type,
653
+ "source_aspects": source_aspects,
654
+ "source_aspects_by_subtype": source_aspects_by_subtype,
586
655
  "sink_type": self.sink_type,
587
656
  "transformer_types": [
588
657
  transformer.type for transformer in self.config.transformers or []
589
658
  ],
659
+ "extractor_type": self.config.source.extractor,
590
660
  "records_written": stats.discretize(
591
661
  self.sink.get_report().total_records_written
592
662
  ),
@@ -6,7 +6,7 @@ from typing import Any, Dict, List, Optional
6
6
 
7
7
  from pydantic import Field, validator
8
8
 
9
- from datahub.configuration.common import ConfigModel, DynamicTypedConfig
9
+ from datahub.configuration.common import ConfigModel, DynamicTypedConfig, HiddenFromDocs
10
10
  from datahub.ingestion.graph.config import DatahubClientConfig
11
11
  from datahub.ingestion.sink.file import FileSinkConfig
12
12
 
@@ -85,7 +85,7 @@ class PipelineConfig(ConfigModel):
85
85
  source: SourceConfig
86
86
  sink: Optional[DynamicTypedConfig] = None
87
87
  transformers: Optional[List[DynamicTypedConfig]] = None
88
- flags: FlagsConfig = Field(default=FlagsConfig(), hidden_from_docs=True)
88
+ flags: HiddenFromDocs[FlagsConfig] = FlagsConfig()
89
89
  reporting: List[ReporterConfig] = []
90
90
  run_id: str = DEFAULT_RUN_ID
91
91
  datahub_api: Optional[DatahubClientConfig] = None
@@ -74,4 +74,5 @@ class DatahubKafkaSink(Sink[KafkaSinkConfig, SinkReport]):
74
74
  callback(err, f"Failed to write record: {err}")
75
75
 
76
76
  def close(self) -> None:
77
+ super().close()
77
78
  self.emitter.flush()
@@ -3,7 +3,6 @@ import contextlib
3
3
  import dataclasses
4
4
  import functools
5
5
  import logging
6
- import os
7
6
  import threading
8
7
  import uuid
9
8
  from enum import auto
@@ -16,6 +15,10 @@ from datahub.configuration.common import (
16
15
  ConfigurationError,
17
16
  OperationalError,
18
17
  )
18
+ from datahub.configuration.env_vars import (
19
+ get_rest_sink_default_max_threads,
20
+ get_rest_sink_default_mode,
21
+ )
19
22
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
20
23
  from datahub.emitter.mcp_builder import mcps_from_mce
21
24
  from datahub.emitter.rest_emitter import (
@@ -47,9 +50,7 @@ from datahub.utilities.server_config_util import set_gms_config
47
50
 
48
51
  logger = logging.getLogger(__name__)
49
52
 
50
- _DEFAULT_REST_SINK_MAX_THREADS = int(
51
- os.getenv("DATAHUB_REST_SINK_DEFAULT_MAX_THREADS", 15)
52
- )
53
+ _DEFAULT_REST_SINK_MAX_THREADS = get_rest_sink_default_max_threads()
53
54
 
54
55
 
55
56
  class RestSinkMode(ConfigEnum):
@@ -63,13 +64,14 @@ class RestSinkMode(ConfigEnum):
63
64
 
64
65
 
65
66
  _DEFAULT_REST_SINK_MODE = pydantic.parse_obj_as(
66
- RestSinkMode, os.getenv("DATAHUB_REST_SINK_DEFAULT_MODE", RestSinkMode.ASYNC_BATCH)
67
+ RestSinkMode, get_rest_sink_default_mode() or RestSinkMode.ASYNC_BATCH
67
68
  )
68
69
 
69
70
 
70
71
  class DatahubRestSinkConfig(DatahubClientConfig):
71
72
  mode: RestSinkMode = _DEFAULT_REST_SINK_MODE
72
73
  endpoint: RestSinkEndpoint = DEFAULT_REST_EMITTER_ENDPOINT
74
+ server_config_refresh_interval: Optional[int] = None
73
75
 
74
76
  # These only apply in async modes.
75
77
  max_threads: pydantic.PositiveInt = _DEFAULT_REST_SINK_MAX_THREADS
@@ -90,6 +92,7 @@ class DatahubRestSinkConfig(DatahubClientConfig):
90
92
  @dataclasses.dataclass
91
93
  class DataHubRestSinkReport(SinkReport):
92
94
  mode: Optional[RestSinkMode] = None
95
+ endpoint: Optional[RestSinkEndpoint] = None
93
96
  max_threads: Optional[int] = None
94
97
  gms_version: Optional[str] = None
95
98
  pending_requests: int = 0
@@ -140,6 +143,7 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
140
143
 
141
144
  self.report.gms_version = gms_config.service_version
142
145
  self.report.mode = self.config.mode
146
+ self.report.endpoint = self.config.endpoint
143
147
  self.report.max_threads = self.config.max_threads
144
148
  logger.debug("Setting env variables to override config")
145
149
  logger.debug("Setting gms config")
@@ -346,6 +350,10 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
346
350
  )
347
351
 
348
352
  def close(self):
353
+ # Execute pre-shutdown callbacks first (handled by parent class)
354
+ super().close()
355
+
356
+ # Then perform sink-specific shutdown
349
357
  with self.report.main_thread_blocking_timer:
350
358
  self.executor.shutdown()
351
359
 
@@ -79,6 +79,7 @@ class FileSink(Sink[FileSinkConfig, SinkReport]):
79
79
  write_callback.on_success(record_envelope, {})
80
80
 
81
81
  def close(self):
82
+ super().close()
82
83
  self.file.write("\n]")
83
84
  self.file.close()
84
85
 
@@ -151,7 +151,7 @@ class DataLakeSourceConfig(
151
151
  raise ValueError("platform must not be empty")
152
152
  return platform
153
153
 
154
- @pydantic.root_validator()
154
+ @pydantic.root_validator(skip_on_failure=True)
155
155
  def ensure_profiling_pattern_is_passed_to_profiling(
156
156
  cls, values: Dict[str, Any]
157
157
  ) -> Dict[str, Any]:
@@ -72,7 +72,7 @@ class DataLakeProfilerConfig(ConfigModel):
72
72
  description="Whether to profile for the sample values for all columns.",
73
73
  )
74
74
 
75
- @pydantic.root_validator()
75
+ @pydantic.root_validator(skip_on_failure=True)
76
76
  def ensure_field_level_settings_are_normalized(
77
77
  cls: "DataLakeProfilerConfig", values: Dict[str, Any]
78
78
  ) -> Dict[str, Any]:
@@ -44,7 +44,11 @@ from datahub.ingestion.source.azure.abs_utils import (
44
44
  get_key_prefix,
45
45
  strip_abs_prefix,
46
46
  )
47
- from datahub.ingestion.source.data_lake_common.data_lake_utils import ContainerWUCreator
47
+ from datahub.ingestion.source.common.subtypes import SourceCapabilityModifier
48
+ from datahub.ingestion.source.data_lake_common.data_lake_utils import (
49
+ ContainerWUCreator,
50
+ add_partition_columns_to_schema,
51
+ )
48
52
  from datahub.ingestion.source.schema_inference import avro, csv_tsv, json, parquet
49
53
  from datahub.ingestion.source.state.stale_entity_removal_handler import (
50
54
  StaleEntityRemovalHandler,
@@ -53,10 +57,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
53
57
  StatefulIngestionSourceBase,
54
58
  )
55
59
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
56
- SchemaField,
57
- SchemaFieldDataType,
58
60
  SchemaMetadata,
59
- StringTypeClass,
60
61
  )
61
62
  from datahub.metadata.schema_classes import (
62
63
  DataPlatformInstanceClass,
@@ -128,6 +129,14 @@ class TableData:
128
129
  @support_status(SupportStatus.INCUBATING)
129
130
  @capability(SourceCapability.DATA_PROFILING, "Optionally enabled via configuration")
130
131
  @capability(SourceCapability.TAGS, "Can extract ABS object/container tags if enabled")
132
+ @capability(
133
+ SourceCapability.CONTAINERS,
134
+ "Extract ABS containers and folders",
135
+ subtype_modifier=[
136
+ SourceCapabilityModifier.FOLDER,
137
+ SourceCapabilityModifier.ABS_CONTAINER,
138
+ ],
139
+ )
131
140
  class ABSSource(StatefulIngestionSourceBase):
132
141
  source_config: DataLakeSourceConfig
133
142
  report: DataLakeSourceReport
@@ -223,36 +232,12 @@ class ABSSource(StatefulIngestionSourceBase):
223
232
  fields = sorted(fields, key=lambda f: f.fieldPath)
224
233
 
225
234
  if self.source_config.add_partition_columns_to_schema:
226
- self.add_partition_columns_to_schema(
235
+ add_partition_columns_to_schema(
227
236
  fields=fields, path_spec=path_spec, full_path=table_data.full_path
228
237
  )
229
238
 
230
239
  return fields
231
240
 
232
- def add_partition_columns_to_schema(
233
- self, path_spec: PathSpec, full_path: str, fields: List[SchemaField]
234
- ) -> None:
235
- vars = path_spec.get_named_vars(full_path)
236
- if vars is not None and "partition" in vars:
237
- for partition in vars["partition"].values():
238
- partition_arr = partition.split("=")
239
- if len(partition_arr) != 2:
240
- logger.debug(
241
- f"Could not derive partition key from partition field {partition}"
242
- )
243
- continue
244
- partition_key = partition_arr[0]
245
- fields.append(
246
- SchemaField(
247
- fieldPath=f"{partition_key}",
248
- nativeDataType="string",
249
- type=SchemaFieldDataType(StringTypeClass()),
250
- isPartitioningKey=True,
251
- nullable=True,
252
- recursive=False,
253
- )
254
- )
255
-
256
241
  def _create_table_operation_aspect(self, table_data: TableData) -> OperationClass:
257
242
  reported_time = int(time.time() * 1000)
258
243
 
@@ -533,7 +518,7 @@ class ABSSource(StatefulIngestionSourceBase):
533
518
  )
534
519
  path_spec.sample_files = False
535
520
  for obj in container_client.list_blobs(
536
- prefix=f"{prefix}", results_per_page=PAGE_SIZE
521
+ name_starts_with=f"{prefix}", results_per_page=PAGE_SIZE
537
522
  ):
538
523
  abs_path = self.create_abs_path(obj.name)
539
524
  logger.debug(f"Path: {abs_path}")