acryl-datahub 1.1.1rc4__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic; consult the registry's advisory for this release for more details.

Files changed (414)
  1. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2615 -2547
  2. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +412 -338
  3. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +5 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/common/serialized_value.py +1 -1
  7. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  8. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  9. datahub/api/entities/dataset/dataset.py +26 -23
  10. datahub/api/entities/external/__init__.py +0 -0
  11. datahub/api/entities/external/external_entities.py +724 -0
  12. datahub/api/entities/external/external_tag.py +147 -0
  13. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  14. datahub/api/entities/external/restricted_text.py +172 -0
  15. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  16. datahub/api/entities/forms/forms.py +3 -3
  17. datahub/api/entities/structuredproperties/structuredproperties.py +4 -4
  18. datahub/api/graphql/operation.py +10 -6
  19. datahub/cli/check_cli.py +88 -7
  20. datahub/cli/cli_utils.py +63 -0
  21. datahub/cli/config_utils.py +18 -10
  22. datahub/cli/container_cli.py +5 -0
  23. datahub/cli/delete_cli.py +125 -27
  24. datahub/cli/docker_check.py +110 -14
  25. datahub/cli/docker_cli.py +153 -229
  26. datahub/cli/exists_cli.py +0 -2
  27. datahub/cli/get_cli.py +0 -2
  28. datahub/cli/graphql_cli.py +1422 -0
  29. datahub/cli/iceberg_cli.py +5 -0
  30. datahub/cli/ingest_cli.py +3 -15
  31. datahub/cli/migrate.py +2 -0
  32. datahub/cli/put_cli.py +1 -4
  33. datahub/cli/quickstart_versioning.py +53 -10
  34. datahub/cli/specific/assertions_cli.py +37 -6
  35. datahub/cli/specific/datacontract_cli.py +54 -7
  36. datahub/cli/specific/dataproduct_cli.py +2 -15
  37. datahub/cli/specific/dataset_cli.py +1 -8
  38. datahub/cli/specific/forms_cli.py +0 -4
  39. datahub/cli/specific/group_cli.py +0 -2
  40. datahub/cli/specific/structuredproperties_cli.py +1 -4
  41. datahub/cli/specific/user_cli.py +172 -3
  42. datahub/cli/state_cli.py +0 -2
  43. datahub/cli/timeline_cli.py +0 -2
  44. datahub/configuration/common.py +40 -1
  45. datahub/configuration/connection_resolver.py +5 -2
  46. datahub/configuration/env_vars.py +331 -0
  47. datahub/configuration/import_resolver.py +7 -4
  48. datahub/configuration/kafka.py +21 -1
  49. datahub/configuration/pydantic_migration_helpers.py +6 -13
  50. datahub/configuration/source_common.py +3 -2
  51. datahub/configuration/validate_field_deprecation.py +5 -2
  52. datahub/configuration/validate_field_removal.py +8 -2
  53. datahub/configuration/validate_field_rename.py +6 -5
  54. datahub/configuration/validate_multiline_string.py +5 -2
  55. datahub/emitter/mce_builder.py +8 -4
  56. datahub/emitter/rest_emitter.py +103 -30
  57. datahub/entrypoints.py +6 -3
  58. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +297 -1
  59. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  60. datahub/ingestion/api/decorators.py +15 -3
  61. datahub/ingestion/api/report.py +381 -3
  62. datahub/ingestion/api/sink.py +27 -2
  63. datahub/ingestion/api/source.py +165 -58
  64. datahub/ingestion/api/source_protocols.py +23 -0
  65. datahub/ingestion/autogenerated/__init__.py +0 -0
  66. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  67. datahub/ingestion/autogenerated/lineage.json +402 -0
  68. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  69. datahub/ingestion/extractor/schema_util.py +13 -4
  70. datahub/ingestion/glossary/classification_mixin.py +5 -0
  71. datahub/ingestion/graph/client.py +330 -25
  72. datahub/ingestion/graph/config.py +3 -2
  73. datahub/ingestion/graph/filters.py +30 -11
  74. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  75. datahub/ingestion/run/pipeline.py +81 -11
  76. datahub/ingestion/run/pipeline_config.py +2 -2
  77. datahub/ingestion/sink/datahub_kafka.py +1 -0
  78. datahub/ingestion/sink/datahub_rest.py +13 -5
  79. datahub/ingestion/sink/file.py +1 -0
  80. datahub/ingestion/source/abs/config.py +1 -1
  81. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  82. datahub/ingestion/source/abs/source.py +15 -30
  83. datahub/ingestion/source/aws/aws_common.py +185 -13
  84. datahub/ingestion/source/aws/glue.py +517 -244
  85. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  86. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  87. datahub/ingestion/source/aws/tag_entities.py +270 -0
  88. datahub/ingestion/source/azure/azure_common.py +3 -3
  89. datahub/ingestion/source/bigquery_v2/bigquery.py +67 -24
  90. datahub/ingestion/source/bigquery_v2/bigquery_config.py +47 -19
  91. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +12 -1
  92. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +3 -0
  93. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  94. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  95. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  96. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  97. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  98. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  99. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  100. datahub/ingestion/source/cassandra/cassandra.py +6 -8
  101. datahub/ingestion/source/cassandra/cassandra_api.py +17 -1
  102. datahub/ingestion/source/cassandra/cassandra_config.py +5 -0
  103. datahub/ingestion/source/cassandra/cassandra_profiling.py +7 -6
  104. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  105. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  106. datahub/ingestion/source/common/subtypes.py +53 -0
  107. datahub/ingestion/source/data_lake_common/data_lake_utils.py +37 -0
  108. datahub/ingestion/source/data_lake_common/object_store.py +115 -27
  109. datahub/ingestion/source/data_lake_common/path_spec.py +72 -43
  110. datahub/ingestion/source/datahub/config.py +12 -9
  111. datahub/ingestion/source/datahub/datahub_database_reader.py +26 -11
  112. datahub/ingestion/source/datahub/datahub_source.py +10 -0
  113. datahub/ingestion/source/dbt/dbt_cloud.py +16 -5
  114. datahub/ingestion/source/dbt/dbt_common.py +224 -9
  115. datahub/ingestion/source/dbt/dbt_core.py +3 -0
  116. datahub/ingestion/source/debug/__init__.py +0 -0
  117. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  118. datahub/ingestion/source/delta_lake/config.py +9 -5
  119. datahub/ingestion/source/delta_lake/source.py +8 -0
  120. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  121. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  122. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  123. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  124. datahub/ingestion/source/dremio/dremio_source.py +132 -98
  125. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  126. datahub/ingestion/source/dynamodb/dynamodb.py +11 -8
  127. datahub/ingestion/source/excel/__init__.py +0 -0
  128. datahub/ingestion/source/excel/config.py +92 -0
  129. datahub/ingestion/source/excel/excel_file.py +539 -0
  130. datahub/ingestion/source/excel/profiling.py +308 -0
  131. datahub/ingestion/source/excel/report.py +49 -0
  132. datahub/ingestion/source/excel/source.py +662 -0
  133. datahub/ingestion/source/excel/util.py +18 -0
  134. datahub/ingestion/source/feast.py +8 -10
  135. datahub/ingestion/source/file.py +3 -0
  136. datahub/ingestion/source/fivetran/config.py +66 -7
  137. datahub/ingestion/source/fivetran/fivetran.py +227 -43
  138. datahub/ingestion/source/fivetran/fivetran_log_api.py +37 -8
  139. datahub/ingestion/source/fivetran/fivetran_query.py +51 -29
  140. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  141. datahub/ingestion/source/fivetran/response_models.py +97 -0
  142. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  143. datahub/ingestion/source/gcs/gcs_source.py +32 -4
  144. datahub/ingestion/source/ge_data_profiler.py +108 -31
  145. datahub/ingestion/source/ge_profiling_config.py +26 -11
  146. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  147. datahub/ingestion/source/grafana/field_utils.py +307 -0
  148. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  149. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  150. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  151. datahub/ingestion/source/grafana/lineage.py +202 -0
  152. datahub/ingestion/source/grafana/models.py +137 -0
  153. datahub/ingestion/source/grafana/report.py +90 -0
  154. datahub/ingestion/source/grafana/types.py +16 -0
  155. datahub/ingestion/source/hex/api.py +28 -1
  156. datahub/ingestion/source/hex/hex.py +16 -5
  157. datahub/ingestion/source/hex/mapper.py +16 -2
  158. datahub/ingestion/source/hex/model.py +2 -0
  159. datahub/ingestion/source/hex/query_fetcher.py +1 -1
  160. datahub/ingestion/source/iceberg/iceberg.py +123 -59
  161. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  162. datahub/ingestion/source/identity/azure_ad.py +1 -1
  163. datahub/ingestion/source/identity/okta.py +1 -14
  164. datahub/ingestion/source/kafka/kafka.py +16 -0
  165. datahub/ingestion/source/kafka_connect/common.py +2 -2
  166. datahub/ingestion/source/kafka_connect/sink_connectors.py +156 -47
  167. datahub/ingestion/source/kafka_connect/source_connectors.py +62 -4
  168. datahub/ingestion/source/looker/looker_common.py +148 -79
  169. datahub/ingestion/source/looker/looker_config.py +15 -4
  170. datahub/ingestion/source/looker/looker_constant.py +4 -0
  171. datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
  172. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  173. datahub/ingestion/source/looker/looker_source.py +503 -547
  174. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  175. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  176. datahub/ingestion/source/looker/lookml_config.py +31 -3
  177. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  178. datahub/ingestion/source/looker/lookml_source.py +96 -117
  179. datahub/ingestion/source/looker/view_upstream.py +494 -1
  180. datahub/ingestion/source/metabase.py +32 -6
  181. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  182. datahub/ingestion/source/metadata/lineage.py +9 -9
  183. datahub/ingestion/source/mlflow.py +12 -2
  184. datahub/ingestion/source/mock_data/__init__.py +0 -0
  185. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  186. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  187. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  188. datahub/ingestion/source/mode.py +26 -5
  189. datahub/ingestion/source/mongodb.py +11 -1
  190. datahub/ingestion/source/neo4j/neo4j_source.py +83 -144
  191. datahub/ingestion/source/nifi.py +2 -2
  192. datahub/ingestion/source/openapi.py +1 -1
  193. datahub/ingestion/source/powerbi/config.py +47 -21
  194. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  195. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  196. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +100 -10
  197. datahub/ingestion/source/powerbi/powerbi.py +10 -6
  198. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +0 -1
  199. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  200. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  201. datahub/ingestion/source/preset.py +3 -3
  202. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  203. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  204. datahub/ingestion/source/redash.py +1 -1
  205. datahub/ingestion/source/redshift/config.py +15 -9
  206. datahub/ingestion/source/redshift/datashares.py +1 -1
  207. datahub/ingestion/source/redshift/lineage.py +386 -687
  208. datahub/ingestion/source/redshift/query.py +23 -19
  209. datahub/ingestion/source/redshift/redshift.py +52 -111
  210. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  211. datahub/ingestion/source/redshift/report.py +0 -2
  212. datahub/ingestion/source/redshift/usage.py +6 -5
  213. datahub/ingestion/source/s3/report.py +4 -2
  214. datahub/ingestion/source/s3/source.py +449 -248
  215. datahub/ingestion/source/sac/sac.py +3 -1
  216. datahub/ingestion/source/salesforce.py +28 -13
  217. datahub/ingestion/source/schema/json_schema.py +14 -14
  218. datahub/ingestion/source/schema_inference/object.py +22 -6
  219. datahub/ingestion/source/sigma/data_classes.py +3 -0
  220. datahub/ingestion/source/sigma/sigma.py +7 -1
  221. datahub/ingestion/source/slack/slack.py +10 -16
  222. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  223. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  224. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  225. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  226. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  227. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  228. datahub/ingestion/source/snowflake/constants.py +3 -0
  229. datahub/ingestion/source/snowflake/snowflake_config.py +76 -23
  230. datahub/ingestion/source/snowflake/snowflake_connection.py +24 -8
  231. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +19 -6
  232. datahub/ingestion/source/snowflake/snowflake_queries.py +464 -97
  233. datahub/ingestion/source/snowflake/snowflake_query.py +77 -5
  234. datahub/ingestion/source/snowflake/snowflake_report.py +1 -2
  235. datahub/ingestion/source/snowflake/snowflake_schema.py +352 -16
  236. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +51 -10
  237. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  238. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  239. datahub/ingestion/source/snowflake/snowflake_utils.py +36 -15
  240. datahub/ingestion/source/snowflake/snowflake_v2.py +39 -4
  241. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  242. datahub/ingestion/source/sql/athena.py +217 -25
  243. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  244. datahub/ingestion/source/sql/clickhouse.py +24 -8
  245. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  246. datahub/ingestion/source/sql/druid.py +2 -2
  247. datahub/ingestion/source/sql/hana.py +3 -1
  248. datahub/ingestion/source/sql/hive.py +4 -3
  249. datahub/ingestion/source/sql/hive_metastore.py +19 -20
  250. datahub/ingestion/source/sql/mariadb.py +0 -1
  251. datahub/ingestion/source/sql/mssql/job_models.py +3 -1
  252. datahub/ingestion/source/sql/mssql/source.py +336 -57
  253. datahub/ingestion/source/sql/mysql.py +154 -4
  254. datahub/ingestion/source/sql/oracle.py +5 -5
  255. datahub/ingestion/source/sql/postgres.py +142 -6
  256. datahub/ingestion/source/sql/presto.py +2 -1
  257. datahub/ingestion/source/sql/sql_common.py +281 -49
  258. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  259. datahub/ingestion/source/sql/sql_types.py +22 -0
  260. datahub/ingestion/source/sql/sqlalchemy_uri.py +39 -7
  261. datahub/ingestion/source/sql/teradata.py +1028 -245
  262. datahub/ingestion/source/sql/trino.py +11 -1
  263. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  264. datahub/ingestion/source/sql/vertica.py +14 -7
  265. datahub/ingestion/source/sql_queries.py +219 -121
  266. datahub/ingestion/source/state/checkpoint.py +8 -29
  267. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  268. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  269. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  270. datahub/ingestion/source/superset.py +314 -67
  271. datahub/ingestion/source/tableau/tableau.py +135 -59
  272. datahub/ingestion/source/tableau/tableau_common.py +9 -2
  273. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  274. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  275. datahub/ingestion/source/unity/config.py +160 -40
  276. datahub/ingestion/source/unity/connection.py +61 -0
  277. datahub/ingestion/source/unity/connection_test.py +1 -0
  278. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  279. datahub/ingestion/source/unity/proxy.py +794 -51
  280. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  281. datahub/ingestion/source/unity/proxy_types.py +36 -2
  282. datahub/ingestion/source/unity/report.py +15 -3
  283. datahub/ingestion/source/unity/source.py +465 -131
  284. datahub/ingestion/source/unity/tag_entities.py +197 -0
  285. datahub/ingestion/source/unity/usage.py +46 -4
  286. datahub/ingestion/source/usage/clickhouse_usage.py +4 -1
  287. datahub/ingestion/source/usage/starburst_trino_usage.py +5 -2
  288. datahub/ingestion/source/usage/usage_common.py +4 -3
  289. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  290. datahub/ingestion/source_config/pulsar.py +3 -1
  291. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  292. datahub/ingestion/transformer/add_dataset_ownership.py +18 -2
  293. datahub/ingestion/transformer/base_transformer.py +8 -5
  294. datahub/ingestion/transformer/set_browse_path.py +112 -0
  295. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  296. datahub/metadata/_internal_schema_classes.py +6806 -4871
  297. datahub/metadata/_urns/urn_defs.py +1767 -1539
  298. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  299. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  300. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  301. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  302. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  303. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +6 -0
  304. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  305. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  306. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  307. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  308. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  309. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  310. datahub/metadata/schema.avsc +18395 -16979
  311. datahub/metadata/schemas/Actors.avsc +38 -1
  312. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  313. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  314. datahub/metadata/schemas/Applications.avsc +38 -0
  315. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  316. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  317. datahub/metadata/schemas/ChartKey.avsc +1 -0
  318. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  319. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  320. datahub/metadata/schemas/CorpUserEditableInfo.avsc +1 -1
  321. datahub/metadata/schemas/CorpUserSettings.avsc +50 -0
  322. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  323. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  324. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  325. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  326. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  327. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  328. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  329. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  330. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  331. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  332. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  333. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  334. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  335. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  336. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  337. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  338. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  339. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  340. datahub/metadata/schemas/DomainKey.avsc +2 -1
  341. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  342. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  343. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  344. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  345. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  346. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  347. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  348. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  349. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  350. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  351. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  352. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  353. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  354. datahub/metadata/schemas/MetadataChangeEvent.avsc +151 -47
  355. datahub/metadata/schemas/MetadataChangeLog.avsc +62 -44
  356. datahub/metadata/schemas/MetadataChangeProposal.avsc +61 -0
  357. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  358. datahub/metadata/schemas/Operation.avsc +4 -2
  359. datahub/metadata/schemas/Ownership.avsc +69 -0
  360. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  361. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  362. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  363. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  364. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  365. datahub/metadata/schemas/SystemMetadata.avsc +61 -0
  366. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  367. datahub/sdk/__init__.py +2 -0
  368. datahub/sdk/_all_entities.py +7 -0
  369. datahub/sdk/_shared.py +249 -5
  370. datahub/sdk/chart.py +386 -0
  371. datahub/sdk/container.py +7 -0
  372. datahub/sdk/dashboard.py +453 -0
  373. datahub/sdk/dataflow.py +7 -0
  374. datahub/sdk/datajob.py +45 -13
  375. datahub/sdk/dataset.py +56 -2
  376. datahub/sdk/entity_client.py +111 -9
  377. datahub/sdk/lineage_client.py +663 -82
  378. datahub/sdk/main_client.py +50 -16
  379. datahub/sdk/mlmodel.py +120 -38
  380. datahub/sdk/mlmodelgroup.py +7 -0
  381. datahub/sdk/search_client.py +7 -3
  382. datahub/sdk/search_filters.py +304 -36
  383. datahub/secret/datahub_secret_store.py +3 -0
  384. datahub/secret/environment_secret_store.py +29 -0
  385. datahub/secret/file_secret_store.py +49 -0
  386. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  387. datahub/specific/aspect_helpers/siblings.py +73 -0
  388. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  389. datahub/specific/chart.py +1 -1
  390. datahub/specific/datajob.py +15 -1
  391. datahub/specific/dataproduct.py +4 -0
  392. datahub/specific/dataset.py +39 -59
  393. datahub/sql_parsing/split_statements.py +13 -0
  394. datahub/sql_parsing/sql_parsing_aggregator.py +70 -26
  395. datahub/sql_parsing/sqlglot_lineage.py +196 -42
  396. datahub/sql_parsing/sqlglot_utils.py +12 -4
  397. datahub/sql_parsing/tool_meta_extractor.py +1 -3
  398. datahub/telemetry/telemetry.py +28 -14
  399. datahub/testing/sdk_v2_helpers.py +7 -1
  400. datahub/upgrade/upgrade.py +73 -17
  401. datahub/utilities/file_backed_collections.py +8 -9
  402. datahub/utilities/is_pytest.py +3 -2
  403. datahub/utilities/logging_manager.py +22 -6
  404. datahub/utilities/mapping.py +29 -2
  405. datahub/utilities/sample_data.py +5 -4
  406. datahub/utilities/server_config_util.py +10 -1
  407. datahub/utilities/sqlalchemy_query_combiner.py +5 -2
  408. datahub/utilities/stats_collections.py +4 -0
  409. datahub/utilities/urns/urn.py +41 -2
  410. datahub/emitter/sql_parsing_builder.py +0 -306
  411. datahub/ingestion/source/redshift/lineage_v2.py +0 -466
  412. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +0 -0
  413. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/licenses/LICENSE +0 -0
  414. {acryl_datahub-1.1.1rc4.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -28,7 +28,7 @@ from looker_sdk.sdk.api40.models import (
28
28
  User,
29
29
  WriteQuery,
30
30
  )
31
- from pydantic.class_validators import validator
31
+ from pydantic import validator
32
32
 
33
33
  import datahub.emitter.mce_builder as builder
34
34
  from datahub.api.entities.platformresource.platform_resource import (
@@ -36,7 +36,7 @@ from datahub.api.entities.platformresource.platform_resource import (
36
36
  PlatformResourceKey,
37
37
  )
38
38
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
39
- from datahub.emitter.mcp_builder import ContainerKey, create_embed_mcp
39
+ from datahub.emitter.mcp_builder import ContainerKey
40
40
  from datahub.ingestion.api.report import Report
41
41
  from datahub.ingestion.api.source import SourceReport
42
42
  from datahub.ingestion.source.common.subtypes import DatasetSubTypes
@@ -72,7 +72,6 @@ from datahub.metadata.com.linkedin.pegasus2avro.dataset import (
72
72
  UpstreamClass,
73
73
  UpstreamLineage,
74
74
  )
75
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
76
75
  from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
77
76
  from datahub.metadata.com.linkedin.pegasus2avro.schema import (
78
77
  ArrayTypeClass,
@@ -90,21 +89,18 @@ from datahub.metadata.com.linkedin.pegasus2avro.schema import (
90
89
  )
91
90
  from datahub.metadata.schema_classes import (
92
91
  BrowsePathEntryClass,
93
- BrowsePathsClass,
94
92
  BrowsePathsV2Class,
95
- ContainerClass,
96
- DatasetPropertiesClass,
93
+ EmbedClass,
97
94
  EnumTypeClass,
98
95
  FineGrainedLineageClass,
99
96
  GlobalTagsClass,
100
97
  SchemaMetadataClass,
101
- StatusClass,
102
- SubTypesClass,
103
98
  TagAssociationClass,
104
99
  TagPropertiesClass,
105
100
  TagSnapshotClass,
106
101
  )
107
102
  from datahub.metadata.urns import TagUrn
103
+ from datahub.sdk.dataset import Dataset
108
104
  from datahub.sql_parsing.sqlglot_lineage import ColumnRef
109
105
  from datahub.utilities.lossy_collections import LossyList, LossySet
110
106
  from datahub.utilities.url_util import remove_port_from_url
@@ -242,13 +238,24 @@ class LookerViewId:
242
238
 
243
239
  dataset_name = config.view_naming_pattern.replace_variables(n_mapping)
244
240
 
245
- return builder.make_dataset_urn_with_platform_instance(
241
+ generated_urn = builder.make_dataset_urn_with_platform_instance(
246
242
  platform=config.platform_name,
247
243
  name=dataset_name,
248
244
  platform_instance=config.platform_instance,
249
245
  env=config.env,
250
246
  )
251
247
 
248
+ logger.debug(
249
+ f"LookerViewId.get_urn for view '{self.view_name}': project='{self.project_name}', model='{self.model_name}', file_path='{self.file_path}', dataset_name='{dataset_name}', generated_urn='{generated_urn}'"
250
+ )
251
+
252
+ return generated_urn
253
+
254
+ def get_view_dataset_name(self, config: LookerCommonConfig) -> str:
255
+ n_mapping: ViewNamingPatternMapping = self.get_mapping(config)
256
+ n_mapping.file_path = self.preprocess_file_path(n_mapping.file_path)
257
+ return config.view_naming_pattern.replace_variables(n_mapping)
258
+
252
259
  def get_browse_path(self, config: LookerCommonConfig) -> str:
253
260
  browse_path = config.view_browse_pattern.replace_variables(
254
261
  self.get_mapping(config)
@@ -276,6 +283,22 @@ class LookerViewId:
276
283
  ],
277
284
  )
278
285
 
286
+ def get_view_dataset_parent_container(
287
+ self, config: LookerCommonConfig
288
+ ) -> List[str]:
289
+ project_key = gen_project_key(config, self.project_name)
290
+ view_path = (
291
+ remove_suffix(self.file_path, ".view.lkml")
292
+ if "{file_path}" in config.view_browse_pattern.pattern
293
+ else os.path.dirname(self.file_path)
294
+ )
295
+ path_entries = view_path.split("/") if view_path else []
296
+ return [
297
+ "Develop",
298
+ project_key.as_urn(),
299
+ *path_entries,
300
+ ]
301
+
279
302
 
280
303
  class ViewFieldType(Enum):
281
304
  DIMENSION = "Dimension"
@@ -284,6 +307,12 @@ class ViewFieldType(Enum):
284
307
  UNKNOWN = "Unknown"
285
308
 
286
309
 
310
+ class ViewFieldDimensionGroupType(Enum):
311
+ # Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group
312
+ TIME = "time"
313
+ DURATION = "duration"
314
+
315
+
287
316
  class ViewFieldValue(Enum):
288
317
  NOT_AVAILABLE = "NotAvailable"
289
318
 
@@ -373,6 +402,14 @@ class ExploreUpstreamViewField:
373
402
  : -(len(self.field.field_group_variant.lower()) + 1)
374
403
  ]
375
404
 
405
+ # Validate that field_name is not empty to prevent invalid schema field URNs
406
+ if not field_name or not field_name.strip():
407
+ logger.warning(
408
+ f"Empty field name detected for field '{self.field.name}' in explore '{self.explore.name}'. "
409
+ f"Skipping field to prevent invalid schema field URN generation."
410
+ )
411
+ return None
412
+
376
413
  assert view_name # for lint false positive
377
414
 
378
415
  project_include: ProjectInclude = ProjectInclude(
@@ -452,15 +489,36 @@ class ExploreUpstreamViewField:
452
489
  )
453
490
 
454
491
 
455
- def create_view_project_map(view_fields: List[ViewField]) -> Dict[str, str]:
492
+ def create_view_project_map(
493
+ view_fields: List[ViewField],
494
+ explore_primary_view: Optional[str] = None,
495
+ explore_project_name: Optional[str] = None,
496
+ ) -> Dict[str, str]:
456
497
  """
457
498
  Each view in a model has unique name.
458
499
  Use this function in scope of a model.
500
+
501
+ Args:
502
+ view_fields: List of ViewField objects
503
+ explore_primary_view: The primary view name of the explore (explore.view_name)
504
+ explore_project_name: The project name of the explore (explore.project_name)
459
505
  """
460
506
  view_project_map: Dict[str, str] = {}
461
507
  for view_field in view_fields:
462
508
  if view_field.view_name is not None and view_field.project_name is not None:
463
- view_project_map[view_field.view_name] = view_field.project_name
509
+ # Override field-level project assignment for the primary view when different
510
+ if (
511
+ view_field.view_name == explore_primary_view
512
+ and explore_project_name is not None
513
+ and explore_project_name != view_field.project_name
514
+ ):
515
+ logger.debug(
516
+ f"Overriding project assignment for primary view '{view_field.view_name}': "
517
+ f"field-level project '{view_field.project_name}' → explore-level project '{explore_project_name}'"
518
+ )
519
+ view_project_map[view_field.view_name] = explore_project_name
520
+ else:
521
+ view_project_map[view_field.view_name] = view_field.project_name
464
522
 
465
523
  return view_project_map
466
524
 
@@ -953,6 +1011,9 @@ class LookerExplore:
953
1011
  f"Could not resolve view {view_name} for explore {dict['name']} in model {model_name}"
954
1012
  )
955
1013
  else:
1014
+ logger.debug(
1015
+ f"LookerExplore.from_dict adding upstream view for explore '{dict['name']}' (model='{model_name}'): view_name='{view_name}', info[0].project='{info[0].project}'"
1016
+ )
956
1017
  upstream_views.append(
957
1018
  ProjectInclude(project=info[0].project, include=view_name)
958
1019
  )
@@ -981,6 +1042,7 @@ class LookerExplore:
981
1042
  ) -> Optional["LookerExplore"]:
982
1043
  try:
983
1044
  explore = client.lookml_model_explore(model, explore_name)
1045
+
984
1046
  views: Set[str] = set()
985
1047
  lkml_fields: List[LookmlModelExploreField] = (
986
1048
  explore_field_set_to_lkml_fields(explore)
@@ -1117,7 +1179,11 @@ class LookerExplore:
1117
1179
  )
1118
1180
  )
1119
1181
 
1120
- view_project_map: Dict[str, str] = create_view_project_map(view_fields)
1182
+ view_project_map: Dict[str, str] = create_view_project_map(
1183
+ view_fields,
1184
+ explore_primary_view=explore.view_name,
1185
+ explore_project_name=explore.project_name,
1186
+ )
1121
1187
  if view_project_map:
1122
1188
  logger.debug(f"views and their projects: {view_project_map}")
1123
1189
 
@@ -1243,52 +1309,31 @@ class LookerExplore:
1243
1309
  reporter: SourceReport,
1244
1310
  base_url: str,
1245
1311
  extract_embed_urls: bool,
1246
- ) -> Optional[List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]]]:
1247
- # We only generate MCE-s for explores that contain from clauses and do NOT contain joins
1248
- # All other explores (passthrough explores and joins) end in correct resolution of lineage, and don't need additional nodes in the graph.
1249
-
1250
- dataset_snapshot = DatasetSnapshot(
1251
- urn=self.get_explore_urn(config),
1252
- aspects=[], # we append to this list later on
1253
- )
1254
-
1255
- model_key = gen_model_key(config, self.model_name)
1256
- browse_paths = BrowsePathsClass(paths=[self.get_explore_browse_path(config)])
1257
- container = ContainerClass(container=model_key.as_urn())
1258
- dataset_snapshot.aspects.append(browse_paths)
1259
- dataset_snapshot.aspects.append(StatusClass(removed=False))
1260
-
1261
- custom_properties = {
1262
- "project": self.project_name,
1263
- "model": self.model_name,
1264
- "looker.explore.label": self.label,
1265
- "looker.explore.name": self.name,
1266
- "looker.explore.file": self.source_file,
1267
- }
1268
- dataset_props = DatasetPropertiesClass(
1269
- name=str(self.label) if self.label else LookerUtil._display_name(self.name),
1270
- description=self.description,
1271
- customProperties={
1272
- k: str(v) for k, v in custom_properties.items() if v is not None
1273
- },
1274
- )
1275
- dataset_props.externalUrl = self._get_url(base_url)
1312
+ ) -> Dataset:
1313
+ """
1314
+ Generate a Dataset metadata event for this Looker Explore.
1276
1315
 
1277
- dataset_snapshot.aspects.append(dataset_props)
1316
+ Only generates datasets for explores that contain FROM clauses and do NOT contain joins.
1317
+ Passthrough explores and joins are handled via lineage and do not need additional nodes.
1318
+ """
1319
+ upstream_lineage = None
1278
1320
  view_name_to_urn_map: Dict[str, str] = {}
1321
+
1279
1322
  if self.upstream_views is not None:
1280
1323
  assert self.project_name is not None
1281
- upstreams = []
1324
+ upstreams: list[UpstreamClass] = []
1282
1325
  observed_lineage_ts = datetime.datetime.now(tz=datetime.timezone.utc)
1326
+
1283
1327
  for view_ref in sorted(self.upstream_views):
1284
1328
  # set file_path to ViewFieldType.UNKNOWN if file_path is not available to keep backward compatibility
1285
1329
  # if we raise error on file_path equal to None then existing test-cases will fail as mock data
1286
1330
  # doesn't have required attributes.
1287
1331
  file_path: str = (
1288
1332
  cast(str, self.upstream_views_file_path[view_ref.include])
1289
- if self.upstream_views_file_path[view_ref.include] is not None
1333
+ if self.upstream_views_file_path.get(view_ref.include) is not None
1290
1334
  else ViewFieldValue.NOT_AVAILABLE.value
1291
1335
  )
1336
+
1292
1337
  view_urn = LookerViewId(
1293
1338
  project_name=(
1294
1339
  view_ref.project
@@ -1312,10 +1357,28 @@ class LookerExplore:
1312
1357
  )
1313
1358
  view_name_to_urn_map[view_ref.include] = view_urn
1314
1359
 
1315
- fine_grained_lineages = []
1360
+ fine_grained_lineages: list[FineGrainedLineageClass] = []
1316
1361
  if config.extract_column_level_lineage:
1317
1362
  for field in self.fields or []:
1363
+ # Skip creating fine-grained lineage for empty field names to prevent invalid schema field URNs
1364
+ if not field.name or not field.name.strip():
1365
+ logger.warning(
1366
+ f"Skipping fine-grained lineage for field with empty name in explore '{self.name}'"
1367
+ )
1368
+ continue
1369
+
1318
1370
  for upstream_column_ref in field.upstream_fields:
1371
+ # Skip creating fine-grained lineage for empty column names to prevent invalid schema field URNs
1372
+ if (
1373
+ not upstream_column_ref.column
1374
+ or not upstream_column_ref.column.strip()
1375
+ ):
1376
+ logger.warning(
1377
+ f"Skipping some fine-grained lineage for field '{field.name}' in explore '{self.name}' "
1378
+ f"due to empty upstream column name in table '{upstream_column_ref.table}'"
1379
+ )
1380
+ continue
1381
+
1319
1382
  fine_grained_lineages.append(
1320
1383
  FineGrainedLineageClass(
1321
1384
  upstreamType=FineGrainedLineageUpstreamType.FIELD_SET,
@@ -1335,9 +1398,11 @@ class LookerExplore:
1335
1398
  )
1336
1399
 
1337
1400
  upstream_lineage = UpstreamLineage(
1338
- upstreams=upstreams, fineGrainedLineages=fine_grained_lineages or None
1401
+ upstreams=upstreams,
1402
+ fineGrainedLineages=fine_grained_lineages or None,
1339
1403
  )
1340
- dataset_snapshot.aspects.append(upstream_lineage)
1404
+
1405
+ schema_metadata = None
1341
1406
  if self.fields is not None:
1342
1407
  schema_metadata = LookerUtil._get_schema(
1343
1408
  platform_name=config.platform_name,
@@ -1345,42 +1410,46 @@ class LookerExplore:
1345
1410
  view_fields=self.fields,
1346
1411
  reporter=reporter,
1347
1412
  )
1348
- if schema_metadata is not None:
1349
- dataset_snapshot.aspects.append(schema_metadata)
1350
-
1351
- mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
1352
- mcp = MetadataChangeProposalWrapper(
1353
- entityUrn=dataset_snapshot.urn,
1354
- aspect=SubTypesClass(typeNames=[DatasetSubTypes.LOOKER_EXPLORE]),
1355
- )
1356
-
1357
- proposals: List[Union[MetadataChangeEvent, MetadataChangeProposalWrapper]] = [
1358
- mce,
1359
- mcp,
1360
- ]
1361
1413
 
1362
- # Add tags
1363
- explore_tag_urns: List[TagAssociationClass] = [
1364
- TagAssociationClass(tag=TagUrn(tag).urn()) for tag in self.tags
1365
- ]
1366
- if explore_tag_urns:
1367
- dataset_snapshot.aspects.append(GlobalTagsClass(explore_tag_urns))
1414
+ extra_aspects: List[Union[GlobalTagsClass, EmbedClass]] = []
1368
1415
 
1369
- # If extracting embeds is enabled, produce an MCP for embed URL.
1416
+ explore_tag_urns: List[TagUrn] = [TagUrn(tag) for tag in self.tags]
1370
1417
  if extract_embed_urls:
1371
- embed_mcp = create_embed_mcp(
1372
- dataset_snapshot.urn, self._get_embed_url(base_url)
1373
- )
1374
- proposals.append(embed_mcp)
1418
+ extra_aspects.append(EmbedClass(renderUrl=self._get_embed_url(base_url)))
1375
1419
 
1376
- proposals.append(
1377
- MetadataChangeProposalWrapper(
1378
- entityUrn=dataset_snapshot.urn,
1379
- aspect=container,
1380
- )
1381
- )
1420
+ custom_properties: Dict[str, Optional[str]] = {
1421
+ "project": self.project_name,
1422
+ "model": self.model_name,
1423
+ "looker.explore.label": self.label,
1424
+ "looker.explore.name": self.name,
1425
+ "looker.explore.file": self.source_file,
1426
+ }
1382
1427
 
1383
- return proposals
1428
+ return Dataset(
1429
+ platform=config.platform_name,
1430
+ name=config.explore_naming_pattern.replace_variables(
1431
+ self.get_mapping(config)
1432
+ ),
1433
+ display_name=str(self.label)
1434
+ if self.label
1435
+ else LookerUtil._display_name(self.name),
1436
+ description=self.description,
1437
+ subtype=DatasetSubTypes.LOOKER_EXPLORE,
1438
+ env=config.env,
1439
+ platform_instance=config.platform_instance,
1440
+ custom_properties={
1441
+ k: str(v) for k, v in custom_properties.items() if v is not None
1442
+ },
1443
+ external_url=self._get_url(base_url),
1444
+ upstreams=upstream_lineage,
1445
+ schema=schema_metadata,
1446
+ parent_container=[
1447
+ "Explore",
1448
+ gen_model_key(config, self.model_name).as_urn(),
1449
+ ],
1450
+ tags=explore_tag_urns if explore_tag_urns else None,
1451
+ extra_aspects=extra_aspects,
1452
+ )
1384
1453
 
1385
1454
 
1386
1455
  def gen_project_key(config: LookerCommonConfig, project_name: str) -> LookMLProjectKey:
@@ -5,10 +5,14 @@ from typing import Any, ClassVar, Dict, List, Optional, Tuple, Union, cast
5
5
 
6
6
  import pydantic
7
7
  from looker_sdk.sdk.api40.models import DBConnection
8
- from pydantic import Field, validator
8
+ from pydantic import Field, model_validator, validator
9
9
 
10
10
  from datahub.configuration import ConfigModel
11
- from datahub.configuration.common import AllowDenyPattern, ConfigurationError
11
+ from datahub.configuration.common import (
12
+ AllowDenyPattern,
13
+ ConfigurationError,
14
+ HiddenFromDocs,
15
+ )
12
16
  from datahub.configuration.source_common import (
13
17
  EnvConfigMixin,
14
18
  PlatformInstanceConfigMixin,
@@ -43,6 +47,14 @@ class NamingPattern(ConfigModel):
43
47
  assert isinstance(v, str), "pattern must be a string"
44
48
  return {"pattern": v}
45
49
 
50
+ @model_validator(mode="before")
51
+ @classmethod
52
+ def pydantic_v2_accept_raw_pattern(cls, v):
53
+ # Pydantic v2 compatibility: handle string input by converting to dict
54
+ if isinstance(v, str):
55
+ return {"pattern": v}
56
+ return v
57
+
46
58
  @classmethod
47
59
  def pydantic_validate_pattern(cls, v):
48
60
  assert isinstance(v, NamingPattern)
@@ -132,11 +144,10 @@ class LookerCommonConfig(EnvConfigMixin, PlatformInstanceConfigMixin):
132
144
  description="When enabled, attaches tags to measures, dimensions and dimension groups to make them more "
133
145
  "discoverable. When disabled, adds this information to the description of the column.",
134
146
  )
135
- platform_name: str = Field(
147
+ platform_name: HiddenFromDocs[str] = Field(
136
148
  # TODO: This shouldn't be part of the config.
137
149
  "looker",
138
150
  description="Default platform name.",
139
- hidden_from_docs=True,
140
151
  )
141
152
  extract_column_level_lineage: bool = Field(
142
153
  True,
@@ -11,3 +11,7 @@ prod = "prod"
11
11
  dev = "dev"
12
12
  NAME = "name"
13
13
  DERIVED_DOT_SQL = "derived.sql"
14
+
15
+ VIEW_FIELD_TYPE_ATTRIBUTE = "type"
16
+ VIEW_FIELD_INTERVALS_ATTRIBUTE = "intervals"
17
+ VIEW_FIELD_TIMEFRAMES_ATTRIBUTE = "timeframes"
@@ -2,6 +2,7 @@
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ from enum import Enum
5
6
  from functools import lru_cache
6
7
  from typing import Dict, List, MutableMapping, Optional, Sequence, Set, Union, cast
7
8
 
@@ -31,6 +32,14 @@ from datahub.configuration.common import ConfigurationError
31
32
  logger = logging.getLogger(__name__)
32
33
 
33
34
 
35
+ class LookerQueryResponseFormat(Enum):
36
+ # result_format - Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
37
+ JSON = "json"
38
+ SQL = (
39
+ "sql" # Note: This does not execute the query, it only generates the SQL query.
40
+ )
41
+
42
+
34
43
  class TransportOptionsConfig(ConfigModel):
35
44
  timeout: int
36
45
  headers: MutableMapping[str, str]
@@ -69,6 +78,7 @@ class LookerAPIStats(BaseModel):
69
78
  search_looks_calls: int = 0
70
79
  search_dashboards_calls: int = 0
71
80
  all_user_calls: int = 0
81
+ generate_sql_query_calls: int = 0
72
82
 
73
83
 
74
84
  class LookerAPI:
@@ -170,17 +180,40 @@ class LookerAPI:
170
180
  logger.debug(f"Executing query {write_query}")
171
181
  self.client_stats.query_calls += 1
172
182
 
173
- response_json = self.client.run_inline_query(
174
- result_format="json",
183
+ response = self.client.run_inline_query(
184
+ result_format=LookerQueryResponseFormat.JSON.value,
175
185
  body=write_query,
176
186
  transport_options=self.transport_options,
177
187
  )
178
188
 
189
+ data = json.loads(response)
190
+
179
191
  logger.debug("=================Response=================")
180
- data = json.loads(response_json)
181
192
  logger.debug("Length of response: %d", len(data))
182
193
  return data
183
194
 
195
+ def generate_sql_query(
196
+ self, write_query: WriteQuery, use_cache: bool = False
197
+ ) -> str:
198
+ """
199
+ Generates a SQL query string for a given WriteQuery.
200
+
201
+ Note: This does not execute the query, it only generates the SQL query.
202
+ """
203
+ logger.debug(f"Generating SQL query for {write_query}")
204
+ self.client_stats.generate_sql_query_calls += 1
205
+
206
+ response = self.client.run_inline_query(
207
+ result_format=LookerQueryResponseFormat.SQL.value,
208
+ body=write_query,
209
+ transport_options=self.transport_options,
210
+ cache=use_cache,
211
+ )
212
+
213
+ logger.debug("=================Response=================")
214
+ logger.debug("Length of SQL response: %d", len(response))
215
+ return str(response)
216
+
184
217
  def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboard:
185
218
  self.client_stats.dashboard_calls += 1
186
219
  return self.client.dashboard(
@@ -1,5 +1,5 @@
1
1
  from functools import lru_cache
2
- from typing import ClassVar, Optional, TextIO
2
+ from typing import ClassVar, Optional, TextIO, Type
3
3
 
4
4
  from liquid import Environment
5
5
  from liquid.ast import Node
@@ -20,16 +20,27 @@ class CustomTagException(Exception):
20
20
  class ConditionNode(Node):
21
21
  def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str):
22
22
  self.tok = tok
23
-
24
23
  self.sql_or_lookml_reference = sql_or_lookml_reference
25
-
26
24
  self.filter_name = filter_name
27
25
 
28
26
  def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
29
27
  # This implementation will make sure that sql parse work correctly if looker condition tag
30
28
  # is used in lookml sql field
31
29
  buffer.write(f"{self.sql_or_lookml_reference}='dummy_value'")
30
+ return True
32
31
 
32
+
33
+ class IncrementConditionNode(Node):
34
+ def __init__(self, tok: Token, sql_or_lookml_reference: str):
35
+ self.tok = tok
36
+ self.sql_or_lookml_reference = sql_or_lookml_reference
37
+
38
+ def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]:
39
+ # For incrementcondition, we need to generate a condition that would be used
40
+ # in incremental PDT updates. This typically involves date/time comparisons.
41
+ # We'll render it as a date comparison with a placeholder value
42
+ # See details in Looker documentation for incrementcondition tag -> cloud.google.com/looker/docs/reference/param-view-increment-key
43
+ buffer.write(f"{self.sql_or_lookml_reference} > '2023-01-01'")
33
44
  return True
34
45
 
35
46
 
@@ -44,7 +55,6 @@ class ConditionTag(Tag):
44
55
  This class render the below tag as order.region='ap-south-1' if order_region is provided in config.liquid_variables
45
56
  as order_region: 'ap-south-1'
46
57
  {% condition order_region %} order.region {% endcondition %}
47
-
48
58
  """
49
59
 
50
60
  TAG_START: ClassVar[str] = "condition"
@@ -79,7 +89,48 @@ class ConditionTag(Tag):
79
89
  )
80
90
 
81
91
 
82
- custom_tags = [ConditionTag]
92
+ class IncrementConditionTag(Tag):
93
+ """
94
+ IncrementConditionTag is the equivalent implementation of looker's custom liquid tag "incrementcondition".
95
+ Refer doc: https://cloud.google.com/looker/docs/incremental-pdts#using_the_incrementcondition_tag
96
+
97
+ This tag is used for incremental PDTs to determine which records should be updated.
98
+ It typically works with date/time fields to filter data that has changed since the last update.
99
+
100
+ Example usage in Looker:
101
+ {% incrementcondition created_at %} order.created_at {% endincrementcondition %}
102
+
103
+ This would generate SQL like: order.created_at > '2023-01-01'
104
+ """
105
+
106
+ TAG_START: ClassVar[str] = "incrementcondition"
107
+ TAG_END: ClassVar[str] = "endincrementcondition"
108
+ name: str = "incrementcondition"
109
+
110
+ def __init__(self, env: Environment):
111
+ super().__init__(env)
112
+ self.parser = get_parser(self.env)
113
+
114
+ def parse(self, stream: TokenStream) -> Node:
115
+ expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_START)
116
+
117
+ start_token = stream.current
118
+
119
+ stream.next_token()
120
+ expect(stream, TOKEN_LITERAL)
121
+ sql_or_lookml_reference: str = stream.current.value.strip()
122
+
123
+ stream.next_token()
124
+ expect(stream, TOKEN_TAG, value=IncrementConditionTag.TAG_END)
125
+
126
+ return IncrementConditionNode(
127
+ tok=start_token,
128
+ sql_or_lookml_reference=sql_or_lookml_reference,
129
+ )
130
+
131
+
132
+ # Updated custom_tags list to include both tags
133
+ custom_tags: list[Type[Tag]] = [ConditionTag, IncrementConditionTag]
83
134
 
84
135
 
85
136
  @string_filter