acryl-datahub 1.0.0rc18__py3-none-any.whl → 1.3.0.1rc9__py3-none-any.whl

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (503)
  1. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/METADATA +2686 -2563
  2. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/RECORD +499 -392
  3. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/entry_points.txt +7 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/circuit_breaker/operation_circuit_breaker.py +2 -2
  7. datahub/api/entities/assertion/assertion.py +1 -1
  8. datahub/api/entities/common/serialized_value.py +1 -1
  9. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  10. datahub/api/entities/datacontract/datacontract.py +35 -3
  11. datahub/api/entities/datajob/dataflow.py +18 -3
  12. datahub/api/entities/datajob/datajob.py +24 -4
  13. datahub/api/entities/dataprocess/dataprocess_instance.py +4 -0
  14. datahub/api/entities/dataproduct/dataproduct.py +32 -3
  15. datahub/api/entities/dataset/dataset.py +47 -72
  16. datahub/api/entities/external/__init__.py +0 -0
  17. datahub/api/entities/external/external_entities.py +724 -0
  18. datahub/api/entities/external/external_tag.py +147 -0
  19. datahub/api/entities/external/lake_formation_external_entites.py +162 -0
  20. datahub/api/entities/external/restricted_text.py +172 -0
  21. datahub/api/entities/external/unity_catalog_external_entites.py +172 -0
  22. datahub/api/entities/forms/forms.py +37 -37
  23. datahub/api/entities/structuredproperties/structuredproperties.py +6 -6
  24. datahub/api/graphql/assertion.py +1 -1
  25. datahub/api/graphql/base.py +8 -6
  26. datahub/api/graphql/operation.py +14 -10
  27. datahub/cli/check_cli.py +91 -9
  28. datahub/cli/cli_utils.py +63 -0
  29. datahub/cli/config_utils.py +20 -12
  30. datahub/cli/container_cli.py +5 -0
  31. datahub/cli/delete_cli.py +133 -34
  32. datahub/cli/docker_check.py +110 -14
  33. datahub/cli/docker_cli.py +155 -231
  34. datahub/cli/exists_cli.py +2 -3
  35. datahub/cli/get_cli.py +2 -3
  36. datahub/cli/graphql_cli.py +1422 -0
  37. datahub/cli/iceberg_cli.py +11 -5
  38. datahub/cli/ingest_cli.py +25 -26
  39. datahub/cli/migrate.py +12 -9
  40. datahub/cli/migration_utils.py +4 -3
  41. datahub/cli/put_cli.py +4 -6
  42. datahub/cli/quickstart_versioning.py +53 -10
  43. datahub/cli/specific/assertions_cli.py +39 -7
  44. datahub/cli/specific/datacontract_cli.py +57 -9
  45. datahub/cli/specific/dataproduct_cli.py +12 -24
  46. datahub/cli/specific/dataset_cli.py +31 -21
  47. datahub/cli/specific/forms_cli.py +2 -5
  48. datahub/cli/specific/group_cli.py +2 -3
  49. datahub/cli/specific/structuredproperties_cli.py +5 -7
  50. datahub/cli/specific/user_cli.py +174 -4
  51. datahub/cli/state_cli.py +2 -3
  52. datahub/cli/timeline_cli.py +2 -3
  53. datahub/configuration/common.py +46 -2
  54. datahub/configuration/connection_resolver.py +5 -2
  55. datahub/configuration/env_vars.py +331 -0
  56. datahub/configuration/import_resolver.py +7 -4
  57. datahub/configuration/kafka.py +21 -1
  58. datahub/configuration/pydantic_migration_helpers.py +6 -13
  59. datahub/configuration/source_common.py +4 -3
  60. datahub/configuration/validate_field_deprecation.py +5 -2
  61. datahub/configuration/validate_field_removal.py +8 -2
  62. datahub/configuration/validate_field_rename.py +6 -5
  63. datahub/configuration/validate_multiline_string.py +5 -2
  64. datahub/emitter/mce_builder.py +12 -8
  65. datahub/emitter/mcp.py +20 -5
  66. datahub/emitter/mcp_builder.py +12 -0
  67. datahub/emitter/request_helper.py +138 -15
  68. datahub/emitter/response_helper.py +111 -19
  69. datahub/emitter/rest_emitter.py +399 -163
  70. datahub/entrypoints.py +10 -5
  71. datahub/errors.py +12 -0
  72. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +299 -2
  73. datahub/ingestion/api/auto_work_units/auto_validate_input_fields.py +87 -0
  74. datahub/ingestion/api/common.py +9 -0
  75. datahub/ingestion/api/decorators.py +15 -3
  76. datahub/ingestion/api/report.py +381 -3
  77. datahub/ingestion/api/sink.py +27 -2
  78. datahub/ingestion/api/source.py +174 -62
  79. datahub/ingestion/api/source_helpers.py +41 -3
  80. datahub/ingestion/api/source_protocols.py +23 -0
  81. datahub/ingestion/autogenerated/__init__.py +0 -0
  82. datahub/ingestion/autogenerated/capability_summary.json +3652 -0
  83. datahub/ingestion/autogenerated/lineage.json +402 -0
  84. datahub/ingestion/autogenerated/lineage_helper.py +177 -0
  85. datahub/ingestion/extractor/schema_util.py +31 -5
  86. datahub/ingestion/glossary/classification_mixin.py +9 -2
  87. datahub/ingestion/graph/client.py +492 -55
  88. datahub/ingestion/graph/config.py +18 -2
  89. datahub/ingestion/graph/filters.py +96 -32
  90. datahub/ingestion/graph/links.py +55 -0
  91. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +21 -11
  92. datahub/ingestion/run/pipeline.py +90 -23
  93. datahub/ingestion/run/pipeline_config.py +3 -3
  94. datahub/ingestion/sink/datahub_kafka.py +1 -0
  95. datahub/ingestion/sink/datahub_rest.py +31 -23
  96. datahub/ingestion/sink/file.py +1 -0
  97. datahub/ingestion/source/abs/config.py +1 -1
  98. datahub/ingestion/source/abs/datalake_profiler_config.py +1 -1
  99. datahub/ingestion/source/abs/source.py +15 -30
  100. datahub/ingestion/source/apply/datahub_apply.py +6 -5
  101. datahub/ingestion/source/aws/aws_common.py +185 -13
  102. datahub/ingestion/source/aws/glue.py +517 -244
  103. datahub/ingestion/source/aws/platform_resource_repository.py +30 -0
  104. datahub/ingestion/source/aws/s3_boto_utils.py +100 -5
  105. datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +1 -1
  106. datahub/ingestion/source/aws/sagemaker_processors/models.py +4 -4
  107. datahub/ingestion/source/aws/tag_entities.py +270 -0
  108. datahub/ingestion/source/azure/azure_common.py +3 -3
  109. datahub/ingestion/source/bigquery_v2/bigquery.py +51 -7
  110. datahub/ingestion/source/bigquery_v2/bigquery_config.py +51 -81
  111. datahub/ingestion/source/bigquery_v2/bigquery_connection.py +81 -0
  112. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +6 -1
  113. datahub/ingestion/source/bigquery_v2/bigquery_report.py +0 -2
  114. datahub/ingestion/source/bigquery_v2/bigquery_schema.py +23 -16
  115. datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +20 -5
  116. datahub/ingestion/source/bigquery_v2/common.py +1 -1
  117. datahub/ingestion/source/bigquery_v2/lineage.py +1 -1
  118. datahub/ingestion/source/bigquery_v2/profiler.py +4 -2
  119. datahub/ingestion/source/bigquery_v2/queries.py +3 -3
  120. datahub/ingestion/source/bigquery_v2/queries_extractor.py +45 -9
  121. datahub/ingestion/source/cassandra/cassandra.py +7 -18
  122. datahub/ingestion/source/cassandra/cassandra_api.py +36 -0
  123. datahub/ingestion/source/cassandra/cassandra_config.py +20 -0
  124. datahub/ingestion/source/cassandra/cassandra_profiling.py +26 -24
  125. datahub/ingestion/source/cassandra/cassandra_utils.py +1 -2
  126. datahub/ingestion/source/common/data_platforms.py +23 -0
  127. datahub/ingestion/source/common/gcp_credentials_config.py +9 -1
  128. datahub/ingestion/source/common/subtypes.py +73 -1
  129. datahub/ingestion/source/data_lake_common/data_lake_utils.py +59 -10
  130. datahub/ingestion/source/data_lake_common/object_store.py +732 -0
  131. datahub/ingestion/source/data_lake_common/path_spec.py +87 -38
  132. datahub/ingestion/source/datahub/config.py +19 -5
  133. datahub/ingestion/source/datahub/datahub_database_reader.py +205 -36
  134. datahub/ingestion/source/datahub/datahub_source.py +11 -1
  135. datahub/ingestion/source/dbt/dbt_cloud.py +17 -10
  136. datahub/ingestion/source/dbt/dbt_common.py +270 -26
  137. datahub/ingestion/source/dbt/dbt_core.py +88 -47
  138. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  139. datahub/ingestion/source/debug/__init__.py +0 -0
  140. datahub/ingestion/source/debug/datahub_debug.py +300 -0
  141. datahub/ingestion/source/delta_lake/config.py +9 -5
  142. datahub/ingestion/source/delta_lake/source.py +8 -0
  143. datahub/ingestion/source/dremio/dremio_api.py +114 -73
  144. datahub/ingestion/source/dremio/dremio_aspects.py +3 -2
  145. datahub/ingestion/source/dremio/dremio_config.py +5 -4
  146. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  147. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  148. datahub/ingestion/source/dremio/dremio_reporting.py +22 -3
  149. datahub/ingestion/source/dremio/dremio_source.py +228 -215
  150. datahub/ingestion/source/dremio/dremio_sql_queries.py +82 -21
  151. datahub/ingestion/source/dynamodb/dynamodb.py +19 -13
  152. datahub/ingestion/source/excel/__init__.py +0 -0
  153. datahub/ingestion/source/excel/config.py +92 -0
  154. datahub/ingestion/source/excel/excel_file.py +539 -0
  155. datahub/ingestion/source/excel/profiling.py +308 -0
  156. datahub/ingestion/source/excel/report.py +49 -0
  157. datahub/ingestion/source/excel/source.py +662 -0
  158. datahub/ingestion/source/excel/util.py +18 -0
  159. datahub/ingestion/source/feast.py +12 -14
  160. datahub/ingestion/source/file.py +3 -0
  161. datahub/ingestion/source/fivetran/config.py +67 -8
  162. datahub/ingestion/source/fivetran/fivetran.py +228 -43
  163. datahub/ingestion/source/fivetran/fivetran_log_api.py +42 -9
  164. datahub/ingestion/source/fivetran/fivetran_query.py +58 -36
  165. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  166. datahub/ingestion/source/fivetran/response_models.py +97 -0
  167. datahub/ingestion/source/gc/datahub_gc.py +0 -2
  168. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  169. datahub/ingestion/source/gcs/gcs_source.py +53 -10
  170. datahub/ingestion/source/gcs/gcs_utils.py +36 -9
  171. datahub/ingestion/source/ge_data_profiler.py +146 -33
  172. datahub/ingestion/source/ge_profiling_config.py +26 -11
  173. datahub/ingestion/source/grafana/entity_mcp_builder.py +272 -0
  174. datahub/ingestion/source/grafana/field_utils.py +307 -0
  175. datahub/ingestion/source/grafana/grafana_api.py +142 -0
  176. datahub/ingestion/source/grafana/grafana_config.py +104 -0
  177. datahub/ingestion/source/grafana/grafana_source.py +522 -84
  178. datahub/ingestion/source/grafana/lineage.py +202 -0
  179. datahub/ingestion/source/grafana/models.py +137 -0
  180. datahub/ingestion/source/grafana/report.py +90 -0
  181. datahub/ingestion/source/grafana/types.py +16 -0
  182. datahub/ingestion/source/hex/__init__.py +0 -0
  183. datahub/ingestion/source/hex/api.py +402 -0
  184. datahub/ingestion/source/hex/constants.py +8 -0
  185. datahub/ingestion/source/hex/hex.py +311 -0
  186. datahub/ingestion/source/hex/mapper.py +412 -0
  187. datahub/ingestion/source/hex/model.py +78 -0
  188. datahub/ingestion/source/hex/query_fetcher.py +307 -0
  189. datahub/ingestion/source/iceberg/iceberg.py +385 -164
  190. datahub/ingestion/source/iceberg/iceberg_common.py +2 -2
  191. datahub/ingestion/source/iceberg/iceberg_profiler.py +25 -20
  192. datahub/ingestion/source/identity/azure_ad.py +1 -1
  193. datahub/ingestion/source/identity/okta.py +1 -14
  194. datahub/ingestion/source/kafka/kafka.py +28 -71
  195. datahub/ingestion/source/kafka/kafka_config.py +78 -0
  196. datahub/ingestion/source/kafka_connect/common.py +2 -2
  197. datahub/ingestion/source/kafka_connect/sink_connectors.py +157 -48
  198. datahub/ingestion/source/kafka_connect/source_connectors.py +63 -5
  199. datahub/ingestion/source/ldap.py +1 -1
  200. datahub/ingestion/source/looker/looker_common.py +216 -86
  201. datahub/ingestion/source/looker/looker_config.py +15 -4
  202. datahub/ingestion/source/looker/looker_constant.py +4 -0
  203. datahub/ingestion/source/looker/looker_lib_wrapper.py +37 -4
  204. datahub/ingestion/source/looker/looker_liquid_tag.py +56 -5
  205. datahub/ingestion/source/looker/looker_source.py +539 -555
  206. datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
  207. datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
  208. datahub/ingestion/source/looker/lookml_config.py +31 -3
  209. datahub/ingestion/source/looker/lookml_refinement.py +1 -1
  210. datahub/ingestion/source/looker/lookml_source.py +103 -118
  211. datahub/ingestion/source/looker/view_upstream.py +494 -1
  212. datahub/ingestion/source/metabase.py +32 -6
  213. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  214. datahub/ingestion/source/metadata/lineage.py +11 -10
  215. datahub/ingestion/source/mlflow.py +254 -23
  216. datahub/ingestion/source/mock_data/__init__.py +0 -0
  217. datahub/ingestion/source/mock_data/datahub_mock_data.py +533 -0
  218. datahub/ingestion/source/mock_data/datahub_mock_data_report.py +12 -0
  219. datahub/ingestion/source/mock_data/table_naming_helper.py +97 -0
  220. datahub/ingestion/source/mode.py +359 -181
  221. datahub/ingestion/source/mongodb.py +11 -1
  222. datahub/ingestion/source/neo4j/neo4j_source.py +122 -153
  223. datahub/ingestion/source/nifi.py +5 -5
  224. datahub/ingestion/source/openapi.py +85 -38
  225. datahub/ingestion/source/openapi_parser.py +59 -40
  226. datahub/ingestion/source/powerbi/config.py +92 -27
  227. datahub/ingestion/source/powerbi/m_query/data_classes.py +3 -0
  228. datahub/ingestion/source/powerbi/m_query/odbc.py +185 -0
  229. datahub/ingestion/source/powerbi/m_query/parser.py +2 -2
  230. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +358 -14
  231. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  232. datahub/ingestion/source/powerbi/powerbi.py +66 -32
  233. datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -2
  234. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -12
  235. datahub/ingestion/source/powerbi_report_server/report_server.py +0 -23
  236. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +2 -4
  237. datahub/ingestion/source/preset.py +3 -3
  238. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  239. datahub/ingestion/source/qlik_sense/qlik_sense.py +2 -1
  240. datahub/ingestion/source/redash.py +1 -1
  241. datahub/ingestion/source/redshift/config.py +15 -9
  242. datahub/ingestion/source/redshift/datashares.py +1 -1
  243. datahub/ingestion/source/redshift/lineage.py +386 -687
  244. datahub/ingestion/source/redshift/profile.py +2 -2
  245. datahub/ingestion/source/redshift/query.py +24 -20
  246. datahub/ingestion/source/redshift/redshift.py +52 -111
  247. datahub/ingestion/source/redshift/redshift_schema.py +17 -12
  248. datahub/ingestion/source/redshift/report.py +0 -2
  249. datahub/ingestion/source/redshift/usage.py +13 -11
  250. datahub/ingestion/source/s3/report.py +4 -2
  251. datahub/ingestion/source/s3/source.py +515 -244
  252. datahub/ingestion/source/sac/sac.py +3 -1
  253. datahub/ingestion/source/salesforce.py +28 -13
  254. datahub/ingestion/source/schema/json_schema.py +14 -14
  255. datahub/ingestion/source/schema_inference/object.py +22 -6
  256. datahub/ingestion/source/sigma/config.py +75 -8
  257. datahub/ingestion/source/sigma/data_classes.py +3 -0
  258. datahub/ingestion/source/sigma/sigma.py +36 -7
  259. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  260. datahub/ingestion/source/slack/slack.py +403 -140
  261. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  262. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  263. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  264. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  265. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  266. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  267. datahub/ingestion/source/snowflake/constants.py +4 -0
  268. datahub/ingestion/source/snowflake/snowflake_config.py +103 -34
  269. datahub/ingestion/source/snowflake/snowflake_connection.py +47 -25
  270. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +25 -6
  271. datahub/ingestion/source/snowflake/snowflake_profiler.py +1 -6
  272. datahub/ingestion/source/snowflake/snowflake_queries.py +511 -107
  273. datahub/ingestion/source/snowflake/snowflake_query.py +100 -72
  274. datahub/ingestion/source/snowflake/snowflake_report.py +4 -2
  275. datahub/ingestion/source/snowflake/snowflake_schema.py +381 -16
  276. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +163 -52
  277. datahub/ingestion/source/snowflake/snowflake_summary.py +7 -1
  278. datahub/ingestion/source/snowflake/snowflake_tag.py +4 -1
  279. datahub/ingestion/source/snowflake/snowflake_usage_v2.py +8 -2
  280. datahub/ingestion/source/snowflake/snowflake_utils.py +62 -17
  281. datahub/ingestion/source/snowflake/snowflake_v2.py +56 -10
  282. datahub/ingestion/source/snowflake/stored_proc_lineage.py +143 -0
  283. datahub/ingestion/source/sql/athena.py +219 -26
  284. datahub/ingestion/source/sql/athena_properties_extractor.py +795 -0
  285. datahub/ingestion/source/sql/clickhouse.py +29 -9
  286. datahub/ingestion/source/sql/cockroachdb.py +5 -4
  287. datahub/ingestion/source/sql/druid.py +9 -4
  288. datahub/ingestion/source/sql/hana.py +3 -1
  289. datahub/ingestion/source/sql/hive.py +28 -8
  290. datahub/ingestion/source/sql/hive_metastore.py +24 -25
  291. datahub/ingestion/source/sql/mariadb.py +0 -1
  292. datahub/ingestion/source/sql/mssql/job_models.py +18 -2
  293. datahub/ingestion/source/sql/mssql/source.py +376 -62
  294. datahub/ingestion/source/sql/mysql.py +154 -4
  295. datahub/ingestion/source/sql/oracle.py +62 -11
  296. datahub/ingestion/source/sql/postgres.py +142 -6
  297. datahub/ingestion/source/sql/presto.py +20 -2
  298. datahub/ingestion/source/sql/sql_common.py +281 -49
  299. datahub/ingestion/source/sql/sql_config.py +1 -34
  300. datahub/ingestion/source/sql/sql_generic_profiler.py +2 -1
  301. datahub/ingestion/source/sql/sql_types.py +27 -2
  302. datahub/ingestion/source/sql/sqlalchemy_uri.py +68 -0
  303. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  304. datahub/ingestion/source/sql/stored_procedures/base.py +253 -0
  305. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +2 -29
  306. datahub/ingestion/source/sql/teradata.py +1028 -245
  307. datahub/ingestion/source/sql/trino.py +43 -10
  308. datahub/ingestion/source/sql/two_tier_sql_source.py +3 -4
  309. datahub/ingestion/source/sql/vertica.py +14 -7
  310. datahub/ingestion/source/sql_queries.py +219 -121
  311. datahub/ingestion/source/state/checkpoint.py +8 -29
  312. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  313. datahub/ingestion/source/state/redundant_run_skip_handler.py +21 -0
  314. datahub/ingestion/source/state/stale_entity_removal_handler.py +0 -1
  315. datahub/ingestion/source/state/stateful_ingestion_base.py +36 -11
  316. datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider.py +2 -1
  317. datahub/ingestion/source/superset.py +810 -126
  318. datahub/ingestion/source/tableau/tableau.py +172 -69
  319. datahub/ingestion/source/tableau/tableau_common.py +11 -4
  320. datahub/ingestion/source/tableau/tableau_constant.py +1 -4
  321. datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
  322. datahub/ingestion/source/tableau/tableau_validation.py +1 -1
  323. datahub/ingestion/source/unity/config.py +161 -40
  324. datahub/ingestion/source/unity/connection.py +61 -0
  325. datahub/ingestion/source/unity/connection_test.py +1 -0
  326. datahub/ingestion/source/unity/platform_resource_repository.py +19 -0
  327. datahub/ingestion/source/unity/proxy.py +794 -51
  328. datahub/ingestion/source/unity/proxy_patch.py +321 -0
  329. datahub/ingestion/source/unity/proxy_types.py +36 -2
  330. datahub/ingestion/source/unity/report.py +15 -3
  331. datahub/ingestion/source/unity/source.py +465 -131
  332. datahub/ingestion/source/unity/tag_entities.py +197 -0
  333. datahub/ingestion/source/unity/usage.py +46 -4
  334. datahub/ingestion/source/usage/clickhouse_usage.py +11 -4
  335. datahub/ingestion/source/usage/starburst_trino_usage.py +10 -5
  336. datahub/ingestion/source/usage/usage_common.py +4 -68
  337. datahub/ingestion/source/vertexai/__init__.py +0 -0
  338. datahub/ingestion/source/vertexai/vertexai.py +1367 -0
  339. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  340. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +89 -0
  341. datahub/ingestion/source_config/pulsar.py +3 -1
  342. datahub/ingestion/source_report/ingestion_stage.py +50 -11
  343. datahub/ingestion/transformer/add_dataset_dataproduct.py +1 -1
  344. datahub/ingestion/transformer/add_dataset_ownership.py +19 -3
  345. datahub/ingestion/transformer/base_transformer.py +8 -5
  346. datahub/ingestion/transformer/dataset_domain.py +1 -1
  347. datahub/ingestion/transformer/set_browse_path.py +112 -0
  348. datahub/integrations/assertion/common.py +3 -2
  349. datahub/integrations/assertion/snowflake/compiler.py +4 -3
  350. datahub/lite/lite_util.py +2 -2
  351. datahub/metadata/{_schema_classes.py → _internal_schema_classes.py} +3095 -631
  352. datahub/metadata/_urns/urn_defs.py +1866 -1582
  353. datahub/metadata/com/linkedin/pegasus2avro/application/__init__.py +19 -0
  354. datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
  355. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  356. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  357. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  358. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  359. datahub/metadata/com/linkedin/pegasus2avro/file/__init__.py +19 -0
  360. datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py +2 -0
  361. datahub/metadata/com/linkedin/pegasus2avro/logical/__init__.py +15 -0
  362. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +8 -0
  363. datahub/metadata/com/linkedin/pegasus2avro/module/__init__.py +31 -0
  364. datahub/metadata/com/linkedin/pegasus2avro/platform/event/v1/__init__.py +4 -0
  365. datahub/metadata/com/linkedin/pegasus2avro/role/__init__.py +2 -0
  366. datahub/metadata/com/linkedin/pegasus2avro/settings/asset/__init__.py +19 -0
  367. datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +8 -0
  368. datahub/metadata/com/linkedin/pegasus2avro/template/__init__.py +31 -0
  369. datahub/metadata/schema.avsc +18404 -16617
  370. datahub/metadata/schema_classes.py +3 -3
  371. datahub/metadata/schemas/Actors.avsc +38 -1
  372. datahub/metadata/schemas/ApplicationKey.avsc +31 -0
  373. datahub/metadata/schemas/ApplicationProperties.avsc +72 -0
  374. datahub/metadata/schemas/Applications.avsc +38 -0
  375. datahub/metadata/schemas/AssetSettings.avsc +63 -0
  376. datahub/metadata/schemas/ChartInfo.avsc +2 -1
  377. datahub/metadata/schemas/ChartKey.avsc +1 -0
  378. datahub/metadata/schemas/ContainerKey.avsc +1 -0
  379. datahub/metadata/schemas/ContainerProperties.avsc +8 -0
  380. datahub/metadata/schemas/CorpUserEditableInfo.avsc +15 -1
  381. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  382. datahub/metadata/schemas/CorpUserSettings.avsc +145 -0
  383. datahub/metadata/schemas/DashboardKey.avsc +1 -0
  384. datahub/metadata/schemas/DataContractKey.avsc +2 -1
  385. datahub/metadata/schemas/DataFlowInfo.avsc +8 -0
  386. datahub/metadata/schemas/DataFlowKey.avsc +1 -0
  387. datahub/metadata/schemas/DataHubFileInfo.avsc +230 -0
  388. datahub/metadata/schemas/DataHubFileKey.avsc +21 -0
  389. datahub/metadata/schemas/DataHubIngestionSourceKey.avsc +2 -1
  390. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  391. datahub/metadata/schemas/DataHubPageModuleKey.avsc +21 -0
  392. datahub/metadata/schemas/DataHubPageModuleProperties.avsc +298 -0
  393. datahub/metadata/schemas/DataHubPageTemplateKey.avsc +21 -0
  394. datahub/metadata/schemas/DataHubPageTemplateProperties.avsc +251 -0
  395. datahub/metadata/schemas/DataHubPolicyInfo.avsc +12 -1
  396. datahub/metadata/schemas/DataJobInfo.avsc +8 -0
  397. datahub/metadata/schemas/DataJobInputOutput.avsc +8 -0
  398. datahub/metadata/schemas/DataJobKey.avsc +1 -0
  399. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  400. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  401. datahub/metadata/schemas/DataProcessKey.avsc +8 -0
  402. datahub/metadata/schemas/DataProductKey.avsc +3 -1
  403. datahub/metadata/schemas/DataProductProperties.avsc +1 -1
  404. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  405. datahub/metadata/schemas/DatasetKey.avsc +11 -1
  406. datahub/metadata/schemas/DatasetUsageStatistics.avsc +8 -0
  407. datahub/metadata/schemas/Deprecation.avsc +2 -0
  408. datahub/metadata/schemas/DomainKey.avsc +2 -1
  409. datahub/metadata/schemas/ExecutionRequestInput.avsc +5 -0
  410. datahub/metadata/schemas/FormInfo.avsc +5 -0
  411. datahub/metadata/schemas/GlobalSettingsInfo.avsc +134 -0
  412. datahub/metadata/schemas/GlossaryNodeKey.avsc +2 -1
  413. datahub/metadata/schemas/GlossaryTermKey.avsc +3 -1
  414. datahub/metadata/schemas/IcebergWarehouseInfo.avsc +8 -0
  415. datahub/metadata/schemas/IncidentInfo.avsc +3 -3
  416. datahub/metadata/schemas/InstitutionalMemory.avsc +31 -0
  417. datahub/metadata/schemas/LogicalParent.avsc +145 -0
  418. datahub/metadata/schemas/MLFeatureKey.avsc +1 -0
  419. datahub/metadata/schemas/MLFeatureTableKey.avsc +1 -0
  420. datahub/metadata/schemas/MLModelDeploymentKey.avsc +8 -0
  421. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  422. datahub/metadata/schemas/MLModelGroupKey.avsc +11 -1
  423. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  424. datahub/metadata/schemas/MLModelKey.avsc +9 -0
  425. datahub/metadata/schemas/MLPrimaryKeyKey.avsc +1 -0
  426. datahub/metadata/schemas/MetadataChangeEvent.avsc +189 -47
  427. datahub/metadata/schemas/MetadataChangeLog.avsc +65 -44
  428. datahub/metadata/schemas/MetadataChangeProposal.avsc +64 -0
  429. datahub/metadata/schemas/NotebookKey.avsc +1 -0
  430. datahub/metadata/schemas/Operation.avsc +21 -2
  431. datahub/metadata/schemas/Ownership.avsc +69 -0
  432. datahub/metadata/schemas/QueryProperties.avsc +24 -2
  433. datahub/metadata/schemas/QuerySubjects.avsc +1 -12
  434. datahub/metadata/schemas/RelationshipChangeEvent.avsc +215 -0
  435. datahub/metadata/schemas/SchemaFieldKey.avsc +4 -1
  436. datahub/metadata/schemas/Siblings.avsc +2 -0
  437. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  438. datahub/metadata/schemas/StructuredProperties.avsc +69 -0
  439. datahub/metadata/schemas/StructuredPropertySettings.avsc +9 -0
  440. datahub/metadata/schemas/SystemMetadata.avsc +147 -0
  441. datahub/metadata/schemas/UpstreamLineage.avsc +9 -0
  442. datahub/metadata/schemas/__init__.py +3 -3
  443. datahub/sdk/__init__.py +7 -0
  444. datahub/sdk/_all_entities.py +15 -0
  445. datahub/sdk/_shared.py +393 -10
  446. datahub/sdk/_utils.py +4 -0
  447. datahub/sdk/chart.py +386 -0
  448. datahub/sdk/container.py +7 -0
  449. datahub/sdk/dashboard.py +453 -0
  450. datahub/sdk/dataflow.py +309 -0
  451. datahub/sdk/datajob.py +367 -0
  452. datahub/sdk/dataset.py +180 -4
  453. datahub/sdk/entity.py +99 -3
  454. datahub/sdk/entity_client.py +154 -12
  455. datahub/sdk/lineage_client.py +943 -0
  456. datahub/sdk/main_client.py +83 -8
  457. datahub/sdk/mlmodel.py +383 -0
  458. datahub/sdk/mlmodelgroup.py +240 -0
  459. datahub/sdk/search_client.py +85 -8
  460. datahub/sdk/search_filters.py +393 -68
  461. datahub/secret/datahub_secret_store.py +5 -1
  462. datahub/secret/environment_secret_store.py +29 -0
  463. datahub/secret/file_secret_store.py +49 -0
  464. datahub/specific/aspect_helpers/fine_grained_lineage.py +76 -0
  465. datahub/specific/aspect_helpers/siblings.py +73 -0
  466. datahub/specific/aspect_helpers/structured_properties.py +27 -0
  467. datahub/specific/chart.py +1 -1
  468. datahub/specific/datajob.py +15 -1
  469. datahub/specific/dataproduct.py +4 -0
  470. datahub/specific/dataset.py +51 -59
  471. datahub/sql_parsing/_sqlglot_patch.py +1 -2
  472. datahub/sql_parsing/fingerprint_utils.py +6 -0
  473. datahub/sql_parsing/split_statements.py +30 -3
  474. datahub/sql_parsing/sql_parsing_aggregator.py +144 -63
  475. datahub/sql_parsing/sqlglot_lineage.py +517 -44
  476. datahub/sql_parsing/sqlglot_utils.py +30 -18
  477. datahub/sql_parsing/tool_meta_extractor.py +25 -2
  478. datahub/telemetry/telemetry.py +30 -16
  479. datahub/testing/check_imports.py +1 -1
  480. datahub/testing/docker_utils.py +8 -2
  481. datahub/testing/mce_helpers.py +421 -0
  482. datahub/testing/mcp_diff.py +17 -21
  483. datahub/testing/sdk_v2_helpers.py +18 -0
  484. datahub/upgrade/upgrade.py +86 -30
  485. datahub/utilities/file_backed_collections.py +14 -15
  486. datahub/utilities/hive_schema_to_avro.py +2 -2
  487. datahub/utilities/ingest_utils.py +2 -2
  488. datahub/utilities/is_pytest.py +3 -2
  489. datahub/utilities/logging_manager.py +30 -7
  490. datahub/utilities/mapping.py +29 -2
  491. datahub/utilities/sample_data.py +5 -4
  492. datahub/utilities/server_config_util.py +298 -10
  493. datahub/utilities/sqlalchemy_query_combiner.py +6 -4
  494. datahub/utilities/stats_collections.py +4 -0
  495. datahub/utilities/threaded_iterator_executor.py +16 -3
  496. datahub/utilities/urn_encoder.py +1 -1
  497. datahub/utilities/urns/urn.py +41 -2
  498. datahub/emitter/sql_parsing_builder.py +0 -306
  499. datahub/ingestion/source/redshift/lineage_v2.py +0 -458
  500. datahub/ingestion/source/vertexai.py +0 -697
  501. datahub/ingestion/transformer/system_metadata_transformer.py +0 -45
  502. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info/licenses}/LICENSE +0 -0
  503. {acryl_datahub-1.0.0rc18.dist-info → acryl_datahub-1.3.0.1rc9.dist-info}/top_level.txt +0 -0
@@ -2,13 +2,14 @@ import logging
2
2
  import time
3
3
  import warnings
4
4
  from abc import ABC
5
- from typing import Dict, Iterable, Optional, Tuple
5
+ from typing import Dict, Iterable, List, Optional, Tuple
6
6
 
7
7
  from pydantic import validator
8
8
  from pydantic.fields import Field
9
9
 
10
10
  from datahub.configuration.common import ConfigModel
11
11
  from datahub.emitter.mce_builder import make_tag_urn
12
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
12
13
  from datahub.ingestion.api.common import PipelineContext
13
14
  from datahub.ingestion.api.decorators import (
14
15
  SourceCapability,
@@ -20,6 +21,7 @@ from datahub.ingestion.api.decorators import (
20
21
  )
21
22
  from datahub.ingestion.api.source import Source, SourceReport
22
23
  from datahub.ingestion.api.workunit import MetadataWorkUnit
24
+ from datahub.ingestion.source.common.subtypes import DatasetSubTypes
23
25
  from datahub.ingestion.source.openapi_parser import (
24
26
  clean_url,
25
27
  compose_url_attr,
@@ -32,14 +34,13 @@ from datahub.ingestion.source.openapi_parser import (
32
34
  set_metadata,
33
35
  try_guessing,
34
36
  )
35
- from datahub.metadata.com.linkedin.pegasus2avro.metadata.snapshot import DatasetSnapshot
36
- from datahub.metadata.com.linkedin.pegasus2avro.mxe import MetadataChangeEvent
37
37
  from datahub.metadata.schema_classes import (
38
38
  AuditStampClass,
39
39
  DatasetPropertiesClass,
40
40
  GlobalTagsClass,
41
41
  InstitutionalMemoryClass,
42
42
  InstitutionalMemoryMetadataClass,
43
+ SubTypesClass,
43
44
  TagAssociationClass,
44
45
  )
45
46
 
@@ -81,6 +82,9 @@ class OpenApiConfig(ConfigModel):
81
82
  get_token: dict = Field(
82
83
  default={}, description="Retrieving a token from the endpoint."
83
84
  )
85
+ verify_ssl: bool = Field(
86
+ default=True, description="Enable SSL certificate verification"
87
+ )
84
88
 
85
89
  @validator("bearer_token", always=True)
86
90
  def ensure_only_one_token(
@@ -101,7 +105,7 @@ class OpenApiConfig(ConfigModel):
101
105
  # details there once, and then use that session for all requests.
102
106
  self.token = f"Bearer {self.bearer_token}"
103
107
  else:
104
- assert "url_complement" in self.get_token.keys(), (
108
+ assert "url_complement" in self.get_token, (
105
109
  "When 'request_type' is set to 'get', an url_complement is needed for the request."
106
110
  )
107
111
  if self.get_token["request_type"] == "get":
@@ -128,12 +132,14 @@ class OpenApiConfig(ConfigModel):
128
132
  tok_url=url4req,
129
133
  method=self.get_token["request_type"],
130
134
  proxies=self.proxies,
135
+ verify_ssl=self.verify_ssl,
131
136
  )
132
137
  sw_dict = get_swag_json(
133
138
  self.url,
134
139
  token=self.token,
135
140
  swagger_file=self.swagger_file,
136
141
  proxies=self.proxies,
142
+ verify_ssl=self.verify_ssl,
137
143
  ) # load the swagger file
138
144
 
139
145
  else: # using basic auth for accessing endpoints
@@ -143,6 +149,7 @@ class OpenApiConfig(ConfigModel):
143
149
  password=self.password,
144
150
  swagger_file=self.swagger_file,
145
151
  proxies=self.proxies,
152
+ verify_ssl=self.verify_ssl,
146
153
  )
147
154
  return sw_dict
148
155
 
@@ -222,8 +229,9 @@ class APISource(Source, ABC):
222
229
 
223
230
  def init_dataset(
224
231
  self, endpoint_k: str, endpoint_dets: dict
225
- ) -> Tuple[DatasetSnapshot, str]:
232
+ ) -> Tuple[str, str, List[MetadataWorkUnit]]:
226
233
  config = self.config
234
+ workunits = []
227
235
 
228
236
  dataset_name = endpoint_k[1:].replace("/", ".")
229
237
 
@@ -233,22 +241,27 @@ class APISource(Source, ABC):
233
241
  else:
234
242
  dataset_name = "root"
235
243
 
236
- dataset_snapshot = DatasetSnapshot(
237
- urn=f"urn:li:dataset:(urn:li:dataPlatform:{self.platform},{config.name}.{dataset_name},PROD)",
238
- aspects=[],
239
- )
244
+ dataset_urn = f"urn:li:dataset:(urn:li:dataPlatform:{self.platform},{config.name}.{dataset_name},PROD)"
240
245
 
241
- # adding description
242
- dataset_properties = DatasetPropertiesClass(
246
+ # Create dataset properties aspect
247
+ properties = DatasetPropertiesClass(
243
248
  description=endpoint_dets["description"], customProperties={}
244
249
  )
245
- dataset_snapshot.aspects.append(dataset_properties)
250
+ wu = MetadataWorkUnit(
251
+ id=dataset_name,
252
+ mcp=MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=properties),
253
+ )
254
+ workunits.append(wu)
246
255
 
247
- # adding tags
256
+ # Create tags aspect
248
257
  tags_str = [make_tag_urn(t) for t in endpoint_dets["tags"]]
249
258
  tags_tac = [TagAssociationClass(t) for t in tags_str]
250
259
  gtc = GlobalTagsClass(tags_tac)
251
- dataset_snapshot.aspects.append(gtc)
260
+ wu = MetadataWorkUnit(
261
+ id=f"{dataset_name}-tags",
262
+ mcp=MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=gtc),
263
+ )
264
+ workunits.append(wu)
252
265
 
253
266
  # the link will appear in the "documentation"
254
267
  link_url = clean_url(config.url + self.url_basepath + endpoint_k)
@@ -260,17 +273,25 @@ class APISource(Source, ABC):
260
273
  url=link_url, description=link_description, createStamp=creation
261
274
  )
262
275
  inst_memory = InstitutionalMemoryClass([link_metadata])
263
- dataset_snapshot.aspects.append(inst_memory)
276
+ wu = MetadataWorkUnit(
277
+ id=f"{dataset_name}-docs",
278
+ mcp=MetadataChangeProposalWrapper(
279
+ entityUrn=dataset_urn, aspect=inst_memory
280
+ ),
281
+ )
282
+ workunits.append(wu)
264
283
 
265
- return dataset_snapshot, dataset_name
284
+ # Create subtype aspect
285
+ sub_types = SubTypesClass(typeNames=[DatasetSubTypes.API_ENDPOINT])
286
+ wu = MetadataWorkUnit(
287
+ id=f"{dataset_name}-subtype",
288
+ mcp=MetadataChangeProposalWrapper(entityUrn=dataset_urn, aspect=sub_types),
289
+ )
290
+ workunits.append(wu)
266
291
 
267
- def build_wu(
268
- self, dataset_snapshot: DatasetSnapshot, dataset_name: str
269
- ) -> ApiWorkUnit:
270
- mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
271
- return ApiWorkUnit(id=dataset_name, mce=mce)
292
+ return dataset_name, dataset_urn, workunits
272
293
 
273
- def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
294
+ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
274
295
  config = self.config
275
296
 
276
297
  sw_dict = self.config.get_swagger()
@@ -294,17 +315,25 @@ class APISource(Source, ABC):
294
315
  if endpoint_k in config.ignore_endpoints:
295
316
  continue
296
317
 
297
- dataset_snapshot, dataset_name = self.init_dataset(
318
+ # Initialize dataset and get common aspects
319
+ dataset_name, dataset_urn, workunits = self.init_dataset(
298
320
  endpoint_k, endpoint_dets
299
321
  )
322
+ for wu in workunits:
323
+ yield wu
300
324
 
301
- # adding dataset fields
302
- if "data" in endpoint_dets.keys():
325
+ # Handle schema metadata if available
326
+ if "data" in endpoint_dets:
303
327
  # we are lucky! data is defined in the swagger for this endpoint
304
328
  schema_metadata = set_metadata(dataset_name, endpoint_dets["data"])
305
- dataset_snapshot.aspects.append(schema_metadata)
306
- yield self.build_wu(dataset_snapshot, dataset_name)
307
- elif endpoint_dets["method"] != "get":
329
+ wu = MetadataWorkUnit(
330
+ id=f"{dataset_name}-schema",
331
+ mcp=MetadataChangeProposalWrapper(
332
+ entityUrn=dataset_urn, aspect=schema_metadata
333
+ ),
334
+ )
335
+ yield wu
336
+ elif endpoint_dets["method"] != "GET":
308
337
  self.report.report_warning(
309
338
  title="Failed to Extract Endpoint Metadata",
310
339
  message=f"No example provided for {endpoint_dets['method']}",
@@ -320,6 +349,7 @@ class APISource(Source, ABC):
320
349
  tot_url,
321
350
  token=config.token,
322
351
  proxies=config.proxies,
352
+ verify_ssl=config.verify_ssl,
323
353
  )
324
354
  else:
325
355
  response = request_call(
@@ -327,6 +357,7 @@ class APISource(Source, ABC):
327
357
  username=config.username,
328
358
  password=config.password,
329
359
  proxies=config.proxies,
360
+ verify_ssl=config.verify_ssl,
330
361
  )
331
362
  if response.status_code == 200:
332
363
  fields2add, root_dataset_samples[dataset_name] = extract_fields(
@@ -338,13 +369,17 @@ class APISource(Source, ABC):
338
369
  context=f"Endpoint Type: {endpoint_k}, Name: {dataset_name}",
339
370
  )
340
371
  schema_metadata = set_metadata(dataset_name, fields2add)
341
- dataset_snapshot.aspects.append(schema_metadata)
342
-
343
- yield self.build_wu(dataset_snapshot, dataset_name)
372
+ wu = MetadataWorkUnit(
373
+ id=f"{dataset_name}-schema",
374
+ mcp=MetadataChangeProposalWrapper(
375
+ entityUrn=dataset_urn, aspect=schema_metadata
376
+ ),
377
+ )
378
+ yield wu
344
379
  else:
345
380
  self.report_bad_responses(response.status_code, type=endpoint_k)
346
381
  else:
347
- if endpoint_k not in config.forced_examples.keys():
382
+ if endpoint_k not in config.forced_examples:
348
383
  # start guessing...
349
384
  url_guess = try_guessing(endpoint_k, root_dataset_samples)
350
385
  tot_url = clean_url(config.url + self.url_basepath + url_guess)
@@ -353,6 +388,7 @@ class APISource(Source, ABC):
353
388
  tot_url,
354
389
  token=config.token,
355
390
  proxies=config.proxies,
391
+ verify_ssl=config.verify_ssl,
356
392
  )
357
393
  else:
358
394
  response = request_call(
@@ -360,6 +396,7 @@ class APISource(Source, ABC):
360
396
  username=config.username,
361
397
  password=config.password,
362
398
  proxies=config.proxies,
399
+ verify_ssl=config.verify_ssl,
363
400
  )
364
401
  if response.status_code == 200:
365
402
  fields2add, _ = extract_fields(response, dataset_name)
@@ -369,9 +406,13 @@ class APISource(Source, ABC):
369
406
  context=f"Endpoint Type: {endpoint_k}, Name: {dataset_name}",
370
407
  )
371
408
  schema_metadata = set_metadata(dataset_name, fields2add)
372
- dataset_snapshot.aspects.append(schema_metadata)
373
-
374
- yield self.build_wu(dataset_snapshot, dataset_name)
409
+ wu = MetadataWorkUnit(
410
+ id=f"{dataset_name}-schema",
411
+ mcp=MetadataChangeProposalWrapper(
412
+ entityUrn=dataset_urn, aspect=schema_metadata
413
+ ),
414
+ )
415
+ yield wu
375
416
  else:
376
417
  self.report_bad_responses(response.status_code, type=endpoint_k)
377
418
  else:
@@ -384,6 +425,7 @@ class APISource(Source, ABC):
384
425
  tot_url,
385
426
  token=config.token,
386
427
  proxies=config.proxies,
428
+ verify_ssl=config.verify_ssl,
387
429
  )
388
430
  else:
389
431
  response = request_call(
@@ -391,6 +433,7 @@ class APISource(Source, ABC):
391
433
  username=config.username,
392
434
  password=config.password,
393
435
  proxies=config.proxies,
436
+ verify_ssl=config.verify_ssl,
394
437
  )
395
438
  if response.status_code == 200:
396
439
  fields2add, _ = extract_fields(response, dataset_name)
@@ -400,9 +443,13 @@ class APISource(Source, ABC):
400
443
  context=f"Endpoint Type: {endpoint_k}, Name: {dataset_name}",
401
444
  )
402
445
  schema_metadata = set_metadata(dataset_name, fields2add)
403
- dataset_snapshot.aspects.append(schema_metadata)
404
-
405
- yield self.build_wu(dataset_snapshot, dataset_name)
446
+ wu = MetadataWorkUnit(
447
+ id=f"{dataset_name}-schema",
448
+ mcp=MetadataChangeProposalWrapper(
449
+ entityUrn=dataset_urn, aspect=schema_metadata
450
+ ),
451
+ )
452
+ yield wu
406
453
  else:
407
454
  self.report_bad_responses(response.status_code, type=endpoint_k)
408
455
 
@@ -59,17 +59,21 @@ def request_call(
59
59
  username: Optional[str] = None,
60
60
  password: Optional[str] = None,
61
61
  proxies: Optional[dict] = None,
62
+ verify_ssl: bool = True,
62
63
  ) -> requests.Response:
63
64
  headers = {"accept": "application/json"}
64
65
  if username is not None and password is not None:
65
66
  return requests.get(
66
- url, headers=headers, auth=HTTPBasicAuth(username, password)
67
+ url,
68
+ headers=headers,
69
+ auth=HTTPBasicAuth(username, password),
70
+ verify=verify_ssl,
67
71
  )
68
72
  elif token is not None:
69
73
  headers["Authorization"] = f"{token}"
70
- return requests.get(url, proxies=proxies, headers=headers)
74
+ return requests.get(url, proxies=proxies, headers=headers, verify=verify_ssl)
71
75
  else:
72
- return requests.get(url, headers=headers)
76
+ return requests.get(url, headers=headers, verify=verify_ssl)
73
77
 
74
78
 
75
79
  def get_swag_json(
@@ -79,10 +83,16 @@ def get_swag_json(
79
83
  password: Optional[str] = None,
80
84
  swagger_file: str = "",
81
85
  proxies: Optional[dict] = None,
86
+ verify_ssl: bool = True,
82
87
  ) -> Dict:
83
88
  tot_url = url + swagger_file
84
89
  response = request_call(
85
- url=tot_url, token=token, username=username, password=password, proxies=proxies
90
+ url=tot_url,
91
+ token=token,
92
+ username=username,
93
+ password=password,
94
+ proxies=proxies,
95
+ verify_ssl=verify_ssl,
86
96
  )
87
97
 
88
98
  if response.status_code != 200:
@@ -127,37 +137,45 @@ def get_endpoints(sw_dict: dict) -> dict:
127
137
  check_sw_version(sw_dict)
128
138
 
129
139
  for p_k, p_o in sw_dict["paths"].items():
130
- method = list(p_o)[0]
131
- if "200" in p_o[method]["responses"].keys():
132
- base_res = p_o[method]["responses"]["200"]
133
- elif 200 in p_o[method]["responses"].keys():
134
- # if you read a plain yml file the 200 will be an integer
135
- base_res = p_o[method]["responses"][200]
136
- else:
137
- # the endpoint does not have a 200 response
138
- continue
139
-
140
- if "description" in p_o[method].keys():
141
- desc = p_o[method]["description"]
142
- elif "summary" in p_o[method].keys():
143
- desc = p_o[method]["summary"]
144
- else: # still testing
145
- desc = ""
146
-
147
- try:
148
- tags = p_o[method]["tags"]
149
- except KeyError:
150
- tags = []
151
-
152
- url_details[p_k] = {"description": desc, "tags": tags, "method": method}
153
-
154
- example_data = check_for_api_example_data(base_res, p_k)
155
- if example_data:
156
- url_details[p_k]["data"] = example_data
157
-
158
- # checking whether there are defined parameters to execute the call...
159
- if "parameters" in p_o[method].keys():
160
- url_details[p_k]["parameters"] = p_o[method]["parameters"]
140
+ for method, method_spec in p_o.items():
141
+ # skip non-method keys like "parameters"
142
+ if method.lower() not in [
143
+ "get",
144
+ "post",
145
+ "put",
146
+ "delete",
147
+ "patch",
148
+ "options",
149
+ "head",
150
+ ]:
151
+ continue
152
+
153
+ responses = method_spec.get("responses", {})
154
+ base_res = responses.get("200") or responses.get(200)
155
+ if not base_res:
156
+ # if there is no 200 response, we skip this method
157
+ continue
158
+
159
+ # if the description is not present, we will use the summary
160
+ # if both are not present, we will use an empty string
161
+ desc = method_spec.get("description") or method_spec.get("summary", "")
162
+
163
+ # if the tags are not present, we will use an empty list
164
+ tags = method_spec.get("tags", [])
165
+
166
+ url_details[p_k] = {
167
+ "description": desc,
168
+ "tags": tags,
169
+ "method": method.upper(),
170
+ }
171
+
172
+ example_data = check_for_api_example_data(base_res, p_k)
173
+ if example_data:
174
+ url_details[p_k]["data"] = example_data
175
+
176
+ # checking whether there are defined parameters to execute the call...
177
+ if "parameters" in p_o[method]:
178
+ url_details[p_k]["parameters"] = p_o[method]["parameters"]
161
179
 
162
180
  return dict(sorted(url_details.items()))
163
181
 
@@ -169,7 +187,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
169
187
  data = {}
170
188
  if "content" in base_res:
171
189
  res_cont = base_res["content"]
172
- if "application/json" in res_cont.keys():
190
+ if "application/json" in res_cont:
173
191
  ex_field = None
174
192
  if "example" in res_cont["application/json"]:
175
193
  ex_field = "example"
@@ -186,7 +204,7 @@ def check_for_api_example_data(base_res: dict, key: str) -> dict:
186
204
  logger.warning(
187
205
  f"Field in swagger file does not give consistent data --- {key}"
188
206
  )
189
- elif "text/csv" in res_cont.keys():
207
+ elif "text/csv" in res_cont:
190
208
  data = res_cont["text/csv"]["schema"]
191
209
  elif "examples" in base_res:
192
210
  data = base_res["examples"]["application/json"]
@@ -239,7 +257,7 @@ def guessing_url_name(url: str, examples: dict) -> str:
239
257
 
240
258
  # substituting the parameter's name w the value
241
259
  for name, clean_name in zip(needed_n, cleaned_needed_n):
242
- if clean_name in examples[ex2use].keys():
260
+ if clean_name in examples[ex2use]:
243
261
  guessed_url = re.sub(name, str(examples[ex2use][clean_name]), guessed_url)
244
262
 
245
263
  return guessed_url
@@ -358,6 +376,7 @@ def get_tok(
358
376
  tok_url: str = "",
359
377
  method: str = "post",
360
378
  proxies: Optional[dict] = None,
379
+ verify_ssl: bool = True,
361
380
  ) -> str:
362
381
  """
363
382
  Trying to post username/password to get auth.
@@ -368,7 +387,7 @@ def get_tok(
368
387
  # this will make a POST call with username and password
369
388
  data = {"username": username, "password": password, "maxDuration": True}
370
389
  # url2post = url + "api/authenticate/"
371
- response = requests.post(url4req, proxies=proxies, json=data)
390
+ response = requests.post(url4req, proxies=proxies, json=data, verify=verify_ssl)
372
391
  if response.status_code == 200:
373
392
  cont = json.loads(response.content)
374
393
  if "token" in cont: # other authentication scheme
@@ -377,7 +396,7 @@ def get_tok(
377
396
  token = f"Bearer {cont['tokens']['access']}"
378
397
  elif method == "get":
379
398
  # this will make a GET call with username and password
380
- response = requests.get(url4req)
399
+ response = requests.get(url4req, verify=verify_ssl)
381
400
  if response.status_code == 200:
382
401
  cont = json.loads(response.content)
383
402
  token = cont["token"]