acryl-datahub 0.15.0rc20__py3-none-any.whl → 0.15.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (28)
  1. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/METADATA +2478 -2478
  2. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/RECORD +28 -26
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/structuredproperties/structuredproperties.py +56 -68
  5. datahub/cli/ingest_cli.py +110 -0
  6. datahub/emitter/rest_emitter.py +17 -4
  7. datahub/ingestion/sink/datahub_rest.py +12 -1
  8. datahub/ingestion/source/datahub/datahub_database_reader.py +41 -21
  9. datahub/ingestion/source/datahub/datahub_source.py +8 -1
  10. datahub/ingestion/source/kafka/kafka_connect.py +81 -51
  11. datahub/ingestion/source/s3/source.py +2 -3
  12. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +2 -1
  13. datahub/ingestion/source/snowflake/snowflake_query.py +13 -0
  14. datahub/ingestion/source/snowflake/snowflake_schema.py +16 -0
  15. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +23 -0
  16. datahub/ingestion/source/tableau/tableau.py +42 -3
  17. datahub/ingestion/source/tableau/tableau_common.py +12 -5
  18. datahub/ingestion/source/tableau/tableau_constant.py +2 -0
  19. datahub/ingestion/source/tableau/tableau_server_wrapper.py +33 -0
  20. datahub/ingestion/source/tableau/tableau_validation.py +48 -0
  21. datahub/metadata/_schema_classes.py +400 -400
  22. datahub/metadata/_urns/urn_defs.py +1355 -1355
  23. datahub/metadata/schema.avsc +17221 -17574
  24. datahub/testing/compare_metadata_json.py +1 -1
  25. datahub/utilities/file_backed_collections.py +35 -2
  26. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/WHEEL +0 -0
  27. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/entry_points.txt +0 -0
  28. {acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/top_level.txt +0 -0
{acryl_datahub-0.15.0rc20.dist-info → acryl_datahub-0.15.0rc22.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
- datahub/__init__.py,sha256=fYgu28dsndrekGv9Pq_ENw7G6Erm7qtsY5H6W3cKFDU,575
+ datahub/__init__.py,sha256=T0tNQ0v5Y2QyvLqZg1tU0kxvIjYvmZ8eZdrD_d8Uwe4,575
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
  datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
  datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/api/entities/structuredproperties/structuredproperties.py,sha256=PcTX5gI7pg_Aq9JeIvUNZ5JYrQ2XS1uUEJZ73ORgYgA,9434
+ datahub/api/entities/structuredproperties/structuredproperties.py,sha256=YO4mdn6BziOzvzoFe-g2KfZlOZy8gqwMyyzj_7vF4BY,8845
  datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
  datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
  datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
@@ -67,7 +67,7 @@ datahub/cli/docker_cli.py,sha256=QGoWFp8ZZsXOSMbgu0Q4snMmMmtP3epWAN-fYglUNEc,364
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
  datahub/cli/exists_cli.py,sha256=IsuU86R-g7BJjAl1vULH6d-BWJHAKa4XHLZl5WxGUEM,1233
  datahub/cli/get_cli.py,sha256=VV80BCXfZ0-C8fr2k43SIuN9DB-fOYP9StWsTHnXwFw,2327
- datahub/cli/ingest_cli.py,sha256=miFXBUm9xD8vRvKPwpB-3GXKV1Abf8xtPWyxV6UeenM,18983
+ datahub/cli/ingest_cli.py,sha256=nRoZvVpsGPXmEZCvSOBfsZ61Ep1fCqYRVp79RBnHSnI,22393
  datahub/cli/json_file.py,sha256=nWo-VVthaaW4Do1eUqgrzk0fShb29MjiKXvZVOTq76c,943
  datahub/cli/lite_cli.py,sha256=UmlMMquce6lHiPaKUBBT0XQtqR9SHEmrGlJyKV9YY60,13030
  datahub/cli/migrate.py,sha256=p42vixwKzi9OHQnIa0K2FxwGvt-1OxXeuYGJzfu5Sqo,17939
@@ -119,7 +119,7 @@ datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
  datahub/emitter/mcp_builder.py,sha256=ju-1dZMKs5dlWcTi4zcNRVmhkfhmfX3JFULZSbgxSFs,9968
  datahub/emitter/mcp_patch_builder.py,sha256=W85q1maVUMpOIo5lwLRn82rLXRVoZ_gurl_a-pvVCpE,4291
  datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
- datahub/emitter/rest_emitter.py,sha256=rIWqEJjcSIM16_8DXqNqZ_h5s_nj46DTiyRKA5EQHXQ,15021
+ datahub/emitter/rest_emitter.py,sha256=3kG_aPKy9pLibd4SJNtdJxn792c5TJliFjjCOw6NoUM,15533
  datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
  datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
  datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
@@ -180,7 +180,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
- datahub/ingestion/sink/datahub_rest.py,sha256=pU9z-vR-R7kGogqxkC7-9AZNctR9oUfAmfhhoD0-hwQ,12245
+ datahub/ingestion/sink/datahub_rest.py,sha256=ME8OygJgd7AowrokJLmdjYHxIQEy5jXWS0yKwOLR934,12592
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -266,9 +266,9 @@ datahub/ingestion/source/data_lake_common/path_spec.py,sha256=u3u2eMe70V5vur-j8m
  datahub/ingestion/source/datahub/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/datahub/config.py,sha256=pOXt0b1PX6D7dtD4RuKwdmr6sQKnXSf6LHxfPUMhP8s,3658
  datahub/ingestion/source/datahub/datahub_api_reader.py,sha256=hlKADVEPoTFiRGKqRsMF5mL4fSu_IrIW8Nx7LpEzvkM,2134
- datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=TLH1KMyvRgiuENr8t0lnBjCxggONsDrxYThRzdNVEuE,8458
+ datahub/ingestion/source/datahub/datahub_database_reader.py,sha256=F8JrOjSrmJ2B6m1MWh83A1EYFDcGMla749HUeQWMnL0,9464
  datahub/ingestion/source/datahub/datahub_kafka_reader.py,sha256=8x9_u5kRjgSmu7c295ZIZjxP6bgoZZbWsKRicuLStRQ,4145
- datahub/ingestion/source/datahub/datahub_source.py,sha256=VKUtSRpwLAFatfru_pNy045HSA2z2DPzupQKIiX2uyE,8173
+ datahub/ingestion/source/datahub/datahub_source.py,sha256=2jDnsHEzpGhr00qQI9unSUJYD6Cb1McYFKOVbA-Zcm4,8487
  datahub/ingestion/source/datahub/report.py,sha256=VHBfCbwFRzdLdB7hQG9ST4EiZxl_vBCU0XxGcZR6Xxs,940
  datahub/ingestion/source/datahub/state.py,sha256=PZoT7sSK1wadVf5vN6phrgr7I6LL7ePP-EJjP1OO0bQ,3507
  datahub/ingestion/source/dbt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -321,7 +321,7 @@ datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR
  datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/kafka/kafka.py,sha256=9SR7bqp9J0rPYde5IClhnAuVNy9ItsB8-ZeXtTc_mEY,26442
- datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
+ datahub/ingestion/source/kafka/kafka_connect.py,sha256=Jm1MYky_OPIwvVHuEjgOjK0e6-jA-dYnsLZ7r-Y_9mA,56208
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
@@ -398,7 +398,7 @@ datahub/ingestion/source/s3/config.py,sha256=Zs1nrBZKLImteZreIcSMMRLj8vBGgxakNDs
  datahub/ingestion/source/s3/datalake_profiler_config.py,sha256=k7S9Xcmgr3-CvWrd5NEX-V8JSrcAwkm7vbHPTVZicow,3620
  datahub/ingestion/source/s3/profiling.py,sha256=yKNCKpr6w7qpCH-baeSkNE9VjkN6eBot_weD-2_Jxzk,17579
  datahub/ingestion/source/s3/report.py,sha256=fzkTdTewYlWrTk4f2Cyl-e8RV4qw9wEVtm0cdKD-Xgw,542
- datahub/ingestion/source/s3/source.py,sha256=OGc12oNWoXGVeIbSKzYlc7Qy3UeEmQ5vIOm-sG8fJxg,47396
+ datahub/ingestion/source/s3/source.py,sha256=8O_vu1J91h7owQlYyK27AZAQHxKsDpNC_jsLNpMed98,47336
  datahub/ingestion/source/sac/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/sac/sac.py,sha256=zPSO9ukuyhvNaaVzeAYpA-_sFma_XMcCQMPaGvDWuTk,30226
  datahub/ingestion/source/sac/sac_common.py,sha256=-xQTDBtgH56AnpRXWGDnlmQqUuLRx-7wF1U1kQFWtX8,998
@@ -427,13 +427,13 @@ datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81
  datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
  datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUSIpb58aKOaxTDHfM5NvghutCVRicy4,23247
+ datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=EnTJoRIQKcZOIYfb_NUff_YA8IdIroaFD1JHUn-M6ok,23346
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=PuqoseJbqkQEIYkmlLvPJxcVOGG7HVs4U-WWFQgQEWs,38211
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=yDu_1aTAG7eLEh1w1FGmn2-c6NJZURdslnI6fC_4B_0,38723
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=_-rD7Q4MzKY8fYzJHSBnGX4gurwujL3UoRzcP_TZURs,6468
- datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=K31vJ19ZCIqtJkszsJWF1eppu8U23gkZYfb5jw231dc,20997
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=st4qoOdMGuo6fJQh-cJf_2hnczIuv6VRXGO4x3p1MgQ,39416
+ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=z5ZPgh-TILAz0DeIwDxRCsj980CM2BbftXiFpM1dV_Y,21674
+ datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=vof3mNImstnlL8kc0OkTHzMIqnbEkt9RmnYBX1JX0oE,40386
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-gPdzwtON9dJa0eqHt-8Yr5h2Q,6366
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
@@ -486,9 +486,11 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/tableau/tableau.py,sha256=2M0d4IYn0kcMFlQ2yAvPRnXKZcj_xcqvEJik7QDnebI,136605
- datahub/ingestion/source/tableau/tableau_common.py,sha256=WugmFZvLgrHjvhUVBBZGRXiBJcsh2qcZK2TnWo5UQEA,26007
- datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
+ datahub/ingestion/source/tableau/tableau.py,sha256=P_DUuUvXk5u2ihA0JghtRkYc_KI_yQR2ZiQVe9IUvsU,138197
+ datahub/ingestion/source/tableau/tableau_common.py,sha256=9gQLq_3BlAsKll83uVlnWJRWaIDtFtREUyuimXF13Z0,26219
+ datahub/ingestion/source/tableau/tableau_constant.py,sha256=jVQMgLXND5aPL6XLETKp81BehRkvyLTU_Vhhe_1NOkI,2576
+ datahub/ingestion/source/tableau/tableau_server_wrapper.py,sha256=PEGfcoUcBdsnOa5EzCqy1IiuQ3OZ9fZVEMzDqhhHOto,922
+ datahub/ingestion/source/tableau/tableau_validation.py,sha256=l0DuXUuxJwEXMzo61xLx-KLc5u6tiz2n0e9EepJdWEM,1808
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
  datahub/ingestion/source/unity/config.py,sha256=m4-n7mYz4Ct4L1QdfJFklwHyj8boKCbV7Sb3Ou6AT3Q,14756
@@ -559,12 +561,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
- datahub/metadata/_schema_classes.py,sha256=iPeBXGvbNEm0vw5pYwunnvx7bTtBdmIQVtzMOlS6bSI,955042
- datahub/metadata/schema.avsc,sha256=Xx93OdPzQfBb2CtntIYE-HAeKNg-JZcCtRU95v7ZZCs,677728
+ datahub/metadata/_schema_classes.py,sha256=FTLom36n7gr6zxYfPWWoy9AmdnB4KOIXYRoVZbS9kog,955042
+ datahub/metadata/schema.avsc,sha256=D-rNu2SC2tyvqju8pQwGNGGT9zy1_fzxzoigH5YmUvo,722242
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
- datahub/metadata/_urns/urn_defs.py,sha256=WBHf7Ze2qBvR-uWpcdMqEy-T2AIBzf8ioS-wJMMXXOo,107119
+ datahub/metadata/_urns/urn_defs.py,sha256=LFHZGzHlDA0KJes1Xg7-lWetXusi7bubA7Q5hu4ER88,107119
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -882,7 +884,7 @@ datahub/testing/__init__.py,sha256=TywIuzGQvzJsNhI_PGD1RFk11M3RtGl9jIMtAVVHIkg,2
  datahub/testing/check_imports.py,sha256=EKuJmgUA46uOrlaOy0fCvPB7j9POkpJ0ExhO_pT3YAk,1356
  datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6mXiXmcJNT_RzM,2671
  datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4Nwl3E,1187
- datahub/testing/compare_metadata_json.py,sha256=EzIPHtRL00a1PSdaA82LU0oRo85GqjF7_jjWG_NwfW8,5274
+ datahub/testing/compare_metadata_json.py,sha256=pVJB2qLoKzEJLBXqFT-qGrxpA1y76y-mIbvJf0NnAD0,5274
  datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
  datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
  datahub/testing/mcp_diff.py,sha256=_sBFhmclYXJGQ_JYDrvKWXNGXt9ACvqeQvFaZrRHa8Q,10729
@@ -900,7 +902,7 @@ datahub/utilities/dedup_list.py,sha256=dUSpe1AajfuwlHVJKNv-CzDXSCkaw0HgSMOsxqUkQ
  datahub/utilities/delayed_iter.py,sha256=XlsI0DCXkVVejFKOW_uMT0E8DTqqOHQN3Ooak4EcULE,645
  datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,1128
  datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
- datahub/utilities/file_backed_collections.py,sha256=tS6hN0kxMaaTb8rB-vDqAd973mTKshERSC55JIe_3Cw,20557
+ datahub/utilities/file_backed_collections.py,sha256=I2GxSYtVzfo38pQpv2FyoBeWISiKD4zUi0t34jPCNrU,21957
  datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
  datahub/utilities/hive_schema_to_avro.py,sha256=2-7NI9haCAYbyUmHTb-QPxjn4WbmnDDKAIGmHJ11-1E,11622
  datahub/utilities/is_pytest.py,sha256=2m9T4S9IIKhI5RfTqrB2ZmumzHocdxBHpM1HroWj2XQ,138
@@ -974,8 +976,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0rc20.dist-info/METADATA,sha256=KuTZA5lnEW-UAvSPqqkBsDFKkwlJF8WzYbcphVMW_aE,173559
- acryl_datahub-0.15.0rc20.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- acryl_datahub-0.15.0rc20.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
- acryl_datahub-0.15.0rc20.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0rc20.dist-info/RECORD,,
+ acryl_datahub-0.15.0rc22.dist-info/METADATA,sha256=48jbXm5fKitlO7rhjtNA1FcJT9Y7ypQ25EtatHbSeqY,173559
+ acryl_datahub-0.15.0rc22.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ acryl_datahub-0.15.0rc22.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
+ acryl_datahub-0.15.0rc22.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0rc22.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
 
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0rc20"
+ __version__ = "0.15.0rc22"
 
 
  def is_dev_mode() -> bool:
datahub/api/entities/structuredproperties/structuredproperties.py CHANGED
@@ -1,8 +1,7 @@
  import logging
- from contextlib import contextmanager
  from enum import Enum
  from pathlib import Path
- from typing import Generator, List, Optional
+ from typing import List, Optional
 
  import yaml
  from pydantic import validator
@@ -10,6 +9,7 @@ from ruamel.yaml import YAML
 
  from datahub.configuration.common import ConfigModel
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
+ from datahub.ingestion.api.global_context import get_graph_context, set_graph_context
  from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
  from datahub.metadata.schema_classes import (
      PropertyValueClass,
@@ -24,23 +24,10 @@ logger = logging.getLogger(__name__)
  class StructuredPropertiesConfig:
      """Configuration class to hold the graph client"""
 
-     _graph: Optional[DataHubGraph] = None
-
-     @classmethod
-     @contextmanager
-     def use_graph(cls, graph: DataHubGraph) -> Generator[None, None, None]:
-         """Context manager to temporarily set a custom graph"""
-         previous_graph = cls._graph
-         cls._graph = graph
-         try:
-             yield
-         finally:
-             cls._graph = previous_graph
-
      @classmethod
-     def get_graph(cls) -> DataHubGraph:
+     def get_graph_required(cls) -> DataHubGraph:
          """Get the current graph, falling back to default if none set"""
-         return cls._graph if cls._graph is not None else get_default_graph()
+         return get_graph_context() or get_default_graph()
 
 
  class AllowedTypes(Enum):
@@ -79,7 +66,7 @@ class TypeQualifierAllowedTypes(ConfigModel):
      @validator("allowed_types", each_item=True)
      def validate_allowed_types(cls, v):
          if v:
-             graph = StructuredPropertiesConfig.get_graph()
+             graph = StructuredPropertiesConfig.get_graph_required()
              validated_urn = Urn.make_entity_type_urn(v)
              if not graph.exists(validated_urn):
                  raise ValueError(
@@ -106,7 +93,7 @@ class StructuredProperties(ConfigModel):
      @validator("entity_types", each_item=True)
      def validate_entity_types(cls, v):
          if v:
-             graph = StructuredPropertiesConfig.get_graph()
+             graph = StructuredPropertiesConfig.get_graph_required()
              validated_urn = Urn.make_entity_type_urn(v)
              if not graph.exists(validated_urn):
                  raise ValueError(
@@ -136,63 +123,64 @@ class StructuredProperties(ConfigModel):
 
      @staticmethod
      def create(file: str, graph: Optional[DataHubGraph] = None) -> None:
-         emitter: DataHubGraph = graph if graph else get_default_graph()
-         with StructuredPropertiesConfig.use_graph(emitter):
-             print("Using graph")
+         with set_graph_context(graph):
+             graph = StructuredPropertiesConfig.get_graph_required()
+
              with open(file) as fp:
                  structuredproperties: List[dict] = yaml.safe_load(fp)
-                 for structuredproperty_raw in structuredproperties:
-                     structuredproperty = StructuredProperties.parse_obj(
-                         structuredproperty_raw
+             for structuredproperty_raw in structuredproperties:
+                 structuredproperty = StructuredProperties.parse_obj(
+                     structuredproperty_raw
+                 )
+
+                 if not structuredproperty.type.islower():
+                     structuredproperty.type = structuredproperty.type.lower()
+                     logger.warning(
+                         f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
                      )
-                     if not structuredproperty.type.islower():
-                         structuredproperty.type = structuredproperty.type.lower()
-                         logger.warn(
-                             f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
-                         )
-                     if not AllowedTypes.check_allowed_type(structuredproperty.type):
-                         raise ValueError(
-                             f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
-                         )
-                     mcp = MetadataChangeProposalWrapper(
-                         entityUrn=structuredproperty.urn,
-                         aspect=StructuredPropertyDefinitionClass(
-                             qualifiedName=structuredproperty.fqn,
-                             valueType=Urn.make_data_type_urn(structuredproperty.type),
-                             displayName=structuredproperty.display_name,
-                             description=structuredproperty.description,
-                             entityTypes=[
-                                 Urn.make_entity_type_urn(entity_type)
-                                 for entity_type in structuredproperty.entity_types or []
-                             ],
-                             cardinality=structuredproperty.cardinality,
-                             immutable=structuredproperty.immutable,
-                             allowedValues=(
-                                 [
-                                     PropertyValueClass(
-                                         value=v.value, description=v.description
-                                     )
-                                     for v in structuredproperty.allowed_values
-                                 ]
-                                 if structuredproperty.allowed_values
-                                 else None
-                             ),
-                             typeQualifier=(
-                                 {
-                                     "allowedTypes": structuredproperty.type_qualifier.allowed_types
-                                 }
-                                 if structuredproperty.type_qualifier
-                                 else None
-                             ),
-                         ),
+                 if not AllowedTypes.check_allowed_type(structuredproperty.type):
+                     raise ValueError(
+                         f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
                      )
-                     emitter.emit_mcp(mcp)
+                 mcp = MetadataChangeProposalWrapper(
+                     entityUrn=structuredproperty.urn,
+                     aspect=StructuredPropertyDefinitionClass(
+                         qualifiedName=structuredproperty.fqn,
+                         valueType=Urn.make_data_type_urn(structuredproperty.type),
+                         displayName=structuredproperty.display_name,
+                         description=structuredproperty.description,
+                         entityTypes=[
+                             Urn.make_entity_type_urn(entity_type)
+                             for entity_type in structuredproperty.entity_types or []
+                         ],
+                         cardinality=structuredproperty.cardinality,
+                         immutable=structuredproperty.immutable,
+                         allowedValues=(
+                             [
+                                 PropertyValueClass(
+                                     value=v.value, description=v.description
+                                 )
+                                 for v in structuredproperty.allowed_values
+                             ]
+                             if structuredproperty.allowed_values
+                             else None
+                         ),
+                         typeQualifier=(
+                             {
+                                 "allowedTypes": structuredproperty.type_qualifier.allowed_types
+                             }
+                             if structuredproperty.type_qualifier
+                             else None
+                         ),
+                     ),
+                 )
+                 graph.emit_mcp(mcp)
 
-                     logger.info(f"Created structured property {structuredproperty.urn}")
+                 logger.info(f"Created structured property {structuredproperty.urn}")
 
      @classmethod
      def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
-         with StructuredPropertiesConfig.use_graph(graph):
+         with set_graph_context(graph):
              structured_property: Optional[
                  StructuredPropertyDefinitionClass
              ] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
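
The net effect of this refactor is that callers pass a graph client into create() and the validators pick it up through the new global-context helpers rather than through class-level state. A minimal usage sketch, assuming a reachable DataHub instance; the server URL and YAML file name below are placeholders:

    from datahub.api.entities.structuredproperties.structuredproperties import (
        StructuredProperties,
    )
    from datahub.ingestion.graph.client import DataHubGraph, DatahubClientConfig

    # Hypothetical connection details; any DataHubGraph instance works here.
    graph = DataHubGraph(DatahubClientConfig(server="http://localhost:8080"))

    # create() now enters set_graph_context(graph), so the entity-type
    # validators resolve against this graph instead of a class attribute.
    StructuredProperties.create("structured_properties.yaml", graph=graph)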
datahub/cli/ingest_cli.py CHANGED
@@ -27,6 +27,7 @@ from datahub.utilities.perf_timer import PerfTimer
 
  logger = logging.getLogger(__name__)
 
+ INGEST_SRC_TABLE_COLUMNS = ["runId", "source", "startTime", "status", "URN"]
  RUNS_TABLE_COLUMNS = ["runId", "rows", "created at"]
  RUN_TABLE_COLUMNS = ["urn", "aspect name", "created at"]
 
@@ -437,6 +438,115 @@ def mcps(path: str) -> None:
      sys.exit(ret)
 
 
+ @ingest.command()
+ @click.argument("page_offset", type=int, default=0)
+ @click.argument("page_size", type=int, default=100)
+ @click.option("--urn", type=str, default=None, help="Filter by ingestion source URN.")
+ @click.option(
+     "--source", type=str, default=None, help="Filter by ingestion source name."
+ )
+ @upgrade.check_upgrade
+ @telemetry.with_telemetry()
+ def list_source_runs(page_offset: int, page_size: int, urn: str, source: str) -> None:
+     """List ingestion source runs with their details, optionally filtered by URN or source."""
+
+     query = """
+         query listIngestionRuns($input: ListIngestionSourcesInput!) {
+           listIngestionSources(input: $input) {
+             ingestionSources {
+               urn
+               name
+               executions {
+                 executionRequests {
+                   id
+                   result {
+                     startTimeMs
+                     status
+                   }
+                 }
+               }
+             }
+           }
+         }
+     """
+
+     # filter by urn and/or source using CONTAINS
+     filters = []
+     if urn:
+         filters.append({"field": "urn", "values": [urn], "condition": "CONTAIN"})
+     if source:
+         filters.append({"field": "name", "values": [source], "condition": "CONTAIN"})
+
+     variables = {
+         "input": {
+             "start": page_offset,
+             "count": page_size,
+             "filters": filters,
+         }
+     }
+
+     client = get_default_graph()
+     session = client._session
+     gms_host = client.config.server
+
+     url = f"{gms_host}/api/graphql"
+     try:
+         response = session.post(url, json={"query": query, "variables": variables})
+         response.raise_for_status()
+     except Exception as e:
+         click.echo(f"Error fetching data: {str(e)}")
+         return
+
+     try:
+         data = response.json()
+     except ValueError:
+         click.echo("Failed to parse JSON response from server.")
+         return
+
+     if not data:
+         click.echo("No response received from the server.")
+         return
+
+     # when urn or source filter does not match, exit gracefully
+     if (
+         not isinstance(data.get("data"), dict)
+         or "listIngestionSources" not in data["data"]
+     ):
+         click.echo("No matching ingestion sources found. Please check your filters.")
+         return
+
+     ingestion_sources = data["data"]["listIngestionSources"]["ingestionSources"]
+     if not ingestion_sources:
+         click.echo("No ingestion sources or executions found.")
+         return
+
+     rows = []
+     for ingestion_source in ingestion_sources:
+         urn = ingestion_source.get("urn", "N/A")
+         name = ingestion_source.get("name", "N/A")
+
+         executions = ingestion_source.get("executions", {}).get("executionRequests", [])
+         for execution in executions:
+             execution_id = execution.get("id", "N/A")
+             start_time = execution.get("result", {}).get("startTimeMs", "N/A")
+             start_time = (
+                 datetime.fromtimestamp(start_time / 1000).strftime("%Y-%m-%d %H:%M:%S")
+                 if start_time != "N/A"
+                 else "N/A"
+             )
+             status = execution.get("result", {}).get("status", "N/A")
+
+             rows.append([execution_id, name, start_time, status, urn])
+
+     click.echo(
+         tabulate(
+             rows,
+             headers=INGEST_SRC_TABLE_COLUMNS,
+             tablefmt="grid",
+         )
+     )
+
+
  @ingest.command()
  @click.argument("page_offset", type=int, default=0)
  @click.argument("page_size", type=int, default=100)
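
Assuming Click's usual command naming (underscores become hyphens), the new subcommand would be invoked along these lines; the filter values are placeholders:

    # List the first 10 runs for sources whose name contains "snowflake"
    datahub ingest list-source-runs 0 10 --source "snowflake"

    # Or filter by ingestion source URN instead
    datahub ingest list-source-runs --urn "urn:li:dataHubIngestionSource:example"

The output is a grid-formatted table with the runId, source, startTime, status, and URN columns defined by INGEST_SRC_TABLE_COLUMNS.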
datahub/emitter/rest_emitter.py CHANGED
@@ -46,8 +46,18 @@ _DEFAULT_RETRY_MAX_TIMES = int(
      os.getenv("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4")
  )
 
- # The limit is 16mb. We will use a max of 15mb to have some space for overhead.
- _MAX_BATCH_INGEST_PAYLOAD_SIZE = 15 * 1024 * 1024
+ # The limit is 16mb. We will use a max of 15mb to have some space
+ # for overhead like request headers.
+ # This applies to pretty much all calls to GMS.
+ INGEST_MAX_PAYLOAD_BYTES = 15 * 1024 * 1024
+
+ # This limit is somewhat arbitrary. All GMS endpoints will timeout
+ # and return a 500 if processing takes too long. To avoid sending
+ # too much to the backend and hitting a timeout, we try to limit
+ # the number of MCPs we send in a batch.
+ BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
+     os.getenv("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH", 200)
+ )
 
 
  class DataHubRestEmitter(Closeable, Emitter):
@@ -290,11 +300,14 @@ class DataHubRestEmitter(Closeable, Emitter):
      # As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
      # If we will exceed the limit, we need to break it up into chunks.
      mcp_obj_chunks: List[List[str]] = []
-     current_chunk_size = _MAX_BATCH_INGEST_PAYLOAD_SIZE
+     current_chunk_size = INGEST_MAX_PAYLOAD_BYTES
      for mcp_obj in mcp_objs:
          mcp_obj_size = len(json.dumps(mcp_obj))
 
-         if mcp_obj_size + current_chunk_size > _MAX_BATCH_INGEST_PAYLOAD_SIZE:
+         if (
+             mcp_obj_size + current_chunk_size > INGEST_MAX_PAYLOAD_BYTES
+             or len(mcp_obj_chunks[-1]) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH
+         ):
              mcp_obj_chunks.append([])
              current_chunk_size = 0
          mcp_obj_chunks[-1].append(mcp_obj)
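
Read together, the two constants give the chunker a byte budget and an item cap. A standalone sketch of that rule (the function name and module-level constants are illustrative, not the emitter's actual interface):

    import json
    from typing import Any, Dict, List

    INGEST_MAX_PAYLOAD_BYTES = 15 * 1024 * 1024
    BATCH_INGEST_MAX_PAYLOAD_LENGTH = 200

    def chunk_mcps(mcp_objs: List[Dict[str, Any]]) -> List[List[Dict[str, Any]]]:
        chunks: List[List[Dict[str, Any]]] = [[]]
        current_size = 0
        for obj in mcp_objs:
            obj_size = len(json.dumps(obj))
            # Cut a new chunk when the byte budget would be exceeded or
            # the current chunk already holds the maximum number of MCPs.
            if (
                current_size + obj_size > INGEST_MAX_PAYLOAD_BYTES
                or len(chunks[-1]) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH
            ):
                chunks.append([])
                current_size = 0
            chunks[-1].append(obj)
            current_size += obj_size
        return chunks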
datahub/ingestion/sink/datahub_rest.py CHANGED
@@ -18,7 +18,10 @@ from datahub.configuration.common import (
  )
  from datahub.emitter.mcp import MetadataChangeProposalWrapper
  from datahub.emitter.mcp_builder import mcps_from_mce
- from datahub.emitter.rest_emitter import DataHubRestEmitter
+ from datahub.emitter.rest_emitter import (
+     BATCH_INGEST_MAX_PAYLOAD_LENGTH,
+     DataHubRestEmitter,
+ )
  from datahub.ingestion.api.common import RecordEnvelope, WorkUnit
  from datahub.ingestion.api.sink import (
      NoopWriteCallback,
@@ -71,6 +74,14 @@ class DatahubRestSinkConfig(DatahubClientConfig):
      # Only applies in async batch mode.
      max_per_batch: pydantic.PositiveInt = 100
 
+     @pydantic.validator("max_per_batch", always=True)
+     def validate_max_per_batch(cls, v):
+         if v > BATCH_INGEST_MAX_PAYLOAD_LENGTH:
+             raise ValueError(
+                 f"max_per_batch must be less than or equal to {BATCH_INGEST_MAX_PAYLOAD_LENGTH}"
+             )
+         return v
+
 
  @dataclasses.dataclass
  class DataHubRestSinkReport(SinkReport):
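
The validator ties the sink's batch size to the emitter-level cap, so an over-large max_per_batch now fails at config-parse time instead of producing oversized requests at runtime. A sketch of the failure mode, assuming the default cap of 200; the server URL is a placeholder:

    import pydantic

    from datahub.ingestion.sink.datahub_rest import DatahubRestSinkConfig

    try:
        # 500 exceeds BATCH_INGEST_MAX_PAYLOAD_LENGTH (200 by default),
        # so pydantic rejects the config before any records are sent.
        DatahubRestSinkConfig(server="http://localhost:8080", max_per_batch=500)
    except pydantic.ValidationError as e:
        print(e)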
datahub/ingestion/source/datahub/datahub_database_reader.py CHANGED
@@ -147,6 +147,47 @@ class DataHubDatabaseReader:
      version
      """
 
+     def execute_server_cursor(
+         self, query: str, params: Dict[str, Any]
+     ) -> Iterable[Dict[str, Any]]:
+         with self.engine.connect() as conn:
+             if self.engine.dialect.name == "postgresql":
+                 with conn.begin():  # Transaction required for PostgreSQL server-side cursor
+                     conn = conn.execution_options(
+                         stream_results=True,
+                         yield_per=self.config.database_query_batch_size,
+                     )
+                     result = conn.execute(query, params)
+                     for row in result:
+                         yield dict(row)
+             elif self.engine.dialect.name == "mysql":  # MySQL
+                 import MySQLdb
+
+                 with contextlib.closing(
+                     conn.connection.cursor(MySQLdb.cursors.SSCursor)
+                 ) as cursor:
+                     logger.debug(f"Using Cursor type: {cursor.__class__.__name__}")
+                     cursor.execute(query, params)
+
+                     columns = [desc[0] for desc in cursor.description]
+                     while True:
+                         rows = cursor.fetchmany(self.config.database_query_batch_size)
+                         if not rows:
+                             break  # Use break instead of return in generator
+                         for row in rows:
+                             yield dict(zip(columns, row))
+             else:
+                 raise ValueError(f"Unsupported dialect: {self.engine.dialect.name}")
+
+     def _get_rows(
+         self, from_createdon: datetime, stop_time: datetime
+     ) -> Iterable[Dict[str, Any]]:
+         params = {
+             "exclude_aspects": list(self.config.exclude_aspects),
+             "since_createdon": from_createdon.strftime(DATETIME_FORMAT),
+         }
+         yield from self.execute_server_cursor(self.query, params)
+
      def get_aspects(
          self, from_createdon: datetime, stop_time: datetime
      ) -> Iterable[Tuple[MetadataChangeProposalWrapper, datetime]]:
@@ -159,27 +200,6 @@ class DataHubDatabaseReader:
              if mcp:
                  yield mcp, row["createdon"]
 
-     def _get_rows(
-         self, from_createdon: datetime, stop_time: datetime
-     ) -> Iterable[Dict[str, Any]]:
-         with self.engine.connect() as conn:
-             with contextlib.closing(conn.connection.cursor()) as cursor:
-                 cursor.execute(
-                     self.query,
-                     {
-                         "exclude_aspects": list(self.config.exclude_aspects),
-                         "since_createdon": from_createdon.strftime(DATETIME_FORMAT),
-                     },
-                 )
-
-                 columns = [desc[0] for desc in cursor.description]
-                 while True:
-                     rows = cursor.fetchmany(self.config.database_query_batch_size)
-                     if not rows:
-                         return
-                     for row in rows:
-                         yield dict(zip(columns, row))
-
      def get_soft_deleted_rows(self) -> Iterable[Dict[str, Any]]:
          """
          Fetches all soft-deleted entities from the database.
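
For reference, the PostgreSQL branch above leans on SQLAlchemy's server-side cursor support (stream_results plus yield_per inside an open transaction), which keeps memory flat on large result sets instead of buffering every row client-side. A minimal standalone sketch; the connection URL, batch size, and query are placeholders:

    from sqlalchemy import create_engine, text

    engine = create_engine("postgresql://user:pass@localhost/datahub")  # placeholder
    with engine.connect() as conn:
        with conn.begin():  # server-side cursors require an open transaction
            conn = conn.execution_options(stream_results=True, yield_per=1000)
            # Rows are fetched from the server in batches of 1000 as we iterate.
            for row in conn.execute(text("SELECT urn, createdon FROM metadata_aspect_v2")):
                print(row)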
datahub/ingestion/source/datahub/datahub_source.py CHANGED
@@ -1,5 +1,5 @@
  import logging
- from datetime import datetime, timezone
+ from datetime import datetime, timedelta, timezone
  from functools import partial
  from typing import Dict, Iterable, List, Optional
 
@@ -26,6 +26,7 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
      StatefulIngestionSourceBase,
  )
  from datahub.metadata.schema_classes import ChangeTypeClass
+ from datahub.utilities.progress_timer import ProgressTimer
 
  logger = logging.getLogger(__name__)
 
@@ -105,11 +106,17 @@ class DataHubSource(StatefulIngestionSourceBase):
          self, from_createdon: datetime, reader: DataHubDatabaseReader
      ) -> Iterable[MetadataWorkUnit]:
          logger.info(f"Fetching database aspects starting from {from_createdon}")
+         progress = ProgressTimer(report_every=timedelta(seconds=60))
          mcps = reader.get_aspects(from_createdon, self.report.stop_time)
          for i, (mcp, createdon) in enumerate(mcps):
              if not self.urn_pattern.allowed(str(mcp.entityUrn)):
                  continue
 
+             if progress.should_report():
+                 logger.info(
+                     f"Ingested {i} database aspects so far, currently at {createdon}"
+                 )
+
              yield mcp.as_workunit()
              self.report.num_database_aspects_ingested += 1
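
ProgressTimer appears to act as a simple wall-clock rate limiter for log lines. A sketch of the pattern with a shortened interval, assuming the constructor and should_report() signature used in the diff:

    from datetime import timedelta
    from time import sleep

    from datahub.utilities.progress_timer import ProgressTimer

    progress = ProgressTimer(report_every=timedelta(seconds=2))
    for i in range(10):
        sleep(0.5)
        # should_report() returns True at most once per report_every window,
        # so a long-running loop emits periodic progress without log spam.
        if progress.should_report():
            print(f"processed {i} items so far")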