acryl-datahub 1.0.0.2rc4__py3-none-any.whl → 1.0.0.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (30)
  1. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/METADATA +2433 -2433
  2. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/RECORD +30 -28
  3. datahub/_version.py +1 -1
  4. datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py +2 -1
  5. datahub/ingestion/source/hex/api.py +1 -20
  6. datahub/ingestion/source/hex/query_fetcher.py +4 -1
  7. datahub/ingestion/source/sigma/config.py +75 -6
  8. datahub/ingestion/source/sigma/sigma.py +16 -1
  9. datahub/ingestion/source/sigma/sigma_api.py +99 -58
  10. datahub/ingestion/source/snowflake/snowflake_queries.py +18 -4
  11. datahub/ingestion/source/snowflake/snowflake_query.py +1 -1
  12. datahub/ingestion/source/sql/stored_procedures/base.py +12 -1
  13. datahub/metadata/_schema_classes.py +47 -2
  14. datahub/metadata/_urns/urn_defs.py +56 -0
  15. datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py +2 -0
  16. datahub/metadata/schema.avsc +121 -85
  17. datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc +22 -0
  18. datahub/metadata/schemas/DataTransformLogic.avsc +4 -2
  19. datahub/metadata/schemas/FormInfo.avsc +5 -0
  20. datahub/metadata/schemas/MLModelDeploymentProperties.avsc +3 -0
  21. datahub/metadata/schemas/MetadataChangeEvent.avsc +6 -0
  22. datahub/metadata/schemas/MetadataChangeLog.avsc +3 -0
  23. datahub/metadata/schemas/MetadataChangeProposal.avsc +3 -0
  24. datahub/metadata/schemas/QueryProperties.avsc +4 -2
  25. datahub/metadata/schemas/SystemMetadata.avsc +86 -0
  26. datahub/sql_parsing/sqlglot_utils.py +16 -8
  27. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/WHEEL +0 -0
  28. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/entry_points.txt +0 -0
  29. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/licenses/LICENSE +0 -0
  30. {acryl_datahub-1.0.0.2rc4.dist-info → acryl_datahub-1.0.0.3rc1.dist-info}/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
- acryl_datahub-1.0.0.2rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.0.0.3rc1.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=tio-Iic7Okp5tRZa3VWguBhJfeAqAu3sfuYo-W1E7Wc,323
4
+ datahub/_version.py,sha256=R-5q2sde87sdyofKBpzMGjN_yrh8SbPAoOTVYlH3CuU,323
5
5
  datahub/entrypoints.py,sha256=2TYgHhs3sCxJlojIHjqfxzt3_ImPwPzq4vBtsUuMqu4,8885
6
6
  datahub/errors.py,sha256=BzKdcmYseHOt36zfjJXc17WNutFhp9Y23cU_L6cIkxc,612
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -151,7 +151,7 @@ datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188m
151
151
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
152
152
  datahub/ingestion/api/auto_work_units/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
153
153
  datahub/ingestion/api/auto_work_units/auto_dataset_properties_aspect.py,sha256=ID_6N3nWl2qohsSGizUCqo3d2MNyDeVbyWroQpSOSsc,5059
154
- datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=5jrl7cEyonce-YdWe1Iw6y3Okw5smJosqwOm5e-nvqM,4363
154
+ datahub/ingestion/api/auto_work_units/auto_ensure_aspect_size.py,sha256=fMjPnyWEofIZV52E2AFYU3IgBJwyZvbygXxCJyEtcWI,4442
155
155
  datahub/ingestion/extractor/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
156
156
  datahub/ingestion/extractor/extractor_registry.py,sha256=f7CLfW3pr29QZkXSHbp7HjUrsdw7ejQJmot-tiSPcqc,342
157
157
  datahub/ingestion/extractor/json_ref_patch.py,sha256=4g3ZWHn7rwS74jUvSXJiGpi-UKHhiSYKKgBeU4E5ukE,1448
@@ -327,12 +327,12 @@ datahub/ingestion/source/git/git_import.py,sha256=5CT6vMDb0MDctCtShnxb3JVihULtvk
327
327
  datahub/ingestion/source/grafana/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
328
328
  datahub/ingestion/source/grafana/grafana_source.py,sha256=3pU3xodPgS5lmnjuQ_u7F0XPzD_Y8MnPlMxRJ86qz4g,4960
329
329
  datahub/ingestion/source/hex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
330
- datahub/ingestion/source/hex/api.py,sha256=JfFPD8O4z16fwZE_BdX5aCQztEq-tbzxJJ7aofH4DE4,12274
330
+ datahub/ingestion/source/hex/api.py,sha256=OVQNI_11NJJcNCT6OzSDEtVjNcom0vmes_KkjgzWCcI,11806
331
331
  datahub/ingestion/source/hex/constants.py,sha256=8hUTMWyG5keTNfXoLu_Dh413Hw_mGGJX1atiiDZyKtg,271
332
332
  datahub/ingestion/source/hex/hex.py,sha256=PIRl8fPkKtlHV7cqR4H8RKVYdTLgEFXHFzc3QAqJLhE,12733
333
333
  datahub/ingestion/source/hex/mapper.py,sha256=N3mTlEcrOmhv9ia1dnHGFgFJD2ddyTtU3H5IUbb-UxU,13344
334
334
  datahub/ingestion/source/hex/model.py,sha256=S9bUhfFcjzuio2dBS6HzSyRVPiSJvRvMQ0qyVrjV5-E,1766
335
- datahub/ingestion/source/hex/query_fetcher.py,sha256=5r065vL7XohcgZ_fj-1h6o8cxrPin37IeYsC99GU6LA,12287
335
+ datahub/ingestion/source/hex/query_fetcher.py,sha256=ZaRrja05mK0VlIvpsFi-8-EBoJr0GSLbUxBUjycibIU,12505
336
336
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
337
337
  datahub/ingestion/source/iceberg/iceberg.py,sha256=s69XzCGD5oV_hqTyvzCt5eLKZVEzVIJo_DiAEDk3p6A,34759
338
338
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=VGosqYPmn_j6GETSnDHZ8Ay1BVOedmx2x5LHxw16I3A,12278
@@ -439,10 +439,10 @@ datahub/ingestion/source/schema_inference/json.py,sha256=p5S-3idn65V2uad5T8txs1U
439
439
  datahub/ingestion/source/schema_inference/object.py,sha256=dhSOtxVJHbTDY0hWeHwdLYHnOsW07Omk7Y4DPeztie0,5847
440
440
  datahub/ingestion/source/schema_inference/parquet.py,sha256=CdqsNuiabLLCulWbuPMssijeFmKLv3M5MKFIhlatpWA,3456
441
441
  datahub/ingestion/source/sigma/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
442
- datahub/ingestion/source/sigma/config.py,sha256=yfdKQYvI5hKVl8gNAKIcJe-VW3klvdDqYbUP76gJQDI,3812
442
+ datahub/ingestion/source/sigma/config.py,sha256=xpZXt4f05-sroWFv9SbzVhU1-iBeVfU1ocJKb-fy3aM,6333
443
443
  datahub/ingestion/source/sigma/data_classes.py,sha256=YZkkzwftV34mq5c_4jlC2PCSiRKt4hvHjmqikLQhl1I,2012
444
- datahub/ingestion/source/sigma/sigma.py,sha256=ucODIa5KUGr3WSoo7VgCt8uFaKRbSDlwsdVMAcjPLpQ,24378
445
- datahub/ingestion/source/sigma/sigma_api.py,sha256=SVvbUs2vjueUdDa-3FzeMsaX5pNpApVI192P7EZzPcI,17870
444
+ datahub/ingestion/source/sigma/sigma.py,sha256=ZtPj8eu6hcJxyFcWizob4kRaxrpcqsWzh__lmuVZdt8,25212
445
+ datahub/ingestion/source/sigma/sigma_api.py,sha256=7PK5AQa838hYeaQ5L0dioi4n4bLrpN-r7COKTTNUYw8,19837
446
446
  datahub/ingestion/source/slack/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
447
447
  datahub/ingestion/source/slack/slack.py,sha256=3N7Yp-u9DvBmo536Z6-pQTrJgSJ3i742GePSgjlBOUU,27616
448
448
  datahub/ingestion/source/snowflake/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -455,8 +455,8 @@ datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=pEw2O9xoTSIWDi
455
455
  datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
456
456
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=FBmiONx4EGHWV8RNJT6zHZyntKinPFFyd2oKbTUIbhE,21319
457
457
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
458
- datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=gX9E1Z_CemAZsuTDmtvqrxY7vBL2da75j7X8Xwhaf8Y,28441
459
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=0AMPQ_L7sgQtBizBNEe69-BUM8_wk1m8ystWivwKEMI,40409
458
+ datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=PY4Wy6i89nqRl92ARwXNqWwm-ifagkKbKKtxYWeswkk,29209
459
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=JtTrfzGqM9mk2Fr-F1X0KXzc_8ot7rD3dD2vPEuzd0E,40411
460
460
  datahub/ingestion/source/snowflake/snowflake_report.py,sha256=O-465aBA8uaYZ6WepP7i6cgK6Q1jXJPjDA1j9C8klus,6762
461
461
  datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=1yGBbs2aWIdHnrwgeTR7J2lqxbbBsIt8ejCLumIpLEA,27274
462
462
  datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=_37-AQyI4uGt4fu-d3v2eAWzQ3uG835ZQxMjFwGYCng,57193
@@ -496,7 +496,7 @@ datahub/ingestion/source/sql/mssql/__init__.py,sha256=1agpl8S_uDW40olkhCX_W19dbr
496
496
  datahub/ingestion/source/sql/mssql/job_models.py,sha256=nAo3rciu-w2-dXCz6_ekDEbGMEjCMEfh8WvSfXoF2l0,9359
497
497
  datahub/ingestion/source/sql/mssql/source.py,sha256=Er0uTAsEOzLg2KVddf22pOdzEAwkOjNdqYCPxeCiHdA,32755
498
498
  datahub/ingestion/source/sql/stored_procedures/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
499
- datahub/ingestion/source/sql/stored_procedures/base.py,sha256=X4RXYknK8ZPsMgXQFzo2_CsMcXYhDWLm-fLFmZNkZfc,7888
499
+ datahub/ingestion/source/sql/stored_procedures/base.py,sha256=n0l5OaTuW-m3TRvkxs3TqvgMeWF6BagzW3tjyWUcC1A,8631
500
500
  datahub/ingestion/source/sql/stored_procedures/lineage.py,sha256=9kBoq4JLa4-I3TMAYwChXp76Kag8YBUEIAkaQMplIkY,1844
501
501
  datahub/ingestion/source/state/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
502
502
  datahub/ingestion/source/state/checkpoint.py,sha256=-fTUZKkY4nHTFqSWZ0jJkkdIu_tWlOjRNhm4FTr4ul4,8860
@@ -592,12 +592,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
592
592
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
593
593
  datahub/lite/lite_util.py,sha256=G0LQHKkyEb1pc_q183g6hflShclGx7kikgMaOxtVVcs,4545
594
594
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
595
- datahub/metadata/_schema_classes.py,sha256=HvQKAu3eTDt3lkQcoLpBqYlPo0jfBONWCd6T9a1Meck,1011170
596
- datahub/metadata/schema.avsc,sha256=rX4HzpW5NPL28A2vfu6ESo-hSyQDnK7LUnvasa7dQCE,752628
595
+ datahub/metadata/_schema_classes.py,sha256=n8NwTeBKsnQdtNCMG85LDc6mPgDT5JGUbsgg9KcNj18,1012635
596
+ datahub/metadata/schema.avsc,sha256=ckv1TFKtvz0eGTSqL2qijL4eqfTuQYMIj91pO0vbc2M,753114
597
597
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
598
598
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
599
599
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
600
- datahub/metadata/_urns/urn_defs.py,sha256=mQ52ozRUt19MyBLNZh1f1ETlafCzCYmEbcKxAjR_8o4,133983
600
+ datahub/metadata/_urns/urn_defs.py,sha256=SCNCcJBVZjQd2LE98oMyAA8Lg30Umc6mExPdzAKBhgs,136344
601
601
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
602
602
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
603
603
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -639,7 +639,7 @@ datahub/metadata/com/linkedin/pegasus2avro/identity/__init__.py,sha256=1U583fdMT
639
639
  datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py,sha256=LfB7ytT1uuGl5Y8oeU6ILCpSGsGmUKgCjsHphn7AThg,916
640
640
  datahub/metadata/com/linkedin/pegasus2avro/ingestion/__init__.py,sha256=1bfG2naq4iS_pwU4J-BVer_gfL0hDbJbnH0gh1MPNgA,871
641
641
  datahub/metadata/com/linkedin/pegasus2avro/metadata/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
642
- datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py,sha256=kooj_lICFtq7GjWmYFtQOGuXYy9u4QomKrhTTVKFnDg,4812
642
+ datahub/metadata/com/linkedin/pegasus2avro/metadata/key/__init__.py,sha256=bvCD3zLtdgAtLT6ZpjeBQ8-QglBL9fGBPW89whWGNwM,4929
643
643
  datahub/metadata/com/linkedin/pegasus2avro/metadata/query/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
644
644
  datahub/metadata/com/linkedin/pegasus2avro/metadata/query/filter/__init__.py,sha256=DBP_QtxkFmC5q_kuk4dGjb4uOKbB4xKgqTWXGxmNbBQ,532
645
645
  datahub/metadata/com/linkedin/pegasus2avro/metadata/snapshot/__init__.py,sha256=OPboF8SV11wGnjvWQB-rxtB0otMdCsE7Tcy7xkOUgz8,2358
@@ -717,6 +717,7 @@ datahub/metadata/schemas/DataHubConnectionDetails.avsc,sha256=IvZj6OA7HRvy-ZIIn0
717
717
  datahub/metadata/schemas/DataHubConnectionKey.avsc,sha256=VwbamVFoEdp6epz1lJm_UShBl6ksBxoA7jAYuPI5u3M,522
718
718
  datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc,sha256=4wac7sluRIq-0ZjODE5SmuVKuQeW8ajLJNRpqEBRyio,4601
719
719
  datahub/metadata/schemas/DataHubIngestionSourceKey.avsc,sha256=CSmoOx_Eqa1-he5dRaVOUQWIv1l2e2lraEPIixKK-lo,526
720
+ datahub/metadata/schemas/DataHubOpenAPISchemaKey.avsc,sha256=q6ZyMoxInwmrkrXkUgMe-i-WZzAxbjcvJ-EI99SnEp8,599
720
721
  datahub/metadata/schemas/DataHubPersonaInfo.avsc,sha256=OUvbTgPQsBtzkDDb9pxHXpQ6A7dkL77ZnCXZ-MLEG14,227
721
722
  datahub/metadata/schemas/DataHubPersonaKey.avsc,sha256=ddj-DhXa0_YMdLaGkKLLSklfIeDRvSwPXu8o__YEXUE,448
722
723
  datahub/metadata/schemas/DataHubPolicyInfo.avsc,sha256=mK8tvgX4hDYuck2R6EihKSYzo6s8hNvvA_hgTOgMrFc,9397
@@ -752,7 +753,7 @@ datahub/metadata/schemas/DataProcessInstanceRunEvent.avsc,sha256=zwTYULEnpMbqwkL
752
753
  datahub/metadata/schemas/DataProcessKey.avsc,sha256=sQzsBZAAIJwwZcDrEU3d4G50qlGwrISC1Tqgb9BLq2Y,2258
753
754
  datahub/metadata/schemas/DataProductKey.avsc,sha256=5az_f7g0F0oMwWTI4OyEJn47PKQWNVC6OnMZPs9P4Oo,639
754
755
  datahub/metadata/schemas/DataProductProperties.avsc,sha256=nYEK6JgpTprU0iZaqWLZsBGYJLkh6HCi1qCu-wbYhvM,6925
755
- datahub/metadata/schemas/DataTransformLogic.avsc,sha256=wDng1GK9znVoK0INHGiSCSa-AH5MrDkVdMzz4wOWmrY,2011
756
+ datahub/metadata/schemas/DataTransformLogic.avsc,sha256=nHTH6UzJ2Zz88N2aWa96hawLUR20HP7eSynfPtI1kzg,2111
756
757
  datahub/metadata/schemas/DataTypeInfo.avsc,sha256=MCjzal71P8uIXZg161LrU8rZTJocZeizK-YxYA0Det0,704
757
758
  datahub/metadata/schemas/DataTypeKey.avsc,sha256=Gs5uc_azwg10e36ZbwDTFQMevr0IfiFvJoEGHRzEilw,546
758
759
  datahub/metadata/schemas/DatahubIngestionCheckpoint.avsc,sha256=m2Zyrx3ZWDc5gHuwbmBSRJ3JN4NFkpUhDEKM2Yeuqrw,5681
@@ -797,7 +798,7 @@ datahub/metadata/schemas/ExecutionRequestKey.avsc,sha256=SvjnlTAGYsSnvVE0rZ9-7UP
797
798
  datahub/metadata/schemas/ExecutionRequestResult.avsc,sha256=kg3xMNr9kYLPnFsV-iqcGm1sh1muQVGJvxUt15L1yKo,2333
798
799
  datahub/metadata/schemas/ExecutionRequestSignal.avsc,sha256=dsIUa6tfVSXqYOgh4cW6_Hzi8RjHuJJoO-mBAuZukpA,2515
799
800
  datahub/metadata/schemas/Filter.avsc,sha256=PU-aGkc2-sI3ZXY7ci-Y0A7zp1jux3VW_6c8MJRAokg,5933
800
- datahub/metadata/schemas/FormInfo.avsc,sha256=FbN34htiCgm3LqKDL3sVsJhMUHIyc5jYpGJtYm7Ysd4,6270
801
+ datahub/metadata/schemas/FormInfo.avsc,sha256=tlNI-m6uWJ46yF6Ls6Q_DOEZM2bQgXTd4bSeH30N9MA,6438
801
802
  datahub/metadata/schemas/FormKey.avsc,sha256=1-wE28B8T3WJ3JtexreNtFvP3To3n7U-jvYudCuSM9o,437
802
803
  datahub/metadata/schemas/Forms.avsc,sha256=shmkhRoHN2gTaTsqGrGDRoNwe_z-nrFbbLjH9MtVDCs,10955
803
804
  datahub/metadata/schemas/GlobalSettingsInfo.avsc,sha256=OVMM6FwhHhufHkezYcVePK0zI2llzFYLVFJhmAiHoiI,10102
@@ -828,7 +829,7 @@ datahub/metadata/schemas/MLFeatureTableProperties.avsc,sha256=BtrqcsxoQXObPZXSGR
828
829
  datahub/metadata/schemas/MLHyperParam.avsc,sha256=dE6i5r6LTYMNrQe9yy-jKoP09GOJUf__1bO69ldpydc,833
829
830
  datahub/metadata/schemas/MLMetric.avsc,sha256=y8WPVVwjhu3YGtqpFFJYNYK8w778RRL_d2sHG1Dc7uM,804
830
831
  datahub/metadata/schemas/MLModelDeploymentKey.avsc,sha256=vt04jFF_ZHSvWhqLoxC8C_KspiRLkvNNIXJI0aKPF1Q,2425
831
- datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=I3v-uNOeYxO4hooPHOjafWWHuVyeGvG90oma0tzpNFg,5409
832
+ datahub/metadata/schemas/MLModelDeploymentProperties.avsc,sha256=7IlGrMmX8nfgezvaZyrXskCTCRlwvRzGOYUOpFV3r6Y,5480
832
833
  datahub/metadata/schemas/MLModelFactorPrompts.avsc,sha256=8kX-P4F4mVLFT980z3MwIautt1_6uA-c_Z87nYNDK-k,2712
833
834
  datahub/metadata/schemas/MLModelGroupKey.avsc,sha256=3LoMWejMfCwdoqz3PFinRbY1_Yy4Kypw7pwg3tL42Jg,2497
834
835
  datahub/metadata/schemas/MLModelGroupProperties.avsc,sha256=AZ5Pohk3_pCctQ4hcE1UOURQFYHQne0dw_lRUpOu5WY,6924
@@ -837,9 +838,9 @@ datahub/metadata/schemas/MLModelProperties.avsc,sha256=hDCBHxGe-cmCBeU1k0ANuQlKj
837
838
  datahub/metadata/schemas/MLPrimaryKeyKey.avsc,sha256=Kq2Q9WxZ6nQ8wR4P6wpPCI-J7FwXQyoa10s6BvXtkm8,1110
838
839
  datahub/metadata/schemas/MLPrimaryKeyProperties.avsc,sha256=URIuOpS93RVk8MZVcbZ-dmTwu_cN3KSOKxSR8fm-eTo,6744
839
840
  datahub/metadata/schemas/MLTrainingRunProperties.avsc,sha256=WGgj0MuQrGD4UgvyHCJHzTnHja2LlJTOr1gLu8SySj0,4269
840
- datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=rM6klxSttK-cS083tR0BaVgWVJskjMmhwFbHuV8VKI8,375579
841
- datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=mpdodpx25E6M1Gq_7slEcPAm-1Es5xPsoqV60HgO7zg,12167
842
- datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=EMfQrYsuHf1p6UvBjoLtfdTHGe-vGNJaCFEHz8hdKU0,9698
841
+ datahub/metadata/schemas/MetadataChangeEvent.avsc,sha256=OiEGLnlNxypkw2pyupuRj88k5Bkl0hLoiBQ3bODXQjc,375776
842
+ datahub/metadata/schemas/MetadataChangeLog.avsc,sha256=Cf5eECeShCA_XHFr2MRhRQpPE61F6Xv-z1jjoBLJLgc,12239
843
+ datahub/metadata/schemas/MetadataChangeProposal.avsc,sha256=tvO5cGIqZAIvUbMon1RAKgSY4E0jvBqT5VmLWAuNGkY,9770
843
844
  datahub/metadata/schemas/Metrics.avsc,sha256=O7DJGjOwmHbb1x_Zj7AuM_HaHKjBvkfJKfUsX8icXD4,690
844
845
  datahub/metadata/schemas/NativeGroupMembership.avsc,sha256=9mh9tzyj3ErmTIhX7ERRUm78j1QtGwXUl9UuIXPndBg,588
845
846
  datahub/metadata/schemas/NotebookContent.avsc,sha256=ck3yDt0qK5Hn3-mWTNLlYnohXCs6kMUUWPXm7o1JEWE,12496
@@ -858,7 +859,7 @@ datahub/metadata/schemas/PostInfo.avsc,sha256=CxL1Z-6jWO-xQ7mei5dwAFe_U7sfkQrkSB
858
859
  datahub/metadata/schemas/PostKey.avsc,sha256=MEA-S-RonGrsrpwYEejR6CXRka2DcqP7S7qvO5JgEFU,475
859
860
  datahub/metadata/schemas/QuantitativeAnalyses.avsc,sha256=XrGYVoGUh7ZRjAbMORSiqF5Zl4a0IbYDPiQuSJhABZM,960
860
861
  datahub/metadata/schemas/QueryKey.avsc,sha256=VI4oIHvAO7f0lN_7V3QVuBfHcPz31c57XtW6IrlDfxc,518
861
- datahub/metadata/schemas/QueryProperties.avsc,sha256=fiLYT539EM0jVEAngWXC7P9QnnIMoEsAaUp7nClwu0s,5509
862
+ datahub/metadata/schemas/QueryProperties.avsc,sha256=26Q3zzuzJbUCUG7IJ3q_OEdcNbyzloZzDJWPbv3GQAk,5589
862
863
  datahub/metadata/schemas/QuerySubjects.avsc,sha256=WbnUGiOBdc1Ypn4P_R5gCfPH-wNHJUIk7YeyJ2NiOZQ,1747
863
864
  datahub/metadata/schemas/QueryUsageStatistics.avsc,sha256=z1gfAnXdBoPEeERi5RESjrdBuS6AcIdqdN5JqWOSuNo,6192
864
865
  datahub/metadata/schemas/RoleKey.avsc,sha256=Uas5jFViSHXhFqq8D4P6-UXqywOth3coztjQ5wA7wL0,449
@@ -877,6 +878,7 @@ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=a-6TaOQ4A7LDFL
877
878
  datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
878
879
  datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
879
880
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
881
+ datahub/metadata/schemas/SystemMetadata.avsc,sha256=wDVdpa9LSAlMzHIiWw-fMLHTCrxcJdnDOY_n5CDNTN8,2068
880
882
  datahub/metadata/schemas/TagKey.avsc,sha256=BfckMlx-wg_LV1_PFVgItfNBPtCQ8_erGeQM4LzOXmY,640
881
883
  datahub/metadata/schemas/TagProperties.avsc,sha256=Qzttxd7BB38JUwwl7tZzIV1Warnh-uQO-Ahw9Sd-vH4,883
882
884
  datahub/metadata/schemas/TelemetryClientId.avsc,sha256=GScej0kXFZxoBUcRVrVynzArFSYQpO_dnhY5Po5dlx0,408
@@ -938,7 +940,7 @@ datahub/sql_parsing/sql_parsing_aggregator.py,sha256=A3_0wSxBJSRowEaslptDpBoKO42
938
940
  datahub/sql_parsing/sql_parsing_common.py,sha256=cZ4WvVyHZuXDGjnBvKMX2_fz2EMextB5WQWcK0_saBo,3155
939
941
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
940
942
  datahub/sql_parsing/sqlglot_lineage.py,sha256=l0kT8MuRIg96X7BNJaboMznF54b-yvM2nMTLyF2d0Nw,47446
941
- datahub/sql_parsing/sqlglot_utils.py,sha256=HP6awSU4ijmwjmTvGA_d0X_RO9O3rbGdkbVAWEhAcck,14667
943
+ datahub/sql_parsing/sqlglot_utils.py,sha256=5cUiEWLWfVTI7uIxolAfOfNVo50qnklzhj86gxSFWqg,14943
942
944
  datahub/sql_parsing/tool_meta_extractor.py,sha256=EV_g7sOchTSUm2p6wluNJqND7-rDYokVTqqFCM7hQ6c,7599
943
945
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
944
946
  datahub/telemetry/stats.py,sha256=TwaQisQlD2Bk0uw__pP6u3Ovz9r-Ip4pCwpnto4r5e0,959
@@ -1043,8 +1045,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1043
1045
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1044
1046
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1045
1047
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1046
- acryl_datahub-1.0.0.2rc4.dist-info/METADATA,sha256=Alsmazr6nq0qPL29zOJJo5M92W2webjcRMxOD7j2BIY,176853
1047
- acryl_datahub-1.0.0.2rc4.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1048
- acryl_datahub-1.0.0.2rc4.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1049
- acryl_datahub-1.0.0.2rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1050
- acryl_datahub-1.0.0.2rc4.dist-info/RECORD,,
1048
+ acryl_datahub-1.0.0.3rc1.dist-info/METADATA,sha256=43mPIcmD4ByKfyR6rn8PPgaKNUBSmDmVJnGm1KhBZuo,176855
1049
+ acryl_datahub-1.0.0.3rc1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
1050
+ acryl_datahub-1.0.0.3rc1.dist-info/entry_points.txt,sha256=o3mDeJXSKhsy7XLkuogihraiabBdLn9HaizYXPrxmk0,9710
1051
+ acryl_datahub-1.0.0.3rc1.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1052
+ acryl_datahub-1.0.0.3rc1.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.0.0.2rc4"
3
+ __version__ = "1.0.0.3rc1"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -23,6 +23,7 @@ class EnsureAspectSizeProcessor:
23
23
  ):
24
24
  self.report = report
25
25
  self.payload_constraint = payload_constraint
26
+ self.schema_size_constraint = int(self.payload_constraint * 0.985)
26
27
 
27
28
  def ensure_dataset_profile_size(
28
29
  self, dataset_urn: str, profile: DatasetProfileClass
@@ -68,7 +69,7 @@ class EnsureAspectSizeProcessor:
68
69
  for field in schema.fields:
69
70
  field_size = len(json.dumps(pre_json_transform(field.to_obj())))
70
71
  logger.debug(f"Field {field.fieldPath} takes total {field_size}")
71
- if total_fields_size + field_size < self.payload_constraint:
72
+ if total_fields_size + field_size < self.schema_size_constraint:
72
73
  accepted_fields.append(field)
73
74
  total_fields_size += field_size
74
75
  else:
@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
27
27
 
28
28
  # The following models were Claude-generated from Hex API OpenAPI definition https://static.hex.site/openapi.json
29
29
  # To be exclusively used internally for the deserialization of the API response
30
+ # Model is incomplete and fields may have not been mapped if not used in the ingestion
30
31
 
31
32
 
32
33
  class HexApiAppViewStats(BaseModel):
@@ -83,20 +84,10 @@ class HexApiUser(BaseModel):
83
84
  email: str
84
85
 
85
86
 
86
- class HexApiAccessType(StrEnum):
87
- """Access type enum."""
88
-
89
- NONE = "NONE"
90
- VIEW = "VIEW"
91
- EDIT = "EDIT"
92
- FULL_ACCESS = "FULL_ACCESS"
93
-
94
-
95
87
  class HexApiUserAccess(BaseModel):
96
88
  """User access model."""
97
89
 
98
90
  user: HexApiUser
99
- access: Optional[HexApiAccessType] = None
100
91
 
101
92
 
102
93
  class HexApiCollectionData(BaseModel):
@@ -109,13 +100,6 @@ class HexApiCollectionAccess(BaseModel):
109
100
  """Collection access model."""
110
101
 
111
102
  collection: HexApiCollectionData
112
- access: Optional[HexApiAccessType] = None
113
-
114
-
115
- class HexApiAccessSettings(BaseModel):
116
- """Access settings model."""
117
-
118
- access: Optional[HexApiAccessType] = None
119
103
 
120
104
 
121
105
  class HexApiWeeklySchedule(BaseModel):
@@ -145,9 +129,6 @@ class HexApiSharing(BaseModel):
145
129
  users: Optional[List[HexApiUserAccess]] = []
146
130
  collections: Optional[List[HexApiCollectionAccess]] = []
147
131
  groups: Optional[List[Any]] = []
148
- workspace: Optional[HexApiAccessSettings] = None
149
- public_web: Optional[HexApiAccessSettings] = Field(default=None, alias="publicWeb")
150
- support: Optional[HexApiAccessSettings] = None
151
132
 
152
133
  class Config:
153
134
  extra = "ignore" # Allow extra fields in the JSON
@@ -18,7 +18,8 @@ from datahub.utilities.time import datetime_to_ts_millis
18
18
  logger = logging.getLogger(__name__)
19
19
 
20
20
  # Pattern to extract both project_id and workspace_name from Hex metadata in SQL comments
21
- HEX_METADATA_PATTERN = r'-- Hex query metadata: \{.*?"project_id": "([^"]+)".*?"project_url": "https?://[^/]+/([^/]+)/hex/.*?\}'
21
+ # Only match metadata with "context": "SCHEDULED_RUN" to filter out non-scheduled runs
22
+ HEX_METADATA_PATTERN = r'-- Hex query metadata: \{.*?"context": "SCHEDULED_RUN".*?"project_id": "([^"]+)".*?"project_url": "https?://[^/]+/([^/]+)/hex/.*?\}'
22
23
 
23
24
 
24
25
  @dataclass
@@ -39,6 +40,7 @@ class HexQueryFetcherReport(SourceReport):
39
40
  fetched_query_objects: int = 0
40
41
  filtered_out_queries_missing_metadata: int = 0
41
42
  filtered_out_queries_different_workspace: int = 0
43
+ filtered_out_queries_no_match: int = 0
42
44
  filtered_out_queries_no_subjects: int = 0
43
45
  total_queries: int = 0
44
46
  total_dataset_subjects: int = 0
@@ -210,6 +212,7 @@ class HexQueryFetcher:
210
212
  match = re.search(HEX_METADATA_PATTERN, sql_statement)
211
213
 
212
214
  if not match:
215
+ self.report.filtered_out_queries_no_match += 1
213
216
  return None
214
217
 
215
218
  try:
@@ -1,8 +1,9 @@
1
1
  import logging
2
2
  from dataclasses import dataclass, field
3
- from typing import Dict, Optional
3
+ from typing import Dict, List, Optional
4
4
 
5
5
  import pydantic
6
+ from pydantic import BaseModel, Field
6
7
 
7
8
  from datahub.configuration.common import AllowDenyPattern
8
9
  from datahub.configuration.source_common import (
@@ -17,6 +18,7 @@ from datahub.ingestion.source.state.stale_entity_removal_handler import (
17
18
  from datahub.ingestion.source.state.stateful_ingestion_base import (
18
19
  StatefulIngestionConfigBase,
19
20
  )
21
+ from datahub.utilities.lossy_collections import LossyDict
20
22
 
21
23
  logger = logging.getLogger(__name__)
22
24
 
@@ -53,15 +55,82 @@ class Constant:
53
55
  DEFAULT_API_URL = "https://aws-api.sigmacomputing.com/v2"
54
56
 
55
57
 
58
+ class WorkspaceCounts(BaseModel):
59
+ workbooks_count: int = 0
60
+ datasets_count: int = 0
61
+ elements_count: int = 0
62
+ pages_count: int = 0
63
+
64
+ def is_empty(self) -> bool:
65
+ return (
66
+ self.workbooks_count == 0
67
+ and self.datasets_count == 0
68
+ and self.elements_count == 0
69
+ and self.pages_count == 0
70
+ )
71
+
72
+ def as_obj(self) -> dict:
73
+ return {
74
+ "workbooks_count": self.workbooks_count,
75
+ "datasets_count": self.datasets_count,
76
+ "elements_count": self.elements_count,
77
+ "pages_count": self.pages_count,
78
+ }
79
+
80
+
81
+ class SigmaWorkspaceEntityFilterReport(EntityFilterReport):
82
+ type: str = "workspace"
83
+
84
+ workspace_counts: LossyDict[str, WorkspaceCounts] = Field(
85
+ default_factory=LossyDict,
86
+ description="Counts of workbooks, datasets, elements and pages in each workspace.",
87
+ )
88
+
89
+ def increment_workbooks_count(self, workspace_id: str) -> None:
90
+ if workspace_id not in self.workspace_counts:
91
+ self.workspace_counts[workspace_id] = WorkspaceCounts()
92
+ self.workspace_counts[workspace_id].workbooks_count += 1
93
+
94
+ def increment_datasets_count(self, workspace_id: str) -> None:
95
+ if workspace_id not in self.workspace_counts:
96
+ self.workspace_counts[workspace_id] = WorkspaceCounts()
97
+ self.workspace_counts[workspace_id].datasets_count += 1
98
+
99
+ def increment_elements_count(self, workspace_id: str) -> None:
100
+ if workspace_id not in self.workspace_counts:
101
+ self.workspace_counts[workspace_id] = WorkspaceCounts()
102
+ self.workspace_counts[workspace_id].elements_count += 1
103
+
104
+ def increment_pages_count(self, workspace_id: str) -> None:
105
+ if workspace_id not in self.workspace_counts:
106
+ self.workspace_counts[workspace_id] = WorkspaceCounts()
107
+ self.workspace_counts[workspace_id].pages_count += 1
108
+
109
+ def as_obj(self) -> dict:
110
+ return {
111
+ "filtered": self.dropped_entities.as_obj(),
112
+ "processed": self.processed_entities.as_obj(),
113
+ "workspace_counts": {
114
+ key: item.as_obj() for key, item in self.workspace_counts.items()
115
+ },
116
+ }
117
+
118
+
56
119
  @dataclass
57
120
  class SigmaSourceReport(StaleEntityRemovalSourceReport):
58
- workspaces: EntityFilterReport = EntityFilterReport.field(type="workspace")
59
- number_of_workspaces: Optional[int] = None
121
+ workspaces: SigmaWorkspaceEntityFilterReport = field(
122
+ default_factory=SigmaWorkspaceEntityFilterReport
123
+ )
60
124
  non_accessible_workspaces_count: int = 0
61
- shared_entities_count: int = 0
62
- number_of_datasets: int = 0
63
- number_of_workbooks: int = 0
125
+
126
+ datasets: EntityFilterReport = EntityFilterReport.field(type="dataset")
127
+ datasets_without_workspace: int = 0
128
+
129
+ workbooks: EntityFilterReport = EntityFilterReport.field(type="workbook")
130
+ workbooks_without_workspace: int = 0
131
+
64
132
  number_of_files_metadata: Dict[str, int] = field(default_factory=dict)
133
+ empty_workspaces: List[str] = field(default_factory=list)
65
134
 
66
135
 
67
136
  class PlatformDetail(PlatformInstanceConfigMixin, EnvConfigMixin):
@@ -35,6 +35,7 @@ from datahub.ingestion.source.sigma.config import (
35
35
  PlatformDetail,
36
36
  SigmaSourceConfig,
37
37
  SigmaSourceReport,
38
+ WorkspaceCounts,
38
39
  )
39
40
  from datahub.ingestion.source.sigma.data_classes import (
40
41
  Element,
@@ -163,7 +164,6 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
163
164
  def _get_allowed_workspaces(self) -> List[Workspace]:
164
165
  all_workspaces = self.sigma_api.workspaces.values()
165
166
  logger.info(f"Number of workspaces = {len(all_workspaces)}")
166
- self.reporter.number_of_workspaces = len(all_workspaces)
167
167
 
168
168
  allowed_workspaces = []
169
169
  for workspace in all_workspaces:
@@ -285,6 +285,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
285
285
  yield self._gen_dataset_properties(dataset_urn, dataset)
286
286
 
287
287
  if dataset.workspaceId:
288
+ self.reporter.workspaces.increment_datasets_count(dataset.workspaceId)
288
289
  yield from add_entity_to_container(
289
290
  container_key=self._gen_workspace_key(dataset.workspaceId),
290
291
  entity_type="dataset",
@@ -468,6 +469,8 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
468
469
  ).as_workunit()
469
470
 
470
471
  if workbook.workspaceId:
472
+ self.reporter.workspaces.increment_elements_count(workbook.workspaceId)
473
+
471
474
  yield self._gen_entity_browsepath_aspect(
472
475
  entity_urn=chart_urn,
473
476
  parent_entity_urn=builder.make_container_urn(
@@ -525,6 +528,7 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
525
528
  all_input_fields: List[InputFieldClass] = []
526
529
 
527
530
  if workbook.workspaceId:
531
+ self.reporter.workspaces.increment_pages_count(workbook.workspaceId)
528
532
  yield self._gen_entity_browsepath_aspect(
529
533
  entity_urn=dashboard_urn,
530
534
  parent_entity_urn=builder.make_container_urn(
@@ -614,6 +618,8 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
614
618
 
615
619
  paths = workbook.path.split("/")[1:]
616
620
  if workbook.workspaceId:
621
+ self.reporter.workspaces.increment_workbooks_count(workbook.workspaceId)
622
+
617
623
  yield self._gen_entity_browsepath_aspect(
618
624
  entity_urn=dashboard_urn,
619
625
  parent_entity_urn=builder.make_container_urn(
@@ -667,6 +673,15 @@ class SigmaSource(StatefulIngestionSourceBase, TestableSource):
667
673
  f"{workspace.name} ({workspace.workspaceId})"
668
674
  )
669
675
  yield from self._gen_workspace_workunit(workspace)
676
+ if self.reporter.workspaces.workspace_counts.get(
677
+ workspace.workspaceId, WorkspaceCounts()
678
+ ).is_empty():
679
+ logger.warning(
680
+ f"Workspace {workspace.name} ({workspace.workspaceId}) is empty. If this is not expected, add the user associated with the Client ID/Secret to each workspace with missing metadata"
681
+ )
682
+ self.reporter.empty_workspaces.append(
683
+ f"{workspace.name} ({workspace.workspaceId})"
684
+ )
670
685
  yield from self._gen_sigma_dataset_upstream_lineage_workunit()
671
686
 
672
687
  def get_report(self) -> SourceReport: