acryl-datahub 0.15.0rc16__py3-none-any.whl → 0.15.0rc17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (33)
  1. {acryl_datahub-0.15.0rc16.dist-info → acryl_datahub-0.15.0rc17.dist-info}/METADATA +2512 -2512
  2. {acryl_datahub-0.15.0rc16.dist-info → acryl_datahub-0.15.0rc17.dist-info}/RECORD +33 -31
  3. datahub/__init__.py +1 -1
  4. datahub/api/entities/structuredproperties/structuredproperties.py +7 -5
  5. datahub/cli/delete_cli.py +66 -20
  6. datahub/configuration/common.py +3 -3
  7. datahub/ingestion/api/source.py +5 -1
  8. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +2 -2
  9. datahub/ingestion/run/pipeline.py +1 -1
  10. datahub/ingestion/run/pipeline_config.py +6 -0
  11. datahub/ingestion/source/kafka/kafka.py +18 -11
  12. datahub/ingestion/source/looker/lookml_concept_context.py +1 -2
  13. datahub/ingestion/source/looker/view_upstream.py +65 -30
  14. datahub/ingestion/source/snowflake/snowflake_query.py +6 -2
  15. datahub/ingestion/source/snowflake/snowflake_report.py +1 -0
  16. datahub/ingestion/source/snowflake/snowflake_schema.py +12 -0
  17. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +17 -2
  18. datahub/ingestion/source/snowflake/snowflake_utils.py +45 -5
  19. datahub/ingestion/source/state/redundant_run_skip_handler.py +1 -1
  20. datahub/ingestion/source/tableau/tableau.py +35 -16
  21. datahub/ingestion/source/tableau/tableau_common.py +0 -1
  22. datahub/metadata/_schema_classes.py +122 -2
  23. datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py +2 -0
  24. datahub/metadata/schema.avsc +73 -1
  25. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +1 -1
  26. datahub/metadata/schemas/StructuredPropertyKey.avsc +1 -0
  27. datahub/metadata/schemas/StructuredPropertySettings.avsc +114 -0
  28. datahub/sql_parsing/schema_resolver.py +23 -0
  29. datahub/sql_parsing/sqlglot_lineage.py +48 -13
  30. datahub/testing/doctest.py +12 -0
  31. {acryl_datahub-0.15.0rc16.dist-info → acryl_datahub-0.15.0rc17.dist-info}/WHEEL +0 -0
  32. {acryl_datahub-0.15.0rc16.dist-info → acryl_datahub-0.15.0rc17.dist-info}/entry_points.txt +0 -0
  33. {acryl_datahub-0.15.0rc16.dist-info → acryl_datahub-0.15.0rc17.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- datahub/__init__.py,sha256=rPlQTQrjyFLBRsOQhvB6o82Gor0VfK7UUrRTXzbe8kw,575
1
+ datahub/__init__.py,sha256=36zFdNjWt7jwEClr7v19ajrbbZWK2fom9sGt-Llby2I,575
2
2
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
3
3
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
4
4
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
52
52
  datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
53
  datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
54
54
  datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- datahub/api/entities/structuredproperties/structuredproperties.py,sha256=UeNPicCmrYJThv0msUlYUNArHGcjdc_0EX5yLijc_Ao,9267
55
+ datahub/api/entities/structuredproperties/structuredproperties.py,sha256=PcTX5gI7pg_Aq9JeIvUNZ5JYrQ2XS1uUEJZ73ORgYgA,9434
56
56
  datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
57
57
  datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
58
58
  datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
@@ -61,7 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
61
  datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
62
62
  datahub/cli/cli_utils.py,sha256=gFmcOGAT6IdrTwmpRFSwaqzGmoqS4dbWrxILB1uvlGk,13214
63
63
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
64
- datahub/cli/delete_cli.py,sha256=Z7iXNr4ZMmghCnldU8laK4SwTNrhQEEnnUH_TeaBKog,21838
64
+ datahub/cli/delete_cli.py,sha256=VLeHi7MLFCtTk7MI4y8r_k_7aLcCUZIglU2MNLsXU6M,23051
65
65
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
66
66
  datahub/cli/docker_cli.py,sha256=QGoWFp8ZZsXOSMbgu0Q4snMmMmtP3epWAN-fYglUNEc,36491
67
67
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
@@ -89,7 +89,7 @@ datahub/cli/specific/structuredproperties_cli.py,sha256=Q-ew8JBPmSbyj2IGFaaMHs1V
89
89
  datahub/cli/specific/user_cli.py,sha256=jGAokb1NRu8obs6P2g4OL2NQdFgpUBa9De55TBBtun0,1897
90
90
  datahub/configuration/__init__.py,sha256=5TN3a7CWNsLRHpdj-sv2bxKWF2IslvJwE6EpNMFrIS4,123
91
91
  datahub/configuration/_config_enum.py,sha256=ul2hr5gMmdLvBINicFkMNMi1ApmnmZSwNdUYYted5nk,1447
92
- datahub/configuration/common.py,sha256=ZEApfo1lyQ4zIwelBBPzAEsMSGt6BdqVK9n8MPE7pnQ,10511
92
+ datahub/configuration/common.py,sha256=Ngj2-HKPEhCMbcx3phUqyoOHayhqWNt1t0e2hO3GQNY,10508
93
93
  datahub/configuration/config_loader.py,sha256=4V8rrbKvCbfEys2Tlw2uZXb3yC9Hpoubn2O8GXhGe3A,5785
94
94
  datahub/configuration/connection_resolver.py,sha256=n4-6MwMiOEDgTouxO0SMjTILKVhJPo6-naE6FuR5qMs,1516
95
95
  datahub/configuration/datetimes.py,sha256=nayNc0mmlVKH6oVv9ud6C1dDUiZPGabW-YZxvrkosPg,2870
@@ -138,7 +138,7 @@ datahub/ingestion/api/registry.py,sha256=LGElUdzhNQoEr-k2SN23mJaIYnA1PYfF97LQxBm
138
138
  datahub/ingestion/api/report.py,sha256=CpQHqLAoYGV4bxNIpYQugLY0EUoxROlp2NUM9ONHj_I,4364
139
139
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
140
140
  datahub/ingestion/api/sink.py,sha256=6g01wou8pv79s0leDWyK12cgl7eLtpiwSUHqOw08vx4,4503
141
- datahub/ingestion/api/source.py,sha256=LktIC5KuRr_5ncLtGnU2bBSqE7Vy0yp9kBF5n9lq-4Y,18627
141
+ datahub/ingestion/api/source.py,sha256=W_GkXkEXGdwwO0OEaR2BgxoBAATsvY9VIubCTXSHfB8,18774
142
142
  datahub/ingestion/api/source_helpers.py,sha256=ninruzG4MwJuEmkOzpqLONzVi4OOi2x3RLWoogoELY4,19708
143
143
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
144
144
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
@@ -168,13 +168,13 @@ datahub/ingestion/graph/config.py,sha256=3b_Gxa5wcBnphP63bBiAFdWS7PJhUHRE1WZL_q4
168
168
  datahub/ingestion/graph/connections.py,sha256=9462L0ZWGKURyypAln25eMPhK3pcufBar9tNDoqspXs,741
169
169
  datahub/ingestion/graph/filters.py,sha256=UeUZQHoimavIYx-jXLA0WGkOUe10TaO8uEZkfa-QgNE,6188
170
170
  datahub/ingestion/reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
171
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=ebQyfart6xL86SBujWS1M81zs9yrq8g5fgojER-l7Og,8363
171
+ datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py,sha256=rrnlgptYF3YkxWlLYpkLm3mgrmzHcy6AwTHUG18bKVA,8373
172
172
  datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T6spqpS6XBDYnrZU,1640
173
173
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
174
174
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
175
175
  datahub/ingestion/run/connection.py,sha256=dqS9Fp8byIJNydPmVgtjjjlPJguuUWuMuvGnpNbQdSs,1474
176
- datahub/ingestion/run/pipeline.py,sha256=8MNUC19h7AvxjlDJj3E_FZlY56SAUlYG0heIko2XK_g,30572
177
- datahub/ingestion/run/pipeline_config.py,sha256=91Uvs76EGbCzZZbm819TT0L6pixf2tfI2_nHpnCoyS4,3948
176
+ datahub/ingestion/run/pipeline.py,sha256=9yqpcU_-Lr224cFrxi9rLJHmG04cHGIORcbzQw0H1pI,30577
177
+ datahub/ingestion/run/pipeline_config.py,sha256=c8WeID2sa3OB3lNs_rBIf7jRSKaQ9ta38uvTxScq5Fo,4099
178
178
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
179
179
  datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvSc7YOgY,557
180
180
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
@@ -320,7 +320,7 @@ datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
320
320
  datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR2woGLRJwBXUkgXq0,28809
321
321
  datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
322
322
  datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
323
- datahub/ingestion/source/kafka/kafka.py,sha256=QUw8VCmqIhZJvUiFJmFmekFmy4nXCLD4EKJNC6jk6Y4,26092
323
+ datahub/ingestion/source/kafka/kafka.py,sha256=9SR7bqp9J0rPYde5IClhnAuVNy9ItsB8-ZeXtTc_mEY,26442
324
324
  datahub/ingestion/source/kafka/kafka_connect.py,sha256=5KUlhn3876c41Z3kx5l4oJhbu0ekXZQRdxmu52vb_v8,55167
325
325
  datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
326
326
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -338,13 +338,13 @@ datahub/ingestion/source/looker/looker_source.py,sha256=AByQxWVfOBqOtZPaR_cw9SB-
338
338
  datahub/ingestion/source/looker/looker_template_language.py,sha256=EG4ZfVZ0x53lgaYh2ohzL4ZCy9KsX0TA51XqCmsCd2Q,14328
339
339
  datahub/ingestion/source/looker/looker_usage.py,sha256=qegMr-Rnqz3xNGSBfsuD3S_BPXf7UEMhwFN7DPQeLNo,22914
340
340
  datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
341
- datahub/ingestion/source/looker/lookml_concept_context.py,sha256=f-mkjozmjzwxXGjaxPMYwkie137VwD8t0seVPiQmlFw,18108
341
+ datahub/ingestion/source/looker/lookml_concept_context.py,sha256=guaIfY_cP8UOboJhsf9a1b9r6xjLh8k5C9uMb4p5Neg,18066
342
342
  datahub/ingestion/source/looker/lookml_config.py,sha256=Q0fMsu_Cvm8807R6VB14VJDLqjoLTyGF-WsiUD6xEk8,10519
343
343
  datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
344
344
  datahub/ingestion/source/looker/lookml_source.py,sha256=ifuneqLsVK_TfYrMkCZ1uB4yeZ1QbRkcGhTOnyc9DT8,40520
345
345
  datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
346
346
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
347
- datahub/ingestion/source/looker/view_upstream.py,sha256=k278-uwh8uspdREpjE_uqks4wB1t1tu7-O2hbYdwqa0,24288
347
+ datahub/ingestion/source/looker/view_upstream.py,sha256=rAWKif3UngeRGS_CMQqjihKs3Lxiu6DecLAdtdxvqOw,26096
348
348
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
349
349
  datahub/ingestion/source/metadata/business_glossary.py,sha256=eRVRpQI0ZX5OofS1BUhNihFOfWih70TIAkJM7zaMH80,17577
350
350
  datahub/ingestion/source/metadata/lineage.py,sha256=XiZGuY6k3O9qBmgo7AzosIndJHwrvEhapVLdRlDxCuc,9507
@@ -430,15 +430,15 @@ datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FU
430
430
  datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=w2CPm5XEU-KMUSIpb58aKOaxTDHfM5NvghutCVRicy4,23247
431
431
  datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
432
432
  datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
433
- datahub/ingestion/source/snowflake/snowflake_query.py,sha256=e6WodpmNto-I8lmexRd7VO0lxJDxM66MCGnG5dzr1Dk,38067
434
- datahub/ingestion/source/snowflake/snowflake_report.py,sha256=KjNvYufQMVkFP7F5sEFumKorkiFAmFVCQ1jYqXr0ev0,6419
435
- datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=fatrKpBUY9CnzXhLJcFlHkHGt0QWFhkYH9ZXwWoQCLA,20392
436
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=JjzhhyEN9QBUv-64sHhkq-4Vq1XhDtz9npLMiqlSICo,38893
433
+ datahub/ingestion/source/snowflake/snowflake_query.py,sha256=PuqoseJbqkQEIYkmlLvPJxcVOGG7HVs4U-WWFQgQEWs,38211
434
+ datahub/ingestion/source/snowflake/snowflake_report.py,sha256=_-rD7Q4MzKY8fYzJHSBnGX4gurwujL3UoRzcP_TZURs,6468
435
+ datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=K31vJ19ZCIqtJkszsJWF1eppu8U23gkZYfb5jw231dc,20997
436
+ datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=st4qoOdMGuo6fJQh-cJf_2hnczIuv6VRXGO4x3p1MgQ,39416
437
437
  datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-gPdzwtON9dJa0eqHt-8Yr5h2Q,6366
438
438
  datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
439
439
  datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
440
440
  datahub/ingestion/source/snowflake/snowflake_usage_v2.py,sha256=PEmYNMXJRUvLQmVd8juVqjokfuSPuH9ppcM0ruXamxA,24807
441
- datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=Ux4sieWe79KZztquvrPkpJoOegLfTAWVv1A73UUlbGs,11365
441
+ datahub/ingestion/source/snowflake/snowflake_utils.py,sha256=443P7t839_iRymWMIg-dd7to21smsazS110UKEYbpEU,12588
442
442
  datahub/ingestion/source/snowflake/snowflake_v2.py,sha256=lo_3asTuIZbF-LuEUcYL-9NIZ720n7oB9mYA6WVTWA4,31960
443
443
  datahub/ingestion/source/sql/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
444
444
  datahub/ingestion/source/sql/athena.py,sha256=G3cIY8H_76lIUAzQWW2kLnZOEsfbakmojxbiHb3dYZ8,24059
@@ -475,7 +475,7 @@ datahub/ingestion/source/state/checkpoint.py,sha256=x9Xww-MIFXSKjeg1tOZXE72LehCm
475
475
  datahub/ingestion/source/state/entity_removal_state.py,sha256=zvIsmYg7oiIu2FhecU0VfLBNToUqvKoKyDeiFfkOcyc,6611
476
476
  datahub/ingestion/source/state/profiling_state.py,sha256=lsWu7oZhB9nSlqoklvjs-LjS4XF0p6BxSAcLY-xKRzM,512
477
477
  datahub/ingestion/source/state/profiling_state_handler.py,sha256=IgvmTszxjR53JX-uTNjFx7ZMWK34DlAWjA3QKSLa644,4293
478
- datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=ktPwtN1DcpwAPQ4zu6DQV-o8tjj-5RcftJibnU3upZU,9484
478
+ datahub/ingestion/source/state/redundant_run_skip_handler.py,sha256=WGf9KMRxxtw_4J6lQcH2nHezKmdaWatf5ulFNG9qaFc,9468
479
479
  datahub/ingestion/source/state/sql_common_state.py,sha256=OtJpJfMTBSgyR37dn3w-nnZwlc0nFNb2GoUzIWhnyAc,143
480
480
  datahub/ingestion/source/state/stale_entity_removal_handler.py,sha256=E9d47MEKjeIGVD4PAcyU6fWBa72gOH0HFc3ljNx2eYA,14998
481
481
  datahub/ingestion/source/state/stateful_ingestion_base.py,sha256=N0Qzp2t2qAf27WndhsvGbCYCd29dSrLY3TSfbO0hoKA,17369
@@ -486,8 +486,8 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
486
486
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
487
487
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
488
488
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
489
- datahub/ingestion/source/tableau/tableau.py,sha256=khC6lPXMz-t7Oqbxw0GH-O47NTthJe38clIP1XXLzsg,135453
490
- datahub/ingestion/source/tableau/tableau_common.py,sha256=Dy_2pvkPucZJsG_LvQZLlxNEkjh-yOXHlZ4jurq9opM,26069
489
+ datahub/ingestion/source/tableau/tableau.py,sha256=2M0d4IYn0kcMFlQ2yAvPRnXKZcj_xcqvEJik7QDnebI,136605
490
+ datahub/ingestion/source/tableau/tableau_common.py,sha256=WugmFZvLgrHjvhUVBBZGRXiBJcsh2qcZK2TnWo5UQEA,26007
491
491
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
492
492
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
493
493
  datahub/ingestion/source/unity/analyze_profiler.py,sha256=2pqkFY30CfN4aHgFZZntjeG0hNhBytZJvXC13VfTc1I,4689
@@ -559,8 +559,8 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
559
559
  datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
560
560
  datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
561
561
  datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
562
- datahub/metadata/_schema_classes.py,sha256=g7jG02LNEUUFRP76CCJhdi9JIsvbanWivbeWjb9B0p8,950122
563
- datahub/metadata/schema.avsc,sha256=YttbenOzBUCzHHXR3HztORZ746UckyicBk7IyXvG-yU,675487
562
+ datahub/metadata/_schema_classes.py,sha256=iPeBXGvbNEm0vw5pYwunnvx7bTtBdmIQVtzMOlS6bSI,955042
563
+ datahub/metadata/schema.avsc,sha256=wMMSgx3OtzD1tNTC4dh_PFBZrnco21i2jO5J7oy2PgE,677545
564
564
  datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
565
565
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
566
566
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
@@ -627,7 +627,7 @@ datahub/metadata/com/linkedin/pegasus2avro/secret/__init__.py,sha256=qk61EqqVZF6
627
627
  datahub/metadata/com/linkedin/pegasus2avro/settings/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
628
628
  datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py,sha256=1AcLBo5isT59x-EWYjOv_70Y5YJpowAjTSH4ZkZQVJ0,711
629
629
  datahub/metadata/com/linkedin/pegasus2avro/step/__init__.py,sha256=HLNNbqBlyhcg09eXWx_AMD_JoOtBPYEi2kv12PE0R9E,329
630
- datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py,sha256=RPFvtZT2yxPmcXNMFKZDunKRpfNXe35CHImlqgwWgiA,888
630
+ datahub/metadata/com/linkedin/pegasus2avro/structured/__init__.py,sha256=Cry61gPw6m5MQuJpPxADRm3jhI0XVqzznyD3fVKMkvc,1013
631
631
  datahub/metadata/com/linkedin/pegasus2avro/tag/__init__.py,sha256=Odb4mzloKJIlpoFHODEIxt_OIgFNrZExcyQtvXxjOFQ,290
632
632
  datahub/metadata/com/linkedin/pegasus2avro/telemetry/__init__.py,sha256=N4CJwzAqTrRoCQ2Aoa_e8cUZI_fzn9Zdo2okvO-_nWE,302
633
633
  datahub/metadata/com/linkedin/pegasus2avro/test/__init__.py,sha256=Z4DlDtf-NELFpx44Pk4RL1JlGuxtgEAMa6Sko8QBsGw,711
@@ -829,8 +829,9 @@ datahub/metadata/schemas/Siblings.avsc,sha256=NTktntlHuA1InH3TgrspWlFBntYlqmp3er
829
829
  datahub/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNWQj-51vRNmNIj38b1wayA0aQ,1370
830
830
  datahub/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
831
831
  datahub/metadata/schemas/StructuredProperties.avsc,sha256=N0NNDrkqbIgEHrb5uz1ynwZh3mb_ICVK7tDcnBLMfjI,4032
832
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=peoH0PU-niLwr7upPn0Vxn_9erK3NfsWrd66EvzajjY,11455
833
- datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=dW7cVCGkEKNnD5DwUXf72rfCffamsMGDnURu7qNK1rs,582
832
+ datahub/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=IsyjxjqDaBYo-q-Fl0pQL0kIvWNdDFNQMAlGGQXFMNc,11461
833
+ datahub/metadata/schemas/StructuredPropertyKey.avsc,sha256=lp7tQBgeriEU1YMQ6a4-6aUGSWDqNl00lLDym97j1yI,618
834
+ datahub/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
834
835
  datahub/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
835
836
  datahub/metadata/schemas/TagKey.avsc,sha256=BfckMlx-wg_LV1_PFVgItfNBPtCQ8_erGeQM4LzOXmY,640
836
837
  datahub/metadata/schemas/TagProperties.avsc,sha256=Qzttxd7BB38JUwwl7tZzIV1Warnh-uQO-Ahw9Sd-vH4,883
@@ -866,12 +867,12 @@ datahub/sql_parsing/_models.py,sha256=il-xm1RcLdi1phJUV3xrTecdOGH31akqheuSC2N4Yh
866
867
  datahub/sql_parsing/_sqlglot_patch.py,sha256=iYJ8zOThHqqbamD5jdNr9iHTWD7ewNeHzPiTb6-rO3Y,7043
867
868
  datahub/sql_parsing/datajob.py,sha256=1X8KpEk-y3_8xJuA_Po27EHZgOcxK9QADI6Om9gSGn0,1751
868
869
  datahub/sql_parsing/query_types.py,sha256=FKjDzszZzsrCfYfm7dgD6T_8865qxWl767fdGyHWBh4,2720
869
- datahub/sql_parsing/schema_resolver.py,sha256=9wbJT80K4nsIHHOuQLso9QoFQAYwfSJZnqHwsaU3UTY,10197
870
+ datahub/sql_parsing/schema_resolver.py,sha256=9INZWdxA2dMSLK6RXaVqjbjyLY_VKMhCkQv_Xd6Ln3I,10848
870
871
  datahub/sql_parsing/split_statements.py,sha256=uZhAXLaRxDfmK0lPBW2oM_YVdJfSMhdgndnfd9iIXuA,5001
871
872
  datahub/sql_parsing/sql_parsing_aggregator.py,sha256=gLelf5l73EufB8qijb9ZDLANkt4o05schGg4DY-bOJs,69937
872
873
  datahub/sql_parsing/sql_parsing_common.py,sha256=h_V_m54hJ9EUh5kczq7cYOIeNeo4bgf0Px0H-Nq-UIg,2602
873
874
  datahub/sql_parsing/sql_parsing_result_utils.py,sha256=prwWTj1EB2fRPv1eMB4EkpFNafIYAt-X8TIK0NWqank,796
874
- datahub/sql_parsing/sqlglot_lineage.py,sha256=zvIkfBIT5LmdwT0KxSqpbPjJtiSoOpkZAT3mPaFZ2ko,46208
875
+ datahub/sql_parsing/sqlglot_lineage.py,sha256=CLDOc0HNqL_539eahOP3QOoldIYC6CF29id4Xe3TlEM,47018
875
876
  datahub/sql_parsing/sqlglot_utils.py,sha256=8MYzkyekhup3ihVStRPuwneWPNu17xhBg5SG8iVfFRY,14431
876
877
  datahub/sql_parsing/tool_meta_extractor.py,sha256=pE-pkRKBfNTXEJkaQM9NlG807mc-X6OtetgskJySCs8,2908
877
878
  datahub/telemetry/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -883,6 +884,7 @@ datahub/testing/check_sql_parser_result.py,sha256=f7U7IUSbfV4VACdNI857wPZ9tAZ9j6
883
884
  datahub/testing/check_str_enum.py,sha256=yqk0XXHOGteN-IGqCp5JHy0Kca13BnI09ZqKc4Nwl3E,1187
884
885
  datahub/testing/compare_metadata_json.py,sha256=EzIPHtRL00a1PSdaA82LU0oRo85GqjF7_jjWG_NwfW8,5274
885
886
  datahub/testing/docker_utils.py,sha256=g169iy_jNR_mg0p8X31cChZqjOryutAIHUYLq3xqueY,2415
887
+ datahub/testing/doctest.py,sha256=1_8WEhHZ2eRQtw8vsXKzr9L5zzvs0Tcr6q4mnkyyvtw,295
886
888
  datahub/testing/mcp_diff.py,sha256=_sBFhmclYXJGQ_JYDrvKWXNGXt9ACvqeQvFaZrRHa8Q,10729
887
889
  datahub/upgrade/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
888
890
  datahub/upgrade/upgrade.py,sha256=Hp9KzUdmRlNw1oZaiex8a68YmeyxDsl4VZjogyfLONY,16328
@@ -972,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
972
974
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
973
975
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
974
976
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
975
- acryl_datahub-0.15.0rc16.dist-info/METADATA,sha256=ztX_Sh23KkAYHOFM12dj1iw70zfmILqPc65b0oSQg4w,173559
976
- acryl_datahub-0.15.0rc16.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
977
- acryl_datahub-0.15.0rc16.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
978
- acryl_datahub-0.15.0rc16.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
979
- acryl_datahub-0.15.0rc16.dist-info/RECORD,,
977
+ acryl_datahub-0.15.0rc17.dist-info/METADATA,sha256=5ZB602QpwWUttdsXUipTSML9XJPatWCW7XIkyHdVQWA,173559
978
+ acryl_datahub-0.15.0rc17.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
979
+ acryl_datahub-0.15.0rc17.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
980
+ acryl_datahub-0.15.0rc17.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
981
+ acryl_datahub-0.15.0rc17.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
3
3
 
4
4
  # Published at https://pypi.org/project/acryl-datahub/.
5
5
  __package_name__ = "acryl-datahub"
6
- __version__ = "0.15.0rc16"
6
+ __version__ = "0.15.0rc17"
7
7
 
8
8
 
9
9
  def is_dev_mode() -> bool:
@@ -118,11 +118,13 @@ class StructuredProperties(ConfigModel):
118
118
  @property
119
119
  def fqn(self) -> str:
120
120
  assert self.urn is not None
121
- return (
122
- self.qualified_name
123
- or self.id
124
- or Urn.from_string(self.urn).get_entity_id()[0]
125
- )
121
+ id = Urn.create_from_string(self.urn).get_entity_id()[0]
122
+ if self.qualified_name is not None:
123
+ # ensure that qualified name and ID match
124
+ assert (
125
+ self.qualified_name == id
126
+ ), "ID in the urn and the qualified_name must match"
127
+ return id
126
128
 
127
129
  @validator("urn", pre=True, always=True)
128
130
  def urn_must_be_present(cls, v, values):
datahub/cli/delete_cli.py CHANGED
@@ -1,4 +1,5 @@
1
1
  import logging
2
+ from concurrent.futures import ThreadPoolExecutor, as_completed
2
3
  from dataclasses import dataclass
3
4
  from datetime import datetime
4
5
  from random import choices
@@ -345,6 +346,9 @@ def undo_by_filter(
345
346
  default=False,
346
347
  help="Only delete soft-deleted entities, for hard deletion",
347
348
  )
349
+ @click.option(
350
+ "--workers", type=int, default=1, help="Num of workers to use for deletion."
351
+ )
348
352
  @upgrade.check_upgrade
349
353
  @telemetry.with_telemetry()
350
354
  def by_filter(
@@ -362,6 +366,7 @@ def by_filter(
362
366
  batch_size: int,
363
367
  dry_run: bool,
364
368
  only_soft_deleted: bool,
369
+ workers: int = 1,
365
370
  ) -> None:
366
371
  """Delete metadata from datahub using a single urn or a combination of filters."""
367
372
 
@@ -382,16 +387,19 @@ def by_filter(
382
387
  # TODO: add some validation on entity_type
383
388
 
384
389
  if not force and not soft and not dry_run:
390
+ message = (
391
+ "Hard deletion will permanently delete data from DataHub and can be slow. "
392
+ "We generally recommend using soft deletes instead. "
393
+ "Do you want to continue?"
394
+ )
385
395
  if only_soft_deleted:
386
396
  click.confirm(
387
- "This will permanently delete data from DataHub. Do you want to continue?",
397
+ message,
388
398
  abort=True,
389
399
  )
390
400
  else:
391
401
  click.confirm(
392
- "Hard deletion will permanently delete data from DataHub and can be slow. "
393
- "We generally recommend using soft deletes instead. "
394
- "Do you want to continue?",
402
+ message,
395
403
  abort=True,
396
404
  )
397
405
 
@@ -462,26 +470,64 @@ def by_filter(
462
470
  abort=True,
463
471
  )
464
472
 
465
- urns_iter = urns
466
- if not delete_by_urn and not dry_run:
467
- urns_iter = progressbar.progressbar(urns, redirect_stdout=True)
473
+ _delete_urns_parallel(
474
+ graph=graph,
475
+ urns=urns,
476
+ aspect_name=aspect,
477
+ soft=soft,
478
+ dry_run=dry_run,
479
+ delete_by_urn=delete_by_urn,
480
+ start_time=start_time,
481
+ end_time=end_time,
482
+ workers=workers,
483
+ )
484
+
468
485
 
469
- # Run the deletion.
486
+ def _delete_urns_parallel(
487
+ graph: DataHubGraph,
488
+ urns: List[str],
489
+ delete_by_urn: bool,
490
+ start_time: Optional[datetime],
491
+ end_time: Optional[datetime],
492
+ aspect_name: Optional[str] = None,
493
+ soft: bool = True,
494
+ dry_run: bool = False,
495
+ workers: int = 1,
496
+ ) -> None:
470
497
  deletion_result = DeletionResult()
471
- with PerfTimer() as timer:
472
- for urn in urns_iter:
473
- one_result = _delete_one_urn(
474
- graph=graph,
475
- urn=urn,
476
- aspect_name=aspect,
477
- soft=soft,
478
- dry_run=dry_run,
479
- start_time=start_time,
480
- end_time=end_time,
498
+
499
+ def process_urn(urn):
500
+ return _delete_one_urn(
501
+ graph=graph,
502
+ urn=urn,
503
+ aspect_name=aspect_name,
504
+ soft=soft,
505
+ dry_run=dry_run,
506
+ start_time=start_time,
507
+ end_time=end_time,
508
+ )
509
+
510
+ with PerfTimer() as timer, ThreadPoolExecutor(max_workers=workers) as executor:
511
+ future_to_urn = {executor.submit(process_urn, urn): urn for urn in urns}
512
+
513
+ completed_futures = as_completed(future_to_urn)
514
+ if not delete_by_urn and not dry_run:
515
+ futures_iter = progressbar.progressbar(
516
+ as_completed(future_to_urn),
517
+ max_value=len(future_to_urn),
518
+ redirect_stdout=True,
481
519
  )
482
- deletion_result.merge(one_result)
520
+ else:
521
+ futures_iter = completed_futures
522
+
523
+ for future in futures_iter:
524
+ try:
525
+ one_result = future.result()
526
+ deletion_result.merge(one_result)
527
+ except Exception as e:
528
+ urn = future_to_urn[future]
529
+ click.secho(f"Error processing URN {urn}: {e}", fg="red")
483
530
 
484
- # Report out a summary of the deletion result.
485
531
  click.echo(
486
532
  deletion_result.format_message(
487
533
  dry_run=dry_run, soft=soft, time_sec=timer.elapsed_seconds()
@@ -258,7 +258,7 @@ class AllowDenyPattern(ConfigModel):
258
258
  return AllowDenyPattern()
259
259
 
260
260
  def allowed(self, string: str) -> bool:
261
- if self._denied(string):
261
+ if self.denied(string):
262
262
  return False
263
263
 
264
264
  return any(
@@ -266,7 +266,7 @@ class AllowDenyPattern(ConfigModel):
266
266
  for allow_pattern in self.allow
267
267
  )
268
268
 
269
- def _denied(self, string: str) -> bool:
269
+ def denied(self, string: str) -> bool:
270
270
  for deny_pattern in self.deny:
271
271
  if re.match(deny_pattern, string, self.regex_flags):
272
272
  return True
@@ -290,7 +290,7 @@ class AllowDenyPattern(ConfigModel):
290
290
  raise ValueError(
291
291
  "allow list must be fully specified to get list of allowed strings"
292
292
  )
293
- return [a for a in self.allow if not self._denied(a)]
293
+ return [a for a in self.allow if not self.denied(a)]
294
294
 
295
295
  def __eq__(self, other): # type: ignore
296
296
  return isinstance(other, self.__class__) and self.__dict__ == other.__dict__
@@ -492,11 +492,15 @@ class Source(Closeable, metaclass=ABCMeta):
492
492
 
493
493
  def _infer_platform(self) -> Optional[str]:
494
494
  config = self.get_config()
495
- return (
495
+ platform = (
496
496
  getattr(config, "platform_name", None)
497
497
  or getattr(self, "platform", None)
498
498
  or getattr(config, "platform", None)
499
499
  )
500
+ if platform is None and hasattr(self, "get_platform_id"):
501
+ platform = type(self).get_platform_id()
502
+
503
+ return platform
500
504
 
501
505
  def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor:
502
506
  config = self.get_config()
@@ -148,10 +148,10 @@ class DatahubIngestionRunSummaryProvider(PipelineRunListener):
148
148
 
149
149
  def _get_recipe_to_report(self, ctx: PipelineContext) -> str:
150
150
  assert ctx.pipeline_config
151
- if not self.report_recipe or not ctx.pipeline_config._raw_dict:
151
+ if not self.report_recipe or not ctx.pipeline_config.get_raw_dict():
152
152
  return ""
153
153
  else:
154
- return json.dumps(redact_raw_config(ctx.pipeline_config._raw_dict))
154
+ return json.dumps(redact_raw_config(ctx.pipeline_config.get_raw_dict()))
155
155
 
156
156
  def _emit_aspect(self, entity_urn: Urn, aspect_value: _Aspect) -> None:
157
157
  self.sink.write_record_async(
@@ -221,7 +221,7 @@ class Pipeline:
221
221
  dry_run: bool = False,
222
222
  preview_mode: bool = False,
223
223
  preview_workunits: int = 10,
224
- report_to: Optional[str] = None,
224
+ report_to: Optional[str] = "datahub",
225
225
  no_progress: bool = False,
226
226
  ):
227
227
  self.config = config
@@ -117,3 +117,9 @@ class PipelineConfig(ConfigModel):
117
117
  config = cls.parse_obj(resolved_dict)
118
118
  config._raw_dict = raw_dict
119
119
  return config
120
+
121
+ def get_raw_dict(self) -> Dict:
122
+ result = self._raw_dict
123
+ if result is None:
124
+ result = self.dict()
125
+ return result
@@ -141,6 +141,10 @@ class KafkaSourceConfig(
141
141
  default=False,
142
142
  description="Disables the utilization of the TopicRecordNameStrategy for Schema Registry subjects. For more information, visit: https://docs.confluent.io/platform/current/schema-registry/serdes-develop/index.html#handling-differences-between-preregistered-and-client-derived-schemas:~:text=io.confluent.kafka.serializers.subject.TopicRecordNameStrategy",
143
143
  )
144
+ ingest_schemas_as_entities: bool = pydantic.Field(
145
+ default=False,
146
+ description="Enables ingesting schemas from schema registry as separate entities, in addition to the topics",
147
+ )
144
148
 
145
149
 
146
150
  def get_kafka_consumer(
@@ -343,17 +347,20 @@ class KafkaSource(StatefulIngestionSourceBase, TestableSource):
343
347
  else:
344
348
  self.report.report_dropped(topic)
345
349
 
346
- # Get all subjects from schema registry and ingest them as SCHEMA DatasetSubTypes
347
- for subject in self.schema_registry_client.get_subjects():
348
- try:
349
- yield from self._extract_record(
350
- subject, True, topic_detail=None, extra_topic_config=None
351
- )
352
- except Exception as e:
353
- logger.warning(f"Failed to extract subject {subject}", exc_info=True)
354
- self.report.report_warning(
355
- "subject", f"Exception while extracting topic {subject}: {e}"
356
- )
350
+ if self.source_config.ingest_schemas_as_entities:
351
+ # Get all subjects from schema registry and ingest them as SCHEMA DatasetSubTypes
352
+ for subject in self.schema_registry_client.get_subjects():
353
+ try:
354
+ yield from self._extract_record(
355
+ subject, True, topic_detail=None, extra_topic_config=None
356
+ )
357
+ except Exception as e:
358
+ logger.warning(
359
+ f"Failed to extract subject {subject}", exc_info=True
360
+ )
361
+ self.report.report_warning(
362
+ "subject", f"Exception while extracting topic {subject}: {e}"
363
+ )
357
364
 
358
365
  def _extract_record(
359
366
  self,
@@ -88,8 +88,7 @@ class LookerFieldContext:
88
88
  for upstream_field_match in re.finditer(r"\${TABLE}\.[\"]*([\.\w]+)", sql):
89
89
  matched_field = upstream_field_match.group(1)
90
90
  # Remove quotes from field names
91
- matched_field = matched_field.replace('"', "").replace("`", "").lower()
92
- column_names.append(matched_field)
91
+ column_names.append(matched_field.replace('"', "").replace("`", "").lower())
93
92
 
94
93
  return column_names
95
94