acryl-datahub 0.15.0rc19__py3-none-any.whl → 0.15.0rc21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-0.15.0rc19.dist-info → acryl_datahub-0.15.0rc21.dist-info}/METADATA +2334 -2334
- {acryl_datahub-0.15.0rc19.dist-info → acryl_datahub-0.15.0rc21.dist-info}/RECORD +20 -20
- datahub/__init__.py +1 -1
- datahub/api/entities/structuredproperties/structuredproperties.py +56 -68
- datahub/emitter/rest_emitter.py +17 -4
- datahub/ingestion/sink/datahub_rest.py +12 -1
- datahub/ingestion/source/dremio/dremio_api.py +193 -86
- datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +2 -0
- datahub/ingestion/source/dremio/dremio_reporting.py +15 -0
- datahub/ingestion/source/kafka/kafka_connect.py +81 -51
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +2 -1
- datahub/ingestion/source/snowflake/snowflake_query.py +13 -0
- datahub/ingestion/source/snowflake/snowflake_schema.py +16 -0
- datahub/ingestion/source/snowflake/snowflake_schema_gen.py +23 -0
- datahub/metadata/_schema_classes.py +400 -400
- datahub/metadata/_urns/urn_defs.py +1355 -1355
- datahub/metadata/schema.avsc +17221 -17574
- {acryl_datahub-0.15.0rc19.dist-info → acryl_datahub-0.15.0rc21.dist-info}/WHEEL +0 -0
- {acryl_datahub-0.15.0rc19.dist-info → acryl_datahub-0.15.0rc21.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-0.15.0rc19.dist-info → acryl_datahub-0.15.0rc21.dist-info}/top_level.txt +0 -0
|
{acryl_datahub-0.15.0rc19.dist-info → acryl_datahub-0.15.0rc21.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
datahub/__init__.py,sha256=
|
|
1
|
+
datahub/__init__.py,sha256=caUPlyD6P05EsMKzRYtlTS611d82sT4szr8_WAu_rJ4,575
|
|
2
2
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
3
3
|
datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
|
|
4
4
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -52,7 +52,7 @@ datahub/api/entities/forms/forms_graphql_constants.py,sha256=DKpnKlMKTjmnyrCTvp6
|
|
|
52
52
|
datahub/api/entities/platformresource/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
53
|
datahub/api/entities/platformresource/platform_resource.py,sha256=pVAjv6NoH746Mfvdak7ji0eqlEcEeV-Ji7M5gyNXmds,10603
|
|
54
54
|
datahub/api/entities/structuredproperties/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=
|
|
55
|
+
datahub/api/entities/structuredproperties/structuredproperties.py,sha256=YO4mdn6BziOzvzoFe-g2KfZlOZy8gqwMyyzj_7vF4BY,8845
|
|
56
56
|
datahub/api/graphql/__init__.py,sha256=5yl0dJxO-2d_QuykdJrDIbWq4ja9bo0t2dAEh89JOog,142
|
|
57
57
|
datahub/api/graphql/assertion.py,sha256=ponITypRQ8vE8kiqRNpvdoniNJzi4aeBK97UvkF0VhA,2818
|
|
58
58
|
datahub/api/graphql/base.py,sha256=9q637r6v-RGOd8Mk8HW2g0vt9zpqFexsQ5R6TPEHVbs,1614
|
|
@@ -119,7 +119,7 @@ datahub/emitter/mcp.py,sha256=hAAYziDdkwjazQU0DtWMbQWY8wS09ACrKJbqxoWXdgc,9637
|
|
|
119
119
|
datahub/emitter/mcp_builder.py,sha256=ju-1dZMKs5dlWcTi4zcNRVmhkfhmfX3JFULZSbgxSFs,9968
|
|
120
120
|
datahub/emitter/mcp_patch_builder.py,sha256=W85q1maVUMpOIo5lwLRn82rLXRVoZ_gurl_a-pvVCpE,4291
|
|
121
121
|
datahub/emitter/request_helper.py,sha256=33ORG3S3OVy97_jlWBRn7yUM5XCIkRN6WSdJvN7Ofcg,670
|
|
122
|
-
datahub/emitter/rest_emitter.py,sha256=
|
|
122
|
+
datahub/emitter/rest_emitter.py,sha256=3kG_aPKy9pLibd4SJNtdJxn792c5TJliFjjCOw6NoUM,15533
|
|
123
123
|
datahub/emitter/serialization_helper.py,sha256=q12Avmf70Vy4ttQGMJoTKlE5EsybMKNg2w3MQeZiHvk,3652
|
|
124
124
|
datahub/emitter/sql_parsing_builder.py,sha256=Cr5imZrm3dYDSCACt5MFscgHCtVbHTD6IjUmsvsKoEs,11991
|
|
125
125
|
datahub/emitter/synchronized_file_emitter.py,sha256=s4ATuxalI4GDAkrZTaGSegxBdvvNPZ9jRSdtElU0kNs,1805
|
|
@@ -180,7 +180,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
|
|
|
180
180
|
datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
|
|
181
181
|
datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
|
|
182
182
|
datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
|
|
183
|
-
datahub/ingestion/sink/datahub_rest.py,sha256=
|
|
183
|
+
datahub/ingestion/sink/datahub_rest.py,sha256=ME8OygJgd7AowrokJLmdjYHxIQEy5jXWS0yKwOLR934,12592
|
|
184
184
|
datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
|
|
185
185
|
datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
|
|
186
186
|
datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -282,13 +282,13 @@ datahub/ingestion/source/delta_lake/delta_lake_utils.py,sha256=VqIDPEXepOnlk4oWM
|
|
|
282
282
|
datahub/ingestion/source/delta_lake/report.py,sha256=uqWWivPltlZ7dwpOOluTvHOKKsSusqihn67clCAwxoM,467
|
|
283
283
|
datahub/ingestion/source/delta_lake/source.py,sha256=jLCN6SeAv3bCD4w4ZDw15eIbFF3yVWcxVtBklovFEBg,13548
|
|
284
284
|
datahub/ingestion/source/dremio/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
285
|
-
datahub/ingestion/source/dremio/dremio_api.py,sha256=
|
|
285
|
+
datahub/ingestion/source/dremio/dremio_api.py,sha256=am8o_mQq7zteI4zasnkRb9B9-_BFrchTIA_oJkqRagA,33470
|
|
286
286
|
datahub/ingestion/source/dremio/dremio_aspects.py,sha256=3VeHzCw9q1ytngmsq_K4Ll9tWD2V8EDFySBImHdhPAw,18287
|
|
287
287
|
datahub/ingestion/source/dremio/dremio_config.py,sha256=5SP66ewGYN0OnyWgpU33EZOmtICsclTtBX5DSYLwl3c,5782
|
|
288
|
-
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256
|
|
288
|
+
datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py,sha256=-Fefw59tXR6QA8ifOz_mieDccMMG_vyQgp7j-BaXFHQ,3070
|
|
289
289
|
datahub/ingestion/source/dremio/dremio_entities.py,sha256=3H3vIvj5ab4d8gmB9-rbZfwRgW87gT1DdjWiMjNgqJ4,15069
|
|
290
290
|
datahub/ingestion/source/dremio/dremio_profiling.py,sha256=TAcnpo8ZRKhLDHnQSJzJg3YdwTSyEa73LUAzENs7wG4,12287
|
|
291
|
-
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=
|
|
291
|
+
datahub/ingestion/source/dremio/dremio_reporting.py,sha256=pYyEOAxiotyVySumY85Ql8vtGsne7B9sDrdqeVFnWLQ,1742
|
|
292
292
|
datahub/ingestion/source/dremio/dremio_source.py,sha256=NJxDXWd19A3MPplPiLPAjxTmjeJBA04PcPytRSslmYQ,26323
|
|
293
293
|
datahub/ingestion/source/dremio/dremio_sql_queries.py,sha256=W0rcXawlwJOHNYr5o73rilMijtFOO3cVkn6pY-JLc6o,8186
|
|
294
294
|
datahub/ingestion/source/dynamodb/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -321,7 +321,7 @@ datahub/ingestion/source/identity/azure_ad.py,sha256=GdmJFD4UMsb5353Z7phXRf-YsXR
|
|
|
321
321
|
datahub/ingestion/source/identity/okta.py,sha256=PnRokWLG8wSoNZlXJiRZiW6APTEHO09q4n2j_l6m3V0,30756
|
|
322
322
|
datahub/ingestion/source/kafka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
323
323
|
datahub/ingestion/source/kafka/kafka.py,sha256=9SR7bqp9J0rPYde5IClhnAuVNy9ItsB8-ZeXtTc_mEY,26442
|
|
324
|
-
datahub/ingestion/source/kafka/kafka_connect.py,sha256=
|
|
324
|
+
datahub/ingestion/source/kafka/kafka_connect.py,sha256=Jm1MYky_OPIwvVHuEjgOjK0e6-jA-dYnsLZ7r-Y_9mA,56208
|
|
325
325
|
datahub/ingestion/source/kafka/kafka_schema_registry_base.py,sha256=13XjSwqyVhH1CJUFHAbWdmmv_Rw0Ju_9HQdBmIzPNNA,566
|
|
326
326
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
327
327
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
@@ -427,13 +427,13 @@ datahub/ingestion/source/snowflake/snowflake_assertion.py,sha256=_l3k4aI9wvioE81
|
|
|
427
427
|
datahub/ingestion/source/snowflake/snowflake_config.py,sha256=LZqnTELtzRNf0vsKG-xXggXyt13S9RYvHOZEZHRjgNk,18851
|
|
428
428
|
datahub/ingestion/source/snowflake/snowflake_connection.py,sha256=yzv-01FdmfDSCJY5rqKNNodXxzg3SS5DF7oA4WXArOA,17793
|
|
429
429
|
datahub/ingestion/source/snowflake/snowflake_data_reader.py,sha256=ffR5E2uhD71FUMXd3XOg2rHwrp1rbbGEFTAbqKcmI2s,2195
|
|
430
|
-
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=
|
|
430
|
+
datahub/ingestion/source/snowflake/snowflake_lineage_v2.py,sha256=EnTJoRIQKcZOIYfb_NUff_YA8IdIroaFD1JHUn-M6ok,23346
|
|
431
431
|
datahub/ingestion/source/snowflake/snowflake_profiler.py,sha256=0DJiSwII6FY34urlBja2FW66NaVvhbBWmG0p7u8Xyrc,7548
|
|
432
432
|
datahub/ingestion/source/snowflake/snowflake_queries.py,sha256=fu-8S9eADIXZcd_kHc6cBeMa-on9RF9qG3yqjJnS3DE,26085
|
|
433
|
-
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=
|
|
433
|
+
datahub/ingestion/source/snowflake/snowflake_query.py,sha256=yDu_1aTAG7eLEh1w1FGmn2-c6NJZURdslnI6fC_4B_0,38723
|
|
434
434
|
datahub/ingestion/source/snowflake/snowflake_report.py,sha256=_-rD7Q4MzKY8fYzJHSBnGX4gurwujL3UoRzcP_TZURs,6468
|
|
435
|
-
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=
|
|
436
|
-
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=
|
|
435
|
+
datahub/ingestion/source/snowflake/snowflake_schema.py,sha256=z5ZPgh-TILAz0DeIwDxRCsj980CM2BbftXiFpM1dV_Y,21674
|
|
436
|
+
datahub/ingestion/source/snowflake/snowflake_schema_gen.py,sha256=vof3mNImstnlL8kc0OkTHzMIqnbEkt9RmnYBX1JX0oE,40386
|
|
437
437
|
datahub/ingestion/source/snowflake/snowflake_shares.py,sha256=ud3Ah4qHrmSfpD8Od-gPdzwtON9dJa0eqHt-8Yr5h2Q,6366
|
|
438
438
|
datahub/ingestion/source/snowflake/snowflake_summary.py,sha256=kTmuCtRnvHqM8WBYhWeK4XafJq3ssFL9kcS03jEeWT4,5506
|
|
439
439
|
datahub/ingestion/source/snowflake/snowflake_tag.py,sha256=fyfWmFVz2WZrpTJWNIe9m0WpDHgeFrGPf8diORJZUwo,6212
|
|
@@ -559,12 +559,12 @@ datahub/lite/lite_registry.py,sha256=bpH0kasP-LtwwUFNA2QsOIehfekAYfJtN-AkQLmSWnw
|
|
|
559
559
|
datahub/lite/lite_server.py,sha256=p9Oa2nNs65mqcssSIVOr7VOzWqfVstz6ZQEdT4f82S0,1949
|
|
560
560
|
datahub/lite/lite_util.py,sha256=pgBpT3vTO1YCQ2njZRNyicSkHYeEmQCt41BaXU8WvMo,4503
|
|
561
561
|
datahub/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
562
|
-
datahub/metadata/_schema_classes.py,sha256=
|
|
563
|
-
datahub/metadata/schema.avsc,sha256=
|
|
562
|
+
datahub/metadata/_schema_classes.py,sha256=FTLom36n7gr6zxYfPWWoy9AmdnB4KOIXYRoVZbS9kog,955042
|
|
563
|
+
datahub/metadata/schema.avsc,sha256=D-rNu2SC2tyvqju8pQwGNGGT9zy1_fzxzoigH5YmUvo,722242
|
|
564
564
|
datahub/metadata/schema_classes.py,sha256=X5Jl5EaSxyHdXOQv14pJ5WkQALun4MRpJ4q12wVFE18,1299
|
|
565
565
|
datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
|
|
566
566
|
datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
567
|
-
datahub/metadata/_urns/urn_defs.py,sha256=
|
|
567
|
+
datahub/metadata/_urns/urn_defs.py,sha256=LFHZGzHlDA0KJes1Xg7-lWetXusi7bubA7Q5hu4ER88,107119
|
|
568
568
|
datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
569
569
|
datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
|
|
570
570
|
datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
|
|
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
974
974
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
975
975
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
976
976
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
977
|
-
acryl_datahub-0.15.
|
|
978
|
-
acryl_datahub-0.15.
|
|
979
|
-
acryl_datahub-0.15.
|
|
980
|
-
acryl_datahub-0.15.
|
|
981
|
-
acryl_datahub-0.15.
|
|
977
|
+
acryl_datahub-0.15.0rc21.dist-info/METADATA,sha256=e3Tw7Cix7Z1uR8zyUtppjUv0ztJa2Kga0yl7nwPMbF8,173559
|
|
978
|
+
acryl_datahub-0.15.0rc21.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
979
|
+
acryl_datahub-0.15.0rc21.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
|
|
980
|
+
acryl_datahub-0.15.0rc21.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
981
|
+
acryl_datahub-0.15.0rc21.dist-info/RECORD,,
|
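datahub/__init__.py
CHANGED
|
[diff body for this file is missing from this extract]
|
datahub/api/entities/structuredproperties/structuredproperties.py
CHANGED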
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from contextlib import contextmanager
|
|
3
2
|
from enum import Enum
|
|
4
3
|
from pathlib import Path
|
|
5
|
-
from typing import
|
|
4
|
+
from typing import List, Optional
|
|
6
5
|
|
|
7
6
|
import yaml
|
|
8
7
|
from pydantic import validator
|
|
@@ -10,6 +9,7 @@ from ruamel.yaml import YAML
|
|
|
10
9
|
|
|
11
10
|
from datahub.configuration.common import ConfigModel
|
|
12
11
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
12
|
+
from datahub.ingestion.api.global_context import get_graph_context, set_graph_context
|
|
13
13
|
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
|
|
14
14
|
from datahub.metadata.schema_classes import (
|
|
15
15
|
PropertyValueClass,
|
|
@@ -24,23 +24,10 @@ logger = logging.getLogger(__name__)
|
|
|
24
24
|
class StructuredPropertiesConfig:
|
|
25
25
|
"""Configuration class to hold the graph client"""
|
|
26
26
|
|
|
27
|
-
_graph: Optional[DataHubGraph] = None
|
|
28
|
-
|
|
29
|
-
@classmethod
|
|
30
|
-
@contextmanager
|
|
31
|
-
def use_graph(cls, graph: DataHubGraph) -> Generator[None, None, None]:
|
|
32
|
-
"""Context manager to temporarily set a custom graph"""
|
|
33
|
-
previous_graph = cls._graph
|
|
34
|
-
cls._graph = graph
|
|
35
|
-
try:
|
|
36
|
-
yield
|
|
37
|
-
finally:
|
|
38
|
-
cls._graph = previous_graph
|
|
39
|
-
|
|
40
27
|
@classmethod
|
|
41
|
-
def
|
|
28
|
+
def get_graph_required(cls) -> DataHubGraph:
|
|
42
29
|
"""Get the current graph, falling back to default if none set"""
|
|
43
|
-
return
|
|
30
|
+
return get_graph_context() or get_default_graph()
|
|
44
31
|
|
|
45
32
|
|
|
46
33
|
class AllowedTypes(Enum):
|
|
@@ -79,7 +66,7 @@ class TypeQualifierAllowedTypes(ConfigModel):
|
|
|
79
66
|
@validator("allowed_types", each_item=True)
|
|
80
67
|
def validate_allowed_types(cls, v):
|
|
81
68
|
if v:
|
|
82
|
-
graph = StructuredPropertiesConfig.
|
|
69
|
+
graph = StructuredPropertiesConfig.get_graph_required()
|
|
83
70
|
validated_urn = Urn.make_entity_type_urn(v)
|
|
84
71
|
if not graph.exists(validated_urn):
|
|
85
72
|
raise ValueError(
|
|
@@ -106,7 +93,7 @@ class StructuredProperties(ConfigModel):
|
|
|
106
93
|
@validator("entity_types", each_item=True)
|
|
107
94
|
def validate_entity_types(cls, v):
|
|
108
95
|
if v:
|
|
109
|
-
graph = StructuredPropertiesConfig.
|
|
96
|
+
graph = StructuredPropertiesConfig.get_graph_required()
|
|
110
97
|
validated_urn = Urn.make_entity_type_urn(v)
|
|
111
98
|
if not graph.exists(validated_urn):
|
|
112
99
|
raise ValueError(
|
|
@@ -136,63 +123,64 @@ class StructuredProperties(ConfigModel):
|
|
|
136
123
|
|
|
137
124
|
@staticmethod
|
|
138
125
|
def create(file: str, graph: Optional[DataHubGraph] = None) -> None:
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
126
|
+
with set_graph_context(graph):
|
|
127
|
+
graph = StructuredPropertiesConfig.get_graph_required()
|
|
128
|
+
|
|
142
129
|
with open(file) as fp:
|
|
143
130
|
structuredproperties: List[dict] = yaml.safe_load(fp)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
131
|
+
for structuredproperty_raw in structuredproperties:
|
|
132
|
+
structuredproperty = StructuredProperties.parse_obj(
|
|
133
|
+
structuredproperty_raw
|
|
134
|
+
)
|
|
135
|
+
|
|
136
|
+
if not structuredproperty.type.islower():
|
|
137
|
+
structuredproperty.type = structuredproperty.type.lower()
|
|
138
|
+
logger.warning(
|
|
139
|
+
f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
|
|
147
140
|
)
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
|
|
152
|
-
)
|
|
153
|
-
if not AllowedTypes.check_allowed_type(structuredproperty.type):
|
|
154
|
-
raise ValueError(
|
|
155
|
-
f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
|
|
156
|
-
)
|
|
157
|
-
mcp = MetadataChangeProposalWrapper(
|
|
158
|
-
entityUrn=structuredproperty.urn,
|
|
159
|
-
aspect=StructuredPropertyDefinitionClass(
|
|
160
|
-
qualifiedName=structuredproperty.fqn,
|
|
161
|
-
valueType=Urn.make_data_type_urn(structuredproperty.type),
|
|
162
|
-
displayName=structuredproperty.display_name,
|
|
163
|
-
description=structuredproperty.description,
|
|
164
|
-
entityTypes=[
|
|
165
|
-
Urn.make_entity_type_urn(entity_type)
|
|
166
|
-
for entity_type in structuredproperty.entity_types or []
|
|
167
|
-
],
|
|
168
|
-
cardinality=structuredproperty.cardinality,
|
|
169
|
-
immutable=structuredproperty.immutable,
|
|
170
|
-
allowedValues=(
|
|
171
|
-
[
|
|
172
|
-
PropertyValueClass(
|
|
173
|
-
value=v.value, description=v.description
|
|
174
|
-
)
|
|
175
|
-
for v in structuredproperty.allowed_values
|
|
176
|
-
]
|
|
177
|
-
if structuredproperty.allowed_values
|
|
178
|
-
else None
|
|
179
|
-
),
|
|
180
|
-
typeQualifier=(
|
|
181
|
-
{
|
|
182
|
-
"allowedTypes": structuredproperty.type_qualifier.allowed_types
|
|
183
|
-
}
|
|
184
|
-
if structuredproperty.type_qualifier
|
|
185
|
-
else None
|
|
186
|
-
),
|
|
187
|
-
),
|
|
141
|
+
if not AllowedTypes.check_allowed_type(structuredproperty.type):
|
|
142
|
+
raise ValueError(
|
|
143
|
+
f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
|
|
188
144
|
)
|
|
189
|
-
|
|
145
|
+
mcp = MetadataChangeProposalWrapper(
|
|
146
|
+
entityUrn=structuredproperty.urn,
|
|
147
|
+
aspect=StructuredPropertyDefinitionClass(
|
|
148
|
+
qualifiedName=structuredproperty.fqn,
|
|
149
|
+
valueType=Urn.make_data_type_urn(structuredproperty.type),
|
|
150
|
+
displayName=structuredproperty.display_name,
|
|
151
|
+
description=structuredproperty.description,
|
|
152
|
+
entityTypes=[
|
|
153
|
+
Urn.make_entity_type_urn(entity_type)
|
|
154
|
+
for entity_type in structuredproperty.entity_types or []
|
|
155
|
+
],
|
|
156
|
+
cardinality=structuredproperty.cardinality,
|
|
157
|
+
immutable=structuredproperty.immutable,
|
|
158
|
+
allowedValues=(
|
|
159
|
+
[
|
|
160
|
+
PropertyValueClass(
|
|
161
|
+
value=v.value, description=v.description
|
|
162
|
+
)
|
|
163
|
+
for v in structuredproperty.allowed_values
|
|
164
|
+
]
|
|
165
|
+
if structuredproperty.allowed_values
|
|
166
|
+
else None
|
|
167
|
+
),
|
|
168
|
+
typeQualifier=(
|
|
169
|
+
{
|
|
170
|
+
"allowedTypes": structuredproperty.type_qualifier.allowed_types
|
|
171
|
+
}
|
|
172
|
+
if structuredproperty.type_qualifier
|
|
173
|
+
else None
|
|
174
|
+
),
|
|
175
|
+
),
|
|
176
|
+
)
|
|
177
|
+
graph.emit_mcp(mcp)
|
|
190
178
|
|
|
191
|
-
|
|
179
|
+
logger.info(f"Created structured property {structuredproperty.urn}")
|
|
192
180
|
|
|
193
181
|
@classmethod
|
|
194
182
|
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
|
|
195
|
-
with
|
|
183
|
+
with set_graph_context(graph):
|
|
196
184
|
structured_property: Optional[
|
|
197
185
|
StructuredPropertyDefinitionClass
|
|
198
186
|
] = graph.get_aspect(urn, StructuredPropertyDefinitionClass)
|
datahub/emitter/rest_emitter.py
CHANGED
|
@@ -46,8 +46,18 @@ _DEFAULT_RETRY_MAX_TIMES = int(
|
|
|
46
46
|
os.getenv("DATAHUB_REST_EMITTER_DEFAULT_RETRY_MAX_TIMES", "4")
|
|
47
47
|
)
|
|
48
48
|
|
|
49
|
-
# The limit is 16mb. We will use a max of 15mb to have some space
|
|
50
|
-
|
|
49
|
+
# The limit is 16mb. We will use a max of 15mb to have some space
|
|
50
|
+
# for overhead like request headers.
|
|
51
|
+
# This applies to pretty much all calls to GMS.
|
|
52
|
+
INGEST_MAX_PAYLOAD_BYTES = 15 * 1024 * 1024
|
|
53
|
+
|
|
54
|
+
# This limit is somewhat arbitrary. All GMS endpoints will timeout
|
|
55
|
+
# and return a 500 if processing takes too long. To avoid sending
|
|
56
|
+
# too much to the backend and hitting a timeout, we try to limit
|
|
57
|
+
# the number of MCPs we send in a batch.
|
|
58
|
+
BATCH_INGEST_MAX_PAYLOAD_LENGTH = int(
|
|
59
|
+
os.getenv("DATAHUB_REST_EMITTER_BATCH_MAX_PAYLOAD_LENGTH", 200)
|
|
60
|
+
)
|
|
51
61
|
|
|
52
62
|
|
|
53
63
|
class DataHubRestEmitter(Closeable, Emitter):
|
|
@@ -290,11 +300,14 @@ class DataHubRestEmitter(Closeable, Emitter):
|
|
|
290
300
|
# As a safety mechanism, we need to make sure we don't exceed the max payload size for GMS.
|
|
291
301
|
# If we will exceed the limit, we need to break it up into chunks.
|
|
292
302
|
mcp_obj_chunks: List[List[str]] = []
|
|
293
|
-
current_chunk_size =
|
|
303
|
+
current_chunk_size = INGEST_MAX_PAYLOAD_BYTES
|
|
294
304
|
for mcp_obj in mcp_objs:
|
|
295
305
|
mcp_obj_size = len(json.dumps(mcp_obj))
|
|
296
306
|
|
|
297
|
-
if
|
|
307
|
+
if (
|
|
308
|
+
mcp_obj_size + current_chunk_size > INGEST_MAX_PAYLOAD_BYTES
|
|
309
|
+
or len(mcp_obj_chunks[-1]) >= BATCH_INGEST_MAX_PAYLOAD_LENGTH
|
|
310
|
+
):
|
|
298
311
|
mcp_obj_chunks.append([])
|
|
299
312
|
current_chunk_size = 0
|
|
300
313
|
mcp_obj_chunks[-1].append(mcp_obj)
|
|
datahub/ingestion/sink/datahub_rest.py
CHANGED
|
@@ -18,7 +18,10 @@ from datahub.configuration.common import (
|
|
|
18
18
|
)
|
|
19
19
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
20
20
|
from datahub.emitter.mcp_builder import mcps_from_mce
|
|
21
|
-
from datahub.emitter.rest_emitter import
|
|
21
|
+
from datahub.emitter.rest_emitter import (
|
|
22
|
+
BATCH_INGEST_MAX_PAYLOAD_LENGTH,
|
|
23
|
+
DataHubRestEmitter,
|
|
24
|
+
)
|
|
22
25
|
from datahub.ingestion.api.common import RecordEnvelope, WorkUnit
|
|
23
26
|
from datahub.ingestion.api.sink import (
|
|
24
27
|
NoopWriteCallback,
|
|
@@ -71,6 +74,14 @@ class DatahubRestSinkConfig(DatahubClientConfig):
|
|
|
71
74
|
# Only applies in async batch mode.
|
|
72
75
|
max_per_batch: pydantic.PositiveInt = 100
|
|
73
76
|
|
|
77
|
+
@pydantic.validator("max_per_batch", always=True)
|
|
78
|
+
def validate_max_per_batch(cls, v):
|
|
79
|
+
if v > BATCH_INGEST_MAX_PAYLOAD_LENGTH:
|
|
80
|
+
raise ValueError(
|
|
81
|
+
f"max_per_batch must be less than or equal to {BATCH_INGEST_MAX_PAYLOAD_LENGTH}"
|
|
82
|
+
)
|
|
83
|
+
return v
|
|
84
|
+
|
|
74
85
|
|
|
75
86
|
@dataclasses.dataclass
|
|
76
87
|
class DataHubRestSinkReport(SinkReport):
|