acryl-datahub-cloud 0.3.8.2rc4__py3-none-any.whl → 0.3.8.2rc5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +1 -0
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +140 -130
- acryl_datahub_cloud/metadata/schema.avsc +14 -0
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- {acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/METADATA +46 -46
- {acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/RECORD +10 -10
- {acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/entry_points.txt +0 -0
- {acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/top_level.txt +0 -0
|
@@ -172,6 +172,15 @@ class DataHubUsageFeatureReportingSourceConfig(
|
|
|
172
172
|
description="Flag to enable polars streaming mode.'",
|
|
173
173
|
)
|
|
174
174
|
|
|
175
|
+
# Running the whole pipeline in streaming mode was very unstable in the past.
|
|
176
|
+
# It seems like with the latest version of Polars it is much more stable.
|
|
177
|
+
# This option is only needed here until we are sure that the streaming mode is stable.
|
|
178
|
+
# then we can remove it and control it with the streaming_mode option.
|
|
179
|
+
experimental_full_streaming: bool = Field(
|
|
180
|
+
False,
|
|
181
|
+
description="Flag to enable full streaming mode.'",
|
|
182
|
+
)
|
|
183
|
+
|
|
175
184
|
disable_write_usage: bool = Field(
|
|
176
185
|
True,
|
|
177
186
|
description="Flag to disable write usage statistics collection.'",
|
|
@@ -300,6 +309,12 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
300
309
|
if "siblings" in doc["_source"] and doc["_source"]["siblings"]
|
|
301
310
|
else []
|
|
302
311
|
),
|
|
312
|
+
"combinedSearchRankingMultiplier": (
|
|
313
|
+
doc["_source"]["combinedSearchRankingMultiplier"]
|
|
314
|
+
if "combinedSearchRankingMultiplier" in doc["_source"]
|
|
315
|
+
and doc["_source"]["combinedSearchRankingMultiplier"]
|
|
316
|
+
else None
|
|
317
|
+
),
|
|
303
318
|
"isView": (
|
|
304
319
|
"View" in doc["_source"]["typeNames"]
|
|
305
320
|
if "typeNames" in doc["_source"] and doc["_source"]["typeNames"]
|
|
@@ -544,9 +559,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
544
559
|
usageSearchScoreMultiplier=usage_search_score_multiplier,
|
|
545
560
|
usageFreshnessScoreMultiplier=freshness_factor,
|
|
546
561
|
customDatahubScoreMultiplier=regexp_factor,
|
|
547
|
-
combinedSearchRankingMultiplier
|
|
548
|
-
|
|
549
|
-
|
|
562
|
+
# We make sure the combinedSearchRankingMultiplier is never less than 1
|
|
563
|
+
combinedSearchRankingMultiplier=max(
|
|
564
|
+
1, (usage_search_score_multiplier * freshness_factor * regexp_factor)
|
|
565
|
+
),
|
|
550
566
|
)
|
|
551
567
|
|
|
552
568
|
def load_data_from_es(
|
|
@@ -968,7 +984,9 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
968
984
|
self, lazy_frame: polars.LazyFrame
|
|
969
985
|
) -> Iterable[MetadataWorkUnit]:
|
|
970
986
|
num = 0
|
|
971
|
-
for row in lazy_frame.collect(
|
|
987
|
+
for row in lazy_frame.collect(
|
|
988
|
+
streaming=self.config.experimental_full_streaming
|
|
989
|
+
).to_struct():
|
|
972
990
|
num += 1
|
|
973
991
|
|
|
974
992
|
if "siblings" in row and row["siblings"]:
|
|
@@ -979,113 +997,68 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
979
997
|
)
|
|
980
998
|
|
|
981
999
|
if "queries_rank_percentile" in row:
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
row["
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
)
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
if row["queries_rank_percentile"]
|
|
992
|
-
else 0
|
|
993
|
-
),
|
|
1000
|
+
# If usage data is missing we set the search ranking multipliers to 1
|
|
1001
|
+
search_ranking_multipliers = (
|
|
1002
|
+
self.search_score(
|
|
1003
|
+
urn=row["urn"],
|
|
1004
|
+
last_update_time=row.get("last_modified_at", 0) or 0,
|
|
1005
|
+
usage_percentile=row.get("queries_rank_percentile", 0) or 0,
|
|
1006
|
+
)
|
|
1007
|
+
if row.get("queries_rank_percentile", 0)
|
|
1008
|
+
else SearchRankingMultipliers()
|
|
994
1009
|
)
|
|
995
1010
|
elif "viewsCount30Days_rank_percentile" in row:
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
row["
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
else 0
|
|
1007
|
-
),
|
|
1011
|
+
# If usage data is missing we set the search ranking multipliers to 1
|
|
1012
|
+
search_ranking_multipliers = (
|
|
1013
|
+
self.search_score(
|
|
1014
|
+
urn=row["urn"],
|
|
1015
|
+
last_update_time=row.get("last_modified_at", 0) or 0,
|
|
1016
|
+
usage_percentile=row.get("viewsCount30Days_rank_percentile", 0)
|
|
1017
|
+
or 0,
|
|
1018
|
+
)
|
|
1019
|
+
if row.get("viewsCount30Days_rank_percentile", 0)
|
|
1020
|
+
else SearchRankingMultipliers()
|
|
1008
1021
|
)
|
|
1009
1022
|
logger.debug(f"Urn: {row['urn']} Score: {search_ranking_multipliers}")
|
|
1010
1023
|
|
|
1011
1024
|
usage_feature = UsageFeaturesClass(
|
|
1012
|
-
queryCountLast30Days=(
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
if "totalSqlQueries" in row and row["totalSqlQueries"]
|
|
1020
|
-
else 0
|
|
1021
|
-
),
|
|
1022
|
-
queryCountRankLast30Days=(
|
|
1023
|
-
int(row["queries_rank"])
|
|
1024
|
-
if "queries_rank" in row and row["queries_rank"] is not None
|
|
1025
|
-
else None
|
|
1026
|
-
),
|
|
1027
|
-
queryCountPercentileLast30Days=(
|
|
1028
|
-
int(row["queries_rank_percentile"])
|
|
1029
|
-
if "queries_rank_percentile" in row
|
|
1030
|
-
and row["queries_rank_percentile"]
|
|
1031
|
-
else 0
|
|
1032
|
-
),
|
|
1025
|
+
queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1026
|
+
usageCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1027
|
+
queryCountRankLast30Days=int(row.get("queries_rank"))
|
|
1028
|
+
if row.get("queries_rank")
|
|
1029
|
+
else None,
|
|
1030
|
+
queryCountPercentileLast30Days=row.get("queries_rank_percentile", 0)
|
|
1031
|
+
or 0,
|
|
1033
1032
|
# queryCountPercentileLast30Days=int(
|
|
1034
1033
|
# row["queries_rank_percentile"]) if "queries_rank_percentile" in row and row[
|
|
1035
1034
|
# "queries_rank_percentile"] else 0,
|
|
1036
1035
|
topUsersLast30Days=(
|
|
1037
|
-
list(chain.from_iterable(row
|
|
1038
|
-
if row
|
|
1039
|
-
else None
|
|
1040
|
-
),
|
|
1041
|
-
uniqueUserCountLast30Days=(
|
|
1042
|
-
int(row["distinct_user"]) if row["distinct_user"] else 0
|
|
1043
|
-
),
|
|
1044
|
-
uniqueUserRankLast30Days=(
|
|
1045
|
-
int(row["distinct_user_rank"])
|
|
1046
|
-
if "distinct_user_rank" in row
|
|
1047
|
-
and row["distinct_user_rank"] is not None
|
|
1048
|
-
else None
|
|
1049
|
-
),
|
|
1050
|
-
uniqueUserPercentileLast30Days=(
|
|
1051
|
-
int(row["distinct_user_rank_percentile"])
|
|
1052
|
-
if "distinct_user_rank_percentile" in row
|
|
1053
|
-
and row["distinct_user_rank_percentile"]
|
|
1054
|
-
else 0
|
|
1055
|
-
),
|
|
1056
|
-
writeCountLast30Days=(
|
|
1057
|
-
int(row["write_count"])
|
|
1058
|
-
if "write_count" in row and row["write_count"]
|
|
1059
|
-
else 0
|
|
1060
|
-
if not self.config.disable_write_usage
|
|
1061
|
-
else None
|
|
1062
|
-
),
|
|
1063
|
-
writeCountPercentileLast30Days=(
|
|
1064
|
-
int(row["write_rank_percentile"])
|
|
1065
|
-
if "write_count" in row and row["write_rank_percentile"]
|
|
1066
|
-
else 0
|
|
1067
|
-
if not self.config.disable_write_usage
|
|
1036
|
+
list(chain.from_iterable(row.get("top_users")))
|
|
1037
|
+
if row.get("top_users")
|
|
1068
1038
|
else None
|
|
1069
1039
|
),
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
int(row["viewsTotal"])
|
|
1077
|
-
if "viewsTotal" in row and row["viewsTotal"]
|
|
1078
|
-
else 0
|
|
1040
|
+
uniqueUserCountLast30Days=int(row.get("distinct_user", 0) or 0),
|
|
1041
|
+
uniqueUserRankLast30Days=int(row.get("distinct_user_rank"))
|
|
1042
|
+
if row.get("distinct_user_rank")
|
|
1043
|
+
else None,
|
|
1044
|
+
uniqueUserPercentileLast30Days=int(
|
|
1045
|
+
row.get("distinct_user_rank_percentile", 0) or 0
|
|
1079
1046
|
),
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1047
|
+
writeCountLast30Days=int(row.get("write_rank_percentile", 0) or 0)
|
|
1048
|
+
if not self.config.disable_write_usage
|
|
1049
|
+
else None,
|
|
1050
|
+
writeCountPercentileLast30Days=int(
|
|
1051
|
+
row.get("write_rank_percentile", 0) or 0
|
|
1052
|
+
)
|
|
1053
|
+
if not self.config.disable_write_usage
|
|
1054
|
+
else None,
|
|
1055
|
+
writeCountRankLast30Days=int(row.get("write_rank") or 0)
|
|
1056
|
+
if not self.config.disable_write_usage
|
|
1057
|
+
else None,
|
|
1058
|
+
viewCountTotal=int(row.get("viewsTotal", 0) or 0),
|
|
1059
|
+
viewCountLast30Days=int(row.get("viewsCount30Days", 0) or 0),
|
|
1060
|
+
viewCountPercentileLast30Days=int(
|
|
1061
|
+
row.get("viewsCount30Days_rank_percentile", 0) or 0
|
|
1089
1062
|
),
|
|
1090
1063
|
usageSearchScoreMultiplier=search_ranking_multipliers.usageSearchScoreMultiplier,
|
|
1091
1064
|
usageFreshnessScoreMultiplier=search_ranking_multipliers.usageFreshnessScoreMultiplier,
|
|
@@ -1095,11 +1068,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1095
1068
|
|
|
1096
1069
|
yield from self.generate_usage_feature_mcp(row["urn"], usage_feature)
|
|
1097
1070
|
|
|
1098
|
-
if (
|
|
1099
|
-
"siblings" in row
|
|
1100
|
-
and row["siblings"]
|
|
1101
|
-
and self.config.sibling_usage_enabled
|
|
1102
|
-
):
|
|
1071
|
+
if row.get("siblings") and self.config.sibling_usage_enabled:
|
|
1103
1072
|
for sibling in row["siblings"]:
|
|
1104
1073
|
if dbt_platform_regexp.match(sibling):
|
|
1105
1074
|
yield from self.generate_usage_feature_mcp(
|
|
@@ -1114,26 +1083,15 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1114
1083
|
num += 1
|
|
1115
1084
|
|
|
1116
1085
|
query_usage_features = QueryUsageFeaturesClass(
|
|
1117
|
-
queryCountLast30Days=(
|
|
1118
|
-
int(row["totalSqlQueries"])
|
|
1119
|
-
if "totalSqlQueries" in row and row["totalSqlQueries"]
|
|
1120
|
-
else 0
|
|
1121
|
-
),
|
|
1086
|
+
queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1122
1087
|
queryCountTotal=None, # This is not implemented
|
|
1123
|
-
runsPercentileLast30days=(
|
|
1124
|
-
|
|
1125
|
-
if "queries_rank_percentile" in row
|
|
1126
|
-
and row["queries_rank_percentile"]
|
|
1127
|
-
else 0
|
|
1128
|
-
),
|
|
1129
|
-
lastExecutedAt=(
|
|
1130
|
-
int(row["last_modified_at"])
|
|
1131
|
-
if "last_modified_at" in row and row["last_modified_at"]
|
|
1132
|
-
else 0
|
|
1088
|
+
runsPercentileLast30days=int(
|
|
1089
|
+
row.get("queries_rank_percentile", 0) or 0
|
|
1133
1090
|
),
|
|
1091
|
+
lastExecutedAt=int(row.get("last_modified_at", 0)),
|
|
1134
1092
|
topUsersLast30Days=(
|
|
1135
|
-
list(chain.from_iterable(row
|
|
1136
|
-
if row
|
|
1093
|
+
list(chain.from_iterable(row.get("top_users", [])))
|
|
1094
|
+
if row.get("top_users")
|
|
1137
1095
|
else None
|
|
1138
1096
|
),
|
|
1139
1097
|
queryCostLast30Days=None, # Not implemented yet
|
|
@@ -1180,16 +1138,17 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1180
1138
|
def generate_dashboard_chart_usage(
|
|
1181
1139
|
self, entity_index: str, usage_index: str
|
|
1182
1140
|
) -> polars.LazyFrame:
|
|
1183
|
-
|
|
1141
|
+
entity_schema = {
|
|
1184
1142
|
"entity_urn": polars.Categorical,
|
|
1185
1143
|
"removed": polars.Boolean,
|
|
1186
1144
|
"last_modified_at": polars.Int64,
|
|
1187
1145
|
"siblings": polars.List(polars.String),
|
|
1146
|
+
"combinedSearchRankingMultiplier": polars.Float64,
|
|
1188
1147
|
"isView": polars.Boolean,
|
|
1189
1148
|
}
|
|
1190
1149
|
|
|
1191
|
-
|
|
1192
|
-
schema=
|
|
1150
|
+
entities_df = self.load_data_from_es_to_lf(
|
|
1151
|
+
schema=entity_schema,
|
|
1193
1152
|
index=entity_index,
|
|
1194
1153
|
query=QueryBuilder.get_dataset_entities_query(),
|
|
1195
1154
|
process_function=self.soft_deleted_batch,
|
|
@@ -1220,7 +1179,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1220
1179
|
)
|
|
1221
1180
|
|
|
1222
1181
|
lf = (
|
|
1223
|
-
lf.join(
|
|
1182
|
+
lf.join(entities_df, left_on="urn", right_on="entity_urn", how="inner")
|
|
1224
1183
|
.filter(polars.col("removed") == False) # noqa: E712
|
|
1225
1184
|
.drop(["removed"])
|
|
1226
1185
|
)
|
|
@@ -1268,8 +1227,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1268
1227
|
)
|
|
1269
1228
|
.drop(["first_viewsCount"])
|
|
1270
1229
|
)
|
|
1271
|
-
|
|
1272
|
-
|
|
1230
|
+
views_with_inceremental_sum = views_sum_with_top_users.join(
|
|
1231
|
+
incremental_views_sum, on="urn", how="left"
|
|
1232
|
+
)
|
|
1233
|
+
total_views = views_with_inceremental_sum.with_columns(
|
|
1273
1234
|
polars.when(
|
|
1274
1235
|
polars.col("total_user_count")
|
|
1275
1236
|
.is_null()
|
|
@@ -1280,11 +1241,54 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1280
1241
|
.alias("viewsCount30Days")
|
|
1281
1242
|
)
|
|
1282
1243
|
|
|
1283
|
-
|
|
1284
|
-
|
|
1244
|
+
total_views_with_rank_and_percentiles = self.gen_rank_and_percentile(
|
|
1245
|
+
total_views, "viewsCount30Days", "urn", "platform", "viewsCount30Days_"
|
|
1246
|
+
).drop(["siblings_right"])
|
|
1247
|
+
|
|
1248
|
+
total_views_with_rank_and_percentiles_with_zeroed_stale_usages = (
|
|
1249
|
+
self.generate_empty_usage_for_stale_entities(
|
|
1250
|
+
entities_df, total_views_with_rank_and_percentiles
|
|
1251
|
+
)
|
|
1285
1252
|
)
|
|
1286
1253
|
|
|
1287
|
-
return
|
|
1254
|
+
return total_views_with_rank_and_percentiles_with_zeroed_stale_usages
|
|
1255
|
+
|
|
1256
|
+
def generate_empty_usage_for_stale_entities(
|
|
1257
|
+
self, entities_lf: polars.LazyFrame, usages_lf: polars.LazyFrame
|
|
1258
|
+
) -> polars.LazyFrame:
|
|
1259
|
+
# We need to merge datasets with existing search scores to make sure we can downrank them if there was no usage in the last n days
|
|
1260
|
+
# We drop last_modified_at to not use it in merge because we are getting last_modified_at from the usage index
|
|
1261
|
+
df_with_search_scores = (
|
|
1262
|
+
entities_lf.filter(
|
|
1263
|
+
polars.col("combinedSearchRankingMultiplier")
|
|
1264
|
+
.is_not_null()
|
|
1265
|
+
# We only want to downrank datasets that have a search score multiplier greater than 1. 1 is the minimum score of a dataset
|
|
1266
|
+
.and_(polars.col("combinedSearchRankingMultiplier").ne(1))
|
|
1267
|
+
) # noqa: E712
|
|
1268
|
+
.filter(polars.col("removed") == False) # noqa: E712
|
|
1269
|
+
.drop(["removed"])
|
|
1270
|
+
.drop(["last_modified_at"])
|
|
1271
|
+
# We set this to 0 because we want to downrank datasets that have no usage
|
|
1272
|
+
.with_columns(polars.lit(0).alias("combinedSearchRankingMultiplier"))
|
|
1273
|
+
.rename({"entity_urn": "urn"})
|
|
1274
|
+
)
|
|
1275
|
+
common_fields = list(
|
|
1276
|
+
set(usages_lf.columns).intersection(set(df_with_search_scores.columns))
|
|
1277
|
+
)
|
|
1278
|
+
usages_lf = df_with_search_scores.join(
|
|
1279
|
+
usages_lf, on="urn", how="full", suffix="_right"
|
|
1280
|
+
)
|
|
1281
|
+
## Merge all common fields automatically
|
|
1282
|
+
for common_field in common_fields:
|
|
1283
|
+
right_col = f"{common_field}_right"
|
|
1284
|
+
usages_lf = usages_lf.with_columns(
|
|
1285
|
+
[
|
|
1286
|
+
polars.col(common_field)
|
|
1287
|
+
.fill_null(polars.col(right_col))
|
|
1288
|
+
.alias(common_field)
|
|
1289
|
+
]
|
|
1290
|
+
).drop(right_col)
|
|
1291
|
+
return usages_lf
|
|
1288
1292
|
|
|
1289
1293
|
def generate_query_usage(self) -> polars.LazyFrame:
|
|
1290
1294
|
usage_index = "query_queryusagestatisticsaspect_v1"
|
|
@@ -1365,16 +1369,21 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1365
1369
|
|
|
1366
1370
|
# A Polars/pandas join merges the join column into one column, which is why we need to filter based on the removed column
|
|
1367
1371
|
lf = (
|
|
1368
|
-
lf.join(datasets_lf, left_on="urn", right_on="entity_urn", how="
|
|
1372
|
+
lf.join(datasets_lf, left_on="urn", right_on="entity_urn", how="left")
|
|
1369
1373
|
.filter(polars.col("removed") == False) # noqa: E712
|
|
1370
1374
|
.drop(["removed"])
|
|
1371
1375
|
)
|
|
1376
|
+
|
|
1372
1377
|
total_queries = lf.group_by("urn", "platform").agg(
|
|
1373
1378
|
polars.col("totalSqlQueries").sum(),
|
|
1374
1379
|
polars.col("last_modified_at").max().alias("last_modified_at"),
|
|
1375
1380
|
polars.col("siblings").first().alias("siblings"),
|
|
1376
1381
|
)
|
|
1377
1382
|
|
|
1383
|
+
total_queries = self.generate_empty_usage_for_stale_entities(
|
|
1384
|
+
datasets_lf, total_queries
|
|
1385
|
+
)
|
|
1386
|
+
|
|
1378
1387
|
top_users = self.generate_top_users(lf)
|
|
1379
1388
|
|
|
1380
1389
|
usage_with_top_users = total_queries.join(top_users, on="urn", how="left")
|
|
@@ -1510,6 +1519,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1510
1519
|
"removed": polars.Boolean,
|
|
1511
1520
|
"last_modified_at": polars.Int64,
|
|
1512
1521
|
"siblings": polars.List(polars.String),
|
|
1522
|
+
"combinedSearchRankingMultiplier": polars.Float64,
|
|
1513
1523
|
"isView": polars.Boolean,
|
|
1514
1524
|
}
|
|
1515
1525
|
|
|
@@ -11147,6 +11147,13 @@
|
|
|
11147
11147
|
"doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
|
|
11148
11148
|
},
|
|
11149
11149
|
{
|
|
11150
|
+
"UrnValidation": {
|
|
11151
|
+
"entityTypes": [
|
|
11152
|
+
"dataType"
|
|
11153
|
+
],
|
|
11154
|
+
"exist": true,
|
|
11155
|
+
"strict": true
|
|
11156
|
+
},
|
|
11150
11157
|
"java": {
|
|
11151
11158
|
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
11152
11159
|
},
|
|
@@ -11235,6 +11242,13 @@
|
|
|
11235
11242
|
"fieldName": "entityTypes"
|
|
11236
11243
|
}
|
|
11237
11244
|
},
|
|
11245
|
+
"UrnValidation": {
|
|
11246
|
+
"entityTypes": [
|
|
11247
|
+
"entityType"
|
|
11248
|
+
],
|
|
11249
|
+
"exist": true,
|
|
11250
|
+
"strict": true
|
|
11251
|
+
},
|
|
11238
11252
|
"Urn": "Urn",
|
|
11239
11253
|
"urn_is_array": true,
|
|
11240
11254
|
"type": {
|
|
@@ -23,6 +23,13 @@
|
|
|
23
23
|
"doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
|
|
24
24
|
},
|
|
25
25
|
{
|
|
26
|
+
"UrnValidation": {
|
|
27
|
+
"entityTypes": [
|
|
28
|
+
"dataType"
|
|
29
|
+
],
|
|
30
|
+
"exist": true,
|
|
31
|
+
"strict": true
|
|
32
|
+
},
|
|
26
33
|
"java": {
|
|
27
34
|
"class": "com.linkedin.pegasus2avro.common.urn.Urn"
|
|
28
35
|
},
|
|
@@ -111,6 +118,13 @@
|
|
|
111
118
|
"fieldName": "entityTypes"
|
|
112
119
|
}
|
|
113
120
|
},
|
|
121
|
+
"UrnValidation": {
|
|
122
|
+
"entityTypes": [
|
|
123
|
+
"entityType"
|
|
124
|
+
],
|
|
125
|
+
"exist": true,
|
|
126
|
+
"strict": true
|
|
127
|
+
},
|
|
114
128
|
"type": {
|
|
115
129
|
"type": "array",
|
|
116
130
|
"items": "string"
|
{acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/METADATA
RENAMED
|
@@ -1,90 +1,90 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: acryl-datahub-cloud
|
|
3
|
-
Version: 0.3.8.
|
|
3
|
+
Version: 0.3.8.2rc5
|
|
4
4
|
Requires-Dist: avro-gen3==0.7.16
|
|
5
5
|
Requires-Dist: acryl-datahub
|
|
6
6
|
Provides-Extra: datahub-lineage-features
|
|
7
|
+
Requires-Dist: pyarrow; extra == "datahub-lineage-features"
|
|
8
|
+
Requires-Dist: duckdb; extra == "datahub-lineage-features"
|
|
7
9
|
Requires-Dist: opensearch-py==2.4.2; extra == "datahub-lineage-features"
|
|
8
10
|
Requires-Dist: pandas; extra == "datahub-lineage-features"
|
|
9
11
|
Requires-Dist: pydantic<2; extra == "datahub-lineage-features"
|
|
10
|
-
Requires-Dist: duckdb; extra == "datahub-lineage-features"
|
|
11
|
-
Requires-Dist: pyarrow; extra == "datahub-lineage-features"
|
|
12
12
|
Provides-Extra: datahub-reporting-forms
|
|
13
|
-
Requires-Dist:
|
|
13
|
+
Requires-Dist: pyarrow; extra == "datahub-reporting-forms"
|
|
14
|
+
Requires-Dist: duckdb; extra == "datahub-reporting-forms"
|
|
15
|
+
Requires-Dist: boto3; extra == "datahub-reporting-forms"
|
|
14
16
|
Requires-Dist: pandas; extra == "datahub-reporting-forms"
|
|
17
|
+
Requires-Dist: termcolor==2.5.0; extra == "datahub-reporting-forms"
|
|
15
18
|
Requires-Dist: pydantic<2; extra == "datahub-reporting-forms"
|
|
16
|
-
Requires-Dist: boto3; extra == "datahub-reporting-forms"
|
|
17
|
-
Requires-Dist: duckdb; extra == "datahub-reporting-forms"
|
|
18
|
-
Requires-Dist: pyarrow; extra == "datahub-reporting-forms"
|
|
19
19
|
Provides-Extra: datahub-reporting-extract-graph
|
|
20
|
+
Requires-Dist: pyarrow; extra == "datahub-reporting-extract-graph"
|
|
21
|
+
Requires-Dist: duckdb; extra == "datahub-reporting-extract-graph"
|
|
22
|
+
Requires-Dist: boto3; extra == "datahub-reporting-extract-graph"
|
|
20
23
|
Requires-Dist: opensearch-py==2.4.2; extra == "datahub-reporting-extract-graph"
|
|
21
24
|
Requires-Dist: pandas; extra == "datahub-reporting-extract-graph"
|
|
22
25
|
Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-graph"
|
|
23
|
-
Requires-Dist: boto3; extra == "datahub-reporting-extract-graph"
|
|
24
|
-
Requires-Dist: duckdb; extra == "datahub-reporting-extract-graph"
|
|
25
|
-
Requires-Dist: pyarrow; extra == "datahub-reporting-extract-graph"
|
|
26
26
|
Provides-Extra: datahub-reporting-extract-sql
|
|
27
|
+
Requires-Dist: pyarrow; extra == "datahub-reporting-extract-sql"
|
|
28
|
+
Requires-Dist: duckdb; extra == "datahub-reporting-extract-sql"
|
|
29
|
+
Requires-Dist: boto3; extra == "datahub-reporting-extract-sql"
|
|
27
30
|
Requires-Dist: pandas; extra == "datahub-reporting-extract-sql"
|
|
28
31
|
Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-sql"
|
|
29
|
-
Requires-Dist: boto3; extra == "datahub-reporting-extract-sql"
|
|
30
|
-
Requires-Dist: duckdb; extra == "datahub-reporting-extract-sql"
|
|
31
|
-
Requires-Dist: pyarrow; extra == "datahub-reporting-extract-sql"
|
|
32
32
|
Provides-Extra: datahub-usage-reporting
|
|
33
|
-
Requires-Dist:
|
|
34
|
-
Requires-Dist: pandas; extra == "datahub-usage-reporting"
|
|
35
|
-
Requires-Dist: elasticsearch==7.13.4; extra == "datahub-usage-reporting"
|
|
33
|
+
Requires-Dist: pyarrow; extra == "datahub-usage-reporting"
|
|
36
34
|
Requires-Dist: duckdb; extra == "datahub-usage-reporting"
|
|
37
|
-
Requires-Dist:
|
|
35
|
+
Requires-Dist: numpy<2; extra == "datahub-usage-reporting"
|
|
36
|
+
Requires-Dist: scipy<=1.14.1; extra == "datahub-usage-reporting"
|
|
37
|
+
Requires-Dist: elasticsearch==7.13.4; extra == "datahub-usage-reporting"
|
|
38
|
+
Requires-Dist: pandas; extra == "datahub-usage-reporting"
|
|
38
39
|
Requires-Dist: boto3; extra == "datahub-usage-reporting"
|
|
39
|
-
Requires-Dist:
|
|
40
|
+
Requires-Dist: polars==1.23.0; extra == "datahub-usage-reporting"
|
|
40
41
|
Requires-Dist: opensearch-py==2.4.2; extra == "datahub-usage-reporting"
|
|
41
|
-
Requires-Dist:
|
|
42
|
+
Requires-Dist: pyarrow<=18.0.0; extra == "datahub-usage-reporting"
|
|
43
|
+
Requires-Dist: termcolor==2.5.0; extra == "datahub-usage-reporting"
|
|
42
44
|
Requires-Dist: pydantic<2; extra == "datahub-usage-reporting"
|
|
43
|
-
Requires-Dist: scipy<=1.14.1; extra == "datahub-usage-reporting"
|
|
44
|
-
Requires-Dist: polars==1.19.0; extra == "datahub-usage-reporting"
|
|
45
45
|
Provides-Extra: datahub-metadata-sharing
|
|
46
46
|
Requires-Dist: tenacity; extra == "datahub-metadata-sharing"
|
|
47
47
|
Provides-Extra: acryl-cs-issues
|
|
48
|
-
Requires-Dist: zenpy; extra == "acryl-cs-issues"
|
|
49
|
-
Requires-Dist: jinja2; extra == "acryl-cs-issues"
|
|
50
48
|
Requires-Dist: openai; extra == "acryl-cs-issues"
|
|
51
49
|
Requires-Dist: slack-sdk; extra == "acryl-cs-issues"
|
|
50
|
+
Requires-Dist: jinja2; extra == "acryl-cs-issues"
|
|
51
|
+
Requires-Dist: zenpy; extra == "acryl-cs-issues"
|
|
52
52
|
Provides-Extra: all
|
|
53
|
-
Requires-Dist:
|
|
54
|
-
Requires-Dist: termcolor==2.5.0; extra == "all"
|
|
55
|
-
Requires-Dist: jinja2; extra == "all"
|
|
56
|
-
Requires-Dist: elasticsearch==7.13.4; extra == "all"
|
|
57
|
-
Requires-Dist: pyarrow<=18.0.0; extra == "all"
|
|
58
|
-
Requires-Dist: boto3; extra == "all"
|
|
53
|
+
Requires-Dist: pyarrow; extra == "all"
|
|
59
54
|
Requires-Dist: numpy<2; extra == "all"
|
|
60
55
|
Requires-Dist: scipy<=1.14.1; extra == "all"
|
|
56
|
+
Requires-Dist: elasticsearch==7.13.4; extra == "all"
|
|
57
|
+
Requires-Dist: tenacity; extra == "all"
|
|
58
|
+
Requires-Dist: slack-sdk; extra == "all"
|
|
61
59
|
Requires-Dist: zenpy; extra == "all"
|
|
60
|
+
Requires-Dist: boto3; extra == "all"
|
|
61
|
+
Requires-Dist: jinja2; extra == "all"
|
|
62
|
+
Requires-Dist: pydantic<2; extra == "all"
|
|
63
|
+
Requires-Dist: duckdb; extra == "all"
|
|
62
64
|
Requires-Dist: pandas; extra == "all"
|
|
63
65
|
Requires-Dist: openai; extra == "all"
|
|
64
|
-
Requires-Dist:
|
|
65
|
-
Requires-Dist: pyarrow; extra == "all"
|
|
66
|
+
Requires-Dist: polars==1.23.0; extra == "all"
|
|
66
67
|
Requires-Dist: opensearch-py==2.4.2; extra == "all"
|
|
67
|
-
Requires-Dist:
|
|
68
|
-
Requires-Dist:
|
|
69
|
-
Requires-Dist: slack-sdk; extra == "all"
|
|
68
|
+
Requires-Dist: pyarrow<=18.0.0; extra == "all"
|
|
69
|
+
Requires-Dist: termcolor==2.5.0; extra == "all"
|
|
70
70
|
Provides-Extra: dev
|
|
71
|
+
Requires-Dist: pyarrow; extra == "dev"
|
|
72
|
+
Requires-Dist: duckdb; extra == "dev"
|
|
73
|
+
Requires-Dist: numpy<2; extra == "dev"
|
|
74
|
+
Requires-Dist: scipy<=1.14.1; extra == "dev"
|
|
75
|
+
Requires-Dist: elasticsearch==7.13.4; extra == "dev"
|
|
71
76
|
Requires-Dist: tenacity; extra == "dev"
|
|
72
|
-
Requires-Dist:
|
|
77
|
+
Requires-Dist: slack-sdk; extra == "dev"
|
|
78
|
+
Requires-Dist: zenpy; extra == "dev"
|
|
73
79
|
Requires-Dist: pandas; extra == "dev"
|
|
74
|
-
Requires-Dist: jinja2; extra == "dev"
|
|
75
80
|
Requires-Dist: openai; extra == "dev"
|
|
76
|
-
Requires-Dist: elasticsearch==7.13.4; extra == "dev"
|
|
77
|
-
Requires-Dist: zenpy; extra == "dev"
|
|
78
|
-
Requires-Dist: duckdb; extra == "dev"
|
|
79
|
-
Requires-Dist: pyarrow<=18.0.0; extra == "dev"
|
|
80
81
|
Requires-Dist: boto3; extra == "dev"
|
|
81
|
-
Requires-Dist:
|
|
82
|
-
Requires-Dist: pyarrow; extra == "dev"
|
|
82
|
+
Requires-Dist: polars==1.23.0; extra == "dev"
|
|
83
83
|
Requires-Dist: opensearch-py==2.4.2; extra == "dev"
|
|
84
|
-
Requires-Dist:
|
|
84
|
+
Requires-Dist: pyarrow<=18.0.0; extra == "dev"
|
|
85
|
+
Requires-Dist: jinja2; extra == "dev"
|
|
86
|
+
Requires-Dist: acryl-datahub[dev]; extra == "dev"
|
|
87
|
+
Requires-Dist: termcolor==2.5.0; extra == "dev"
|
|
85
88
|
Requires-Dist: pydantic<2; extra == "dev"
|
|
86
|
-
Requires-Dist: scipy<=1.14.1; extra == "dev"
|
|
87
|
-
Requires-Dist: polars==1.19.0; extra == "dev"
|
|
88
|
-
Requires-Dist: slack-sdk; extra == "dev"
|
|
89
89
|
Dynamic: provides-extra
|
|
90
90
|
Dynamic: requires-dist
|
{acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/RECORD
RENAMED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
acryl_datahub_cloud/__init__.py,sha256=axrMXkn0RW80YmuZgwUP_YQImcv6L28duZLWnW-gaNM,521
|
|
2
|
-
acryl_datahub_cloud/_codegen_config.json,sha256=
|
|
2
|
+
acryl_datahub_cloud/_codegen_config.json,sha256=bYIuxLP1GvkXG4fP20J2YVCVGNOvJ1EEVRFVbP6BgnA,557
|
|
3
3
|
acryl_datahub_cloud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
acryl_datahub_cloud/acryl_cs_issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
5
5
|
acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py,sha256=uFjR2SqGS34y09-S9WqOqNGY8nOq6ptGf4y9781i8Z4,25230
|
|
@@ -27,16 +27,16 @@ acryl_datahub_cloud/datahub_restore/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
|
|
|
27
27
|
acryl_datahub_cloud/datahub_restore/do_restore.py,sha256=Pjd3qE1lYXltKhpkI1KvO-7fM_ksnisFvmJ8bqGcT8Q,2284
|
|
28
28
|
acryl_datahub_cloud/datahub_restore/source.py,sha256=i4NJ3os4mzAnOHnmR-OaHxVUe4rMeLle2mucCT_-8yA,5339
|
|
29
29
|
acryl_datahub_cloud/datahub_usage_reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
|
-
acryl_datahub_cloud/datahub_usage_reporting/query_builder.py,sha256=
|
|
30
|
+
acryl_datahub_cloud/datahub_usage_reporting/query_builder.py,sha256=hBHJRbsPJBeVpbu_QgCrFHQAR0cxAep2fGYkbFPahpc,5892
|
|
31
31
|
acryl_datahub_cloud/datahub_usage_reporting/usage_feature_patch_builder.py,sha256=gR9neaHfi0JMQmAKMlgJCEuZIni7cdPFApGOKa5Pn4Y,14406
|
|
32
|
-
acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py,sha256=
|
|
32
|
+
acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py,sha256=qJNrXu6fwPePenSCLMkiRSGyQgROcju54fULVPPq3VA,65875
|
|
33
33
|
acryl_datahub_cloud/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
34
34
|
acryl_datahub_cloud/elasticsearch/config.py,sha256=6QNBOmoQZu1cJrDIBZyvZgdQt0QLfP82hdQkPtP-4HE,1220
|
|
35
35
|
acryl_datahub_cloud/elasticsearch/graph_service.py,sha256=K4ykcSMxlrhlDrchhte3vEb1mcw8QkOmdIFSVSX4OVU,2788
|
|
36
36
|
acryl_datahub_cloud/lineage_features/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
37
|
acryl_datahub_cloud/lineage_features/source.py,sha256=Edve1oBoR87RTloAfjAuxgULlMI_HNSFuQfbiVjkac4,6412
|
|
38
38
|
acryl_datahub_cloud/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
|
|
39
|
-
acryl_datahub_cloud/metadata/schema.avsc,sha256=
|
|
39
|
+
acryl_datahub_cloud/metadata/schema.avsc,sha256=IptmiRUVlqmv1wQRvvzt2xfBOpx8-X1SpZpVt_6RSKY,1003121
|
|
40
40
|
acryl_datahub_cloud/metadata/schema_classes.py,sha256=-lEIbtHuajGDV9tseTy-OVhAFt76U99_-QHibq8DGk8,1388729
|
|
41
41
|
acryl_datahub_cloud/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
|
|
42
42
|
acryl_datahub_cloud/metadata/_urns/urn_defs.py,sha256=UB7a-SusVpQfxgyBiUZ4uaHn8xIhIYOPybOMbhhUSds,125081
|
|
@@ -375,7 +375,7 @@ acryl_datahub_cloud/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNW
|
|
|
375
375
|
acryl_datahub_cloud/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
|
|
376
376
|
acryl_datahub_cloud/metadata/schemas/StorageFeatures.avsc,sha256=F5LFc4P05YrKReB8ZsxJNXSLg2FJaE7vk63NM4GN0dE,1752
|
|
377
377
|
acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc,sha256=N0NNDrkqbIgEHrb5uz1ynwZh3mb_ICVK7tDcnBLMfjI,4032
|
|
378
|
-
acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=
|
|
378
|
+
acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=xxtbACqH6OpJgW1gtP6lswkQnG__J_esH7HKCOdqUNs,12571
|
|
379
379
|
acryl_datahub_cloud/metadata/schemas/StructuredPropertyKey.avsc,sha256=RpAH8fW-64C6yVU8_D1h5vYeg8fNp5t2S6VLpOEcMZM,649
|
|
380
380
|
acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
|
|
381
381
|
acryl_datahub_cloud/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
|
|
@@ -398,8 +398,8 @@ acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh
|
|
|
398
398
|
acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
|
|
399
399
|
acryl_datahub_cloud/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
|
|
400
400
|
acryl_datahub_cloud/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
|
|
401
|
-
acryl_datahub_cloud-0.3.8.
|
|
402
|
-
acryl_datahub_cloud-0.3.8.
|
|
403
|
-
acryl_datahub_cloud-0.3.8.
|
|
404
|
-
acryl_datahub_cloud-0.3.8.
|
|
405
|
-
acryl_datahub_cloud-0.3.8.
|
|
401
|
+
acryl_datahub_cloud-0.3.8.2rc5.dist-info/METADATA,sha256=xBnAcZP3jeFENrzm2fvLnm35Fc7Y6Nsrj3hVc5Tq7Wo,4432
|
|
402
|
+
acryl_datahub_cloud-0.3.8.2rc5.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
|
|
403
|
+
acryl_datahub_cloud-0.3.8.2rc5.dist-info/entry_points.txt,sha256=HpypFu4cwe0kT3zXFcqbOG-QTNjzYaV6NiCX0Pgy9LI,1086
|
|
404
|
+
acryl_datahub_cloud-0.3.8.2rc5.dist-info/top_level.txt,sha256=EwgCxfX-DzJANwxj-Mx_j4TOfAFhmc_FgMbRPzWsoZs,20
|
|
405
|
+
acryl_datahub_cloud-0.3.8.2rc5.dist-info/RECORD,,
|
|
File without changes
|
{acryl_datahub_cloud-0.3.8.2rc4.dist-info → acryl_datahub_cloud-0.3.8.2rc5.dist-info}/top_level.txt
RENAMED
|
File without changes
|