acryl-datahub-cloud 0.3.8.1rc2__py3-none-any.whl → 0.3.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub-cloud might be problematic. Click here for more details.
- acryl_datahub_cloud/_codegen_config.json +1 -1
- acryl_datahub_cloud/datahub_reporting/datahub_form_reporting.py +18 -1
- acryl_datahub_cloud/datahub_restore/__init__.py +0 -0
- acryl_datahub_cloud/datahub_restore/do_restore.py +85 -0
- acryl_datahub_cloud/datahub_restore/source.py +139 -0
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py +1 -0
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py +139 -130
- acryl_datahub_cloud/metadata/_urns/urn_defs.py +1438 -1438
- acryl_datahub_cloud/metadata/schema.avsc +20098 -20513
- acryl_datahub_cloud/metadata/schema_classes.py +511 -494
- acryl_datahub_cloud/metadata/schemas/CorpUserInfo.avsc +13 -0
- acryl_datahub_cloud/metadata/schemas/DashboardInfo.avsc +5 -5
- acryl_datahub_cloud/metadata/schemas/MetadataChangeEvent.avsc +18 -5
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2.dist-info}/METADATA +46 -46
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2.dist-info}/RECORD +19 -16
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2.dist-info}/WHEEL +1 -1
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2.dist-info}/entry_points.txt +1 -0
- {acryl_datahub_cloud-0.3.8.1rc2.dist-info → acryl_datahub_cloud-0.3.8.2.dist-info}/top_level.txt +0 -0
|
@@ -172,6 +172,15 @@ class DataHubUsageFeatureReportingSourceConfig(
|
|
|
172
172
|
description="Flag to enable polars streaming mode.'",
|
|
173
173
|
)
|
|
174
174
|
|
|
175
|
+
# Running the whole pipeline in streaming mode was very unstable in the past.
|
|
176
|
+
# It seems like with the latest version of Polars it is much more stable.
|
|
177
|
+
# This option is only needed here until we are sure that the streaming mode is stable.
|
|
178
|
+
# then we can remove it and control it with the streaming_mode option.
|
|
179
|
+
experimental_full_streaming: bool = Field(
|
|
180
|
+
False,
|
|
181
|
+
description="Flag to enable full streaming mode.'",
|
|
182
|
+
)
|
|
183
|
+
|
|
175
184
|
disable_write_usage: bool = Field(
|
|
176
185
|
True,
|
|
177
186
|
description="Flag to disable write usage statistics collection.'",
|
|
@@ -300,6 +309,12 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
300
309
|
if "siblings" in doc["_source"] and doc["_source"]["siblings"]
|
|
301
310
|
else []
|
|
302
311
|
),
|
|
312
|
+
"combinedSearchRankingMultiplier": (
|
|
313
|
+
doc["_source"]["combinedSearchRankingMultiplier"]
|
|
314
|
+
if "combinedSearchRankingMultiplier" in doc["_source"]
|
|
315
|
+
and doc["_source"]["combinedSearchRankingMultiplier"]
|
|
316
|
+
else None
|
|
317
|
+
),
|
|
303
318
|
"isView": (
|
|
304
319
|
"View" in doc["_source"]["typeNames"]
|
|
305
320
|
if "typeNames" in doc["_source"] and doc["_source"]["typeNames"]
|
|
@@ -544,9 +559,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
544
559
|
usageSearchScoreMultiplier=usage_search_score_multiplier,
|
|
545
560
|
usageFreshnessScoreMultiplier=freshness_factor,
|
|
546
561
|
customDatahubScoreMultiplier=regexp_factor,
|
|
547
|
-
combinedSearchRankingMultiplier
|
|
548
|
-
|
|
549
|
-
|
|
562
|
+
# We make sure the combinedSearchRankingMultiplier is never less than 1
|
|
563
|
+
combinedSearchRankingMultiplier=max(
|
|
564
|
+
1, (usage_search_score_multiplier * freshness_factor * regexp_factor)
|
|
565
|
+
),
|
|
550
566
|
)
|
|
551
567
|
|
|
552
568
|
def load_data_from_es(
|
|
@@ -968,7 +984,9 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
968
984
|
self, lazy_frame: polars.LazyFrame
|
|
969
985
|
) -> Iterable[MetadataWorkUnit]:
|
|
970
986
|
num = 0
|
|
971
|
-
for row in lazy_frame.collect(
|
|
987
|
+
for row in lazy_frame.collect(
|
|
988
|
+
streaming=self.config.experimental_full_streaming
|
|
989
|
+
).to_struct():
|
|
972
990
|
num += 1
|
|
973
991
|
|
|
974
992
|
if "siblings" in row and row["siblings"]:
|
|
@@ -979,113 +997,68 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
979
997
|
)
|
|
980
998
|
|
|
981
999
|
if "queries_rank_percentile" in row:
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
row["
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
)
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
if row["queries_rank_percentile"]
|
|
992
|
-
else 0
|
|
993
|
-
),
|
|
1000
|
+
# If usage data is missing we set the search ranking multipliers to 1
|
|
1001
|
+
search_ranking_multipliers = (
|
|
1002
|
+
self.search_score(
|
|
1003
|
+
urn=row["urn"],
|
|
1004
|
+
last_update_time=row.get("last_modified_at", 0) or 0,
|
|
1005
|
+
usage_percentile=row.get("queries_rank_percentile", 0) or 0,
|
|
1006
|
+
)
|
|
1007
|
+
if row.get("queries_rank_percentile", 0)
|
|
1008
|
+
else SearchRankingMultipliers()
|
|
994
1009
|
)
|
|
995
1010
|
elif "viewsCount30Days_rank_percentile" in row:
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
row["
|
|
1000
|
-
|
|
1001
|
-
|
|
1002
|
-
|
|
1003
|
-
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
else 0
|
|
1007
|
-
),
|
|
1011
|
+
# If usage data is missing we set the search ranking multipliers to 1
|
|
1012
|
+
search_ranking_multipliers = (
|
|
1013
|
+
self.search_score(
|
|
1014
|
+
urn=row["urn"],
|
|
1015
|
+
last_update_time=row.get("last_modified_at", 0) or 0,
|
|
1016
|
+
usage_percentile=row.get("viewsCount30Days_rank_percentile", 0)
|
|
1017
|
+
or 0,
|
|
1018
|
+
)
|
|
1019
|
+
if row.get("viewsCount30Days_rank_percentile", 0)
|
|
1020
|
+
else SearchRankingMultipliers()
|
|
1008
1021
|
)
|
|
1009
1022
|
logger.debug(f"Urn: {row['urn']} Score: {search_ranking_multipliers}")
|
|
1010
1023
|
|
|
1011
1024
|
usage_feature = UsageFeaturesClass(
|
|
1012
|
-
queryCountLast30Days=(
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1016
|
-
|
|
1017
|
-
|
|
1018
|
-
|
|
1019
|
-
if "totalSqlQueries" in row and row["totalSqlQueries"]
|
|
1020
|
-
else 0
|
|
1021
|
-
),
|
|
1022
|
-
queryCountRankLast30Days=(
|
|
1023
|
-
int(row["queries_rank"])
|
|
1024
|
-
if "queries_rank" in row and row["queries_rank"] is not None
|
|
1025
|
-
else None
|
|
1026
|
-
),
|
|
1027
|
-
queryCountPercentileLast30Days=(
|
|
1028
|
-
int(row["queries_rank_percentile"])
|
|
1029
|
-
if "queries_rank_percentile" in row
|
|
1030
|
-
and row["queries_rank_percentile"]
|
|
1031
|
-
else 0
|
|
1032
|
-
),
|
|
1025
|
+
queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1026
|
+
usageCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1027
|
+
queryCountRankLast30Days=int(row.get("queries_rank"))
|
|
1028
|
+
if row.get("queries_rank")
|
|
1029
|
+
else None,
|
|
1030
|
+
queryCountPercentileLast30Days=row.get("queries_rank_percentile", 0)
|
|
1031
|
+
or 0,
|
|
1033
1032
|
# queryCountPercentileLast30Days=int(
|
|
1034
1033
|
# row["queries_rank_percentile"]) if "queries_rank_percentile" in row and row[
|
|
1035
1034
|
# "queries_rank_percentile"] else 0,
|
|
1036
1035
|
topUsersLast30Days=(
|
|
1037
|
-
list(chain.from_iterable(row
|
|
1038
|
-
if row
|
|
1039
|
-
else None
|
|
1040
|
-
),
|
|
1041
|
-
uniqueUserCountLast30Days=(
|
|
1042
|
-
int(row["distinct_user"]) if row["distinct_user"] else 0
|
|
1043
|
-
),
|
|
1044
|
-
uniqueUserRankLast30Days=(
|
|
1045
|
-
int(row["distinct_user_rank"])
|
|
1046
|
-
if "distinct_user_rank" in row
|
|
1047
|
-
and row["distinct_user_rank"] is not None
|
|
1048
|
-
else None
|
|
1049
|
-
),
|
|
1050
|
-
uniqueUserPercentileLast30Days=(
|
|
1051
|
-
int(row["distinct_user_rank_percentile"])
|
|
1052
|
-
if "distinct_user_rank_percentile" in row
|
|
1053
|
-
and row["distinct_user_rank_percentile"]
|
|
1054
|
-
else 0
|
|
1055
|
-
),
|
|
1056
|
-
writeCountLast30Days=(
|
|
1057
|
-
int(row["write_count"])
|
|
1058
|
-
if "write_count" in row and row["write_count"]
|
|
1059
|
-
else 0
|
|
1060
|
-
if not self.config.disable_write_usage
|
|
1061
|
-
else None
|
|
1062
|
-
),
|
|
1063
|
-
writeCountPercentileLast30Days=(
|
|
1064
|
-
int(row["write_rank_percentile"])
|
|
1065
|
-
if "write_count" in row and row["write_rank_percentile"]
|
|
1066
|
-
else 0
|
|
1067
|
-
if not self.config.disable_write_usage
|
|
1036
|
+
list(chain.from_iterable(row.get("top_users")))
|
|
1037
|
+
if row.get("top_users")
|
|
1068
1038
|
else None
|
|
1069
1039
|
),
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
int(row["viewsTotal"])
|
|
1077
|
-
if "viewsTotal" in row and row["viewsTotal"]
|
|
1078
|
-
else 0
|
|
1040
|
+
uniqueUserCountLast30Days=int(row.get("distinct_user", 0) or 0),
|
|
1041
|
+
uniqueUserRankLast30Days=int(row.get("distinct_user_rank"))
|
|
1042
|
+
if row.get("distinct_user_rank")
|
|
1043
|
+
else None,
|
|
1044
|
+
uniqueUserPercentileLast30Days=int(
|
|
1045
|
+
row.get("distinct_user_rank_percentile", 0) or 0
|
|
1079
1046
|
),
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
|
|
1047
|
+
writeCountLast30Days=int(row.get("write_rank_percentile", 0) or 0)
|
|
1048
|
+
if not self.config.disable_write_usage
|
|
1049
|
+
else None,
|
|
1050
|
+
writeCountPercentileLast30Days=int(
|
|
1051
|
+
row.get("write_rank_percentile", 0) or 0
|
|
1052
|
+
)
|
|
1053
|
+
if not self.config.disable_write_usage
|
|
1054
|
+
else None,
|
|
1055
|
+
writeCountRankLast30Days=int(row.get("write_rank") or 0)
|
|
1056
|
+
if not self.config.disable_write_usage
|
|
1057
|
+
else None,
|
|
1058
|
+
viewCountTotal=int(row.get("viewsTotal", 0) or 0),
|
|
1059
|
+
viewCountLast30Days=int(row.get("viewsCount30Days", 0) or 0),
|
|
1060
|
+
viewCountPercentileLast30Days=int(
|
|
1061
|
+
row.get("viewsCount30Days_rank_percentile", 0) or 0
|
|
1089
1062
|
),
|
|
1090
1063
|
usageSearchScoreMultiplier=search_ranking_multipliers.usageSearchScoreMultiplier,
|
|
1091
1064
|
usageFreshnessScoreMultiplier=search_ranking_multipliers.usageFreshnessScoreMultiplier,
|
|
@@ -1095,11 +1068,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1095
1068
|
|
|
1096
1069
|
yield from self.generate_usage_feature_mcp(row["urn"], usage_feature)
|
|
1097
1070
|
|
|
1098
|
-
if (
|
|
1099
|
-
"siblings" in row
|
|
1100
|
-
and row["siblings"]
|
|
1101
|
-
and self.config.sibling_usage_enabled
|
|
1102
|
-
):
|
|
1071
|
+
if row.get("siblings") and self.config.sibling_usage_enabled:
|
|
1103
1072
|
for sibling in row["siblings"]:
|
|
1104
1073
|
if dbt_platform_regexp.match(sibling):
|
|
1105
1074
|
yield from self.generate_usage_feature_mcp(
|
|
@@ -1114,26 +1083,15 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1114
1083
|
num += 1
|
|
1115
1084
|
|
|
1116
1085
|
query_usage_features = QueryUsageFeaturesClass(
|
|
1117
|
-
queryCountLast30Days=(
|
|
1118
|
-
int(row["totalSqlQueries"])
|
|
1119
|
-
if "totalSqlQueries" in row and row["totalSqlQueries"]
|
|
1120
|
-
else 0
|
|
1121
|
-
),
|
|
1086
|
+
queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
|
|
1122
1087
|
queryCountTotal=None, # This is not implemented
|
|
1123
|
-
runsPercentileLast30days=(
|
|
1124
|
-
|
|
1125
|
-
if "queries_rank_percentile" in row
|
|
1126
|
-
and row["queries_rank_percentile"]
|
|
1127
|
-
else 0
|
|
1128
|
-
),
|
|
1129
|
-
lastExecutedAt=(
|
|
1130
|
-
int(row["last_modified_at"])
|
|
1131
|
-
if "last_modified_at" in row and row["last_modified_at"]
|
|
1132
|
-
else 0
|
|
1088
|
+
runsPercentileLast30days=int(
|
|
1089
|
+
row.get("queries_rank_percentile", 0) or 0
|
|
1133
1090
|
),
|
|
1091
|
+
lastExecutedAt=int(row.get("last_modified_at", 0)),
|
|
1134
1092
|
topUsersLast30Days=(
|
|
1135
|
-
list(chain.from_iterable(row
|
|
1136
|
-
if row
|
|
1093
|
+
list(chain.from_iterable(row.get("top_users", [])))
|
|
1094
|
+
if row.get("top_users")
|
|
1137
1095
|
else None
|
|
1138
1096
|
),
|
|
1139
1097
|
queryCostLast30Days=None, # Not implemented yet
|
|
@@ -1180,16 +1138,17 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1180
1138
|
def generate_dashboard_chart_usage(
|
|
1181
1139
|
self, entity_index: str, usage_index: str
|
|
1182
1140
|
) -> polars.LazyFrame:
|
|
1183
|
-
|
|
1141
|
+
entity_schema = {
|
|
1184
1142
|
"entity_urn": polars.Categorical,
|
|
1185
1143
|
"removed": polars.Boolean,
|
|
1186
1144
|
"last_modified_at": polars.Int64,
|
|
1187
1145
|
"siblings": polars.List(polars.String),
|
|
1146
|
+
"combinedSearchRankingMultiplier": polars.Float64,
|
|
1188
1147
|
"isView": polars.Boolean,
|
|
1189
1148
|
}
|
|
1190
1149
|
|
|
1191
|
-
|
|
1192
|
-
schema=
|
|
1150
|
+
entities_df = self.load_data_from_es_to_lf(
|
|
1151
|
+
schema=entity_schema,
|
|
1193
1152
|
index=entity_index,
|
|
1194
1153
|
query=QueryBuilder.get_dataset_entities_query(),
|
|
1195
1154
|
process_function=self.soft_deleted_batch,
|
|
@@ -1220,7 +1179,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1220
1179
|
)
|
|
1221
1180
|
|
|
1222
1181
|
lf = (
|
|
1223
|
-
lf.join(
|
|
1182
|
+
lf.join(entities_df, left_on="urn", right_on="entity_urn", how="inner")
|
|
1224
1183
|
.filter(polars.col("removed") == False) # noqa: E712
|
|
1225
1184
|
.drop(["removed"])
|
|
1226
1185
|
)
|
|
@@ -1268,8 +1227,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1268
1227
|
)
|
|
1269
1228
|
.drop(["first_viewsCount"])
|
|
1270
1229
|
)
|
|
1271
|
-
|
|
1272
|
-
|
|
1230
|
+
views_with_inceremental_sum = views_sum_with_top_users.join(
|
|
1231
|
+
incremental_views_sum, on="urn", how="left"
|
|
1232
|
+
)
|
|
1233
|
+
total_views = views_with_inceremental_sum.with_columns(
|
|
1273
1234
|
polars.when(
|
|
1274
1235
|
polars.col("total_user_count")
|
|
1275
1236
|
.is_null()
|
|
@@ -1280,11 +1241,53 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1280
1241
|
.alias("viewsCount30Days")
|
|
1281
1242
|
)
|
|
1282
1243
|
|
|
1283
|
-
|
|
1284
|
-
|
|
1244
|
+
total_views_with_rank_and_percentiles = self.gen_rank_and_percentile(
|
|
1245
|
+
total_views, "viewsCount30Days", "urn", "platform", "viewsCount30Days_"
|
|
1246
|
+
).drop(["siblings_right"])
|
|
1247
|
+
|
|
1248
|
+
total_views_with_rank_and_percentiles_with_zeroed_stale_usages = (
|
|
1249
|
+
self.generate_empty_usage_for_stale_entities(
|
|
1250
|
+
entities_df, total_views_with_rank_and_percentiles
|
|
1251
|
+
)
|
|
1285
1252
|
)
|
|
1286
1253
|
|
|
1287
|
-
return
|
|
1254
|
+
return total_views_with_rank_and_percentiles_with_zeroed_stale_usages
|
|
1255
|
+
|
|
1256
|
+
def generate_empty_usage_for_stale_entities(
|
|
1257
|
+
self, entities_lf: polars.LazyFrame, usages_lf: polars.LazyFrame
|
|
1258
|
+
) -> polars.LazyFrame:
|
|
1259
|
+
# We need to merge datasets with existing search scores to make sure we can downrank them if there were no usage in the last n days
|
|
1260
|
+
# We drop last_modified_at to not use it in merge because we are getting last_modified_at from the usage index
|
|
1261
|
+
df_with_search_scores = (
|
|
1262
|
+
entities_lf.filter(
|
|
1263
|
+
polars.col("combinedSearchRankingMultiplier").is_not_null()
|
|
1264
|
+
# We only want to downrank datasets that have a search score multiplier greater than 1. 1 is the minimum score of a dataset
|
|
1265
|
+
.and_(polars.col("combinedSearchRankingMultiplier").ne(1))
|
|
1266
|
+
) # noqa: E712
|
|
1267
|
+
.filter(polars.col("removed") == False) # noqa: E712
|
|
1268
|
+
.drop(["removed"])
|
|
1269
|
+
.drop(["last_modified_at"])
|
|
1270
|
+
# We set this to 0 because we want to downrank datasets that have no usage
|
|
1271
|
+
.with_columns(polars.lit(0).alias("combinedSearchRankingMultiplier"))
|
|
1272
|
+
.rename({"entity_urn": "urn"})
|
|
1273
|
+
)
|
|
1274
|
+
common_fields = list(
|
|
1275
|
+
set(usages_lf.columns).intersection(set(df_with_search_scores.columns))
|
|
1276
|
+
)
|
|
1277
|
+
usages_lf = df_with_search_scores.join(
|
|
1278
|
+
usages_lf, on="urn", how="full", suffix="_right"
|
|
1279
|
+
)
|
|
1280
|
+
# Merge all common fields automatically
|
|
1281
|
+
for common_field in common_fields:
|
|
1282
|
+
right_col = f"{common_field}_right"
|
|
1283
|
+
usages_lf = usages_lf.with_columns(
|
|
1284
|
+
[
|
|
1285
|
+
polars.col(common_field)
|
|
1286
|
+
.fill_null(polars.col(right_col))
|
|
1287
|
+
.alias(common_field)
|
|
1288
|
+
]
|
|
1289
|
+
).drop(right_col)
|
|
1290
|
+
return usages_lf
|
|
1288
1291
|
|
|
1289
1292
|
def generate_query_usage(self) -> polars.LazyFrame:
|
|
1290
1293
|
usage_index = "query_queryusagestatisticsaspect_v1"
|
|
@@ -1365,16 +1368,21 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1365
1368
|
|
|
1366
1369
|
# Polaris/pandas join merges the join column into one column and that's why we need to filter based on the removed column
|
|
1367
1370
|
lf = (
|
|
1368
|
-
lf.join(datasets_lf, left_on="urn", right_on="entity_urn", how="
|
|
1371
|
+
lf.join(datasets_lf, left_on="urn", right_on="entity_urn", how="left")
|
|
1369
1372
|
.filter(polars.col("removed") == False) # noqa: E712
|
|
1370
1373
|
.drop(["removed"])
|
|
1371
1374
|
)
|
|
1375
|
+
|
|
1372
1376
|
total_queries = lf.group_by("urn", "platform").agg(
|
|
1373
1377
|
polars.col("totalSqlQueries").sum(),
|
|
1374
1378
|
polars.col("last_modified_at").max().alias("last_modified_at"),
|
|
1375
1379
|
polars.col("siblings").first().alias("siblings"),
|
|
1376
1380
|
)
|
|
1377
1381
|
|
|
1382
|
+
total_queries = self.generate_empty_usage_for_stale_entities(
|
|
1383
|
+
datasets_lf, total_queries
|
|
1384
|
+
)
|
|
1385
|
+
|
|
1378
1386
|
top_users = self.generate_top_users(lf)
|
|
1379
1387
|
|
|
1380
1388
|
usage_with_top_users = total_queries.join(top_users, on="urn", how="left")
|
|
@@ -1510,6 +1518,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
|
|
|
1510
1518
|
"removed": polars.Boolean,
|
|
1511
1519
|
"last_modified_at": polars.Int64,
|
|
1512
1520
|
"siblings": polars.List(polars.String),
|
|
1521
|
+
"combinedSearchRankingMultiplier": polars.Float64,
|
|
1513
1522
|
"isView": polars.Boolean,
|
|
1514
1523
|
}
|
|
1515
1524
|
|