acryl-datahub-cloud 0.3.8.2rc4__py3-none-any.whl → 0.3.8.2rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "acryl-datahub-cloud",
3
- "version": "0.3.8.2rc4",
3
+ "version": "0.3.8.2rc5",
4
4
  "install_requires": [
5
5
  "avro-gen3==0.7.16",
6
6
  "acryl-datahub"
@@ -13,6 +13,7 @@ class QueryBuilder:
13
13
  "removed",
14
14
  "siblings",
15
15
  "typeNames",
16
+ "combinedSearchRankingMultiplier",
16
17
  ]
17
18
  },
18
19
  }
@@ -172,6 +172,15 @@ class DataHubUsageFeatureReportingSourceConfig(
172
172
  description="Flag to enable polars streaming mode.'",
173
173
  )
174
174
 
175
+ # Running the whole pipeline in streaming mode was very unstable in the past.
176
+ # It seems like with the latest version of Polars it is much more stable.
177
+ # This option is only needed here until we are sure that the streaming mode is stable.
178
+ # then we can remove it and control it with the streaming_mode option.
179
+ experimental_full_streaming: bool = Field(
180
+ False,
181
+ description="Flag to enable full streaming mode.'",
182
+ )
183
+
175
184
  disable_write_usage: bool = Field(
176
185
  True,
177
186
  description="Flag to disable write usage statistics collection.'",
@@ -300,6 +309,12 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
300
309
  if "siblings" in doc["_source"] and doc["_source"]["siblings"]
301
310
  else []
302
311
  ),
312
+ "combinedSearchRankingMultiplier": (
313
+ doc["_source"]["combinedSearchRankingMultiplier"]
314
+ if "combinedSearchRankingMultiplier" in doc["_source"]
315
+ and doc["_source"]["combinedSearchRankingMultiplier"]
316
+ else None
317
+ ),
303
318
  "isView": (
304
319
  "View" in doc["_source"]["typeNames"]
305
320
  if "typeNames" in doc["_source"] and doc["_source"]["typeNames"]
@@ -544,9 +559,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
544
559
  usageSearchScoreMultiplier=usage_search_score_multiplier,
545
560
  usageFreshnessScoreMultiplier=freshness_factor,
546
561
  customDatahubScoreMultiplier=regexp_factor,
547
- combinedSearchRankingMultiplier=usage_search_score_multiplier
548
- * freshness_factor
549
- * regexp_factor,
562
+ # We make sure the combinedSearchRankingMultiplier is never less than 1
563
+ combinedSearchRankingMultiplier=max(
564
+ 1, (usage_search_score_multiplier * freshness_factor * regexp_factor)
565
+ ),
550
566
  )
551
567
 
552
568
  def load_data_from_es(
@@ -968,7 +984,9 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
968
984
  self, lazy_frame: polars.LazyFrame
969
985
  ) -> Iterable[MetadataWorkUnit]:
970
986
  num = 0
971
- for row in lazy_frame.collect().to_struct():
987
+ for row in lazy_frame.collect(
988
+ streaming=self.config.experimental_full_streaming
989
+ ).to_struct():
972
990
  num += 1
973
991
 
974
992
  if "siblings" in row and row["siblings"]:
@@ -979,113 +997,68 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
979
997
  )
980
998
 
981
999
  if "queries_rank_percentile" in row:
982
- search_ranking_multipliers = self.search_score(
983
- urn=row["urn"],
984
- last_update_time=(
985
- row["last_modified_at"]
986
- if "last_modified_at" in row and row["last_modified_at"]
987
- else 0
988
- ),
989
- usage_percentile=(
990
- row["queries_rank_percentile"]
991
- if row["queries_rank_percentile"]
992
- else 0
993
- ),
1000
+ # If usage data is missing we set the search ranking multipliers to 1
1001
+ search_ranking_multipliers = (
1002
+ self.search_score(
1003
+ urn=row["urn"],
1004
+ last_update_time=row.get("last_modified_at", 0) or 0,
1005
+ usage_percentile=row.get("queries_rank_percentile", 0) or 0,
1006
+ )
1007
+ if row.get("queries_rank_percentile", 0)
1008
+ else SearchRankingMultipliers()
994
1009
  )
995
1010
  elif "viewsCount30Days_rank_percentile" in row:
996
- search_ranking_multipliers = self.search_score(
997
- urn=row["urn"],
998
- last_update_time=(
999
- row["last_modified_at"]
1000
- if "last_modified_at" in row and row["last_modified_at"]
1001
- else 0
1002
- ),
1003
- usage_percentile=(
1004
- row["viewsCount30Days_rank_percentile"]
1005
- if row["viewsCount30Days_rank_percentile"]
1006
- else 0
1007
- ),
1011
+ # If usage data is missing we set the search ranking multipliers to 1
1012
+ search_ranking_multipliers = (
1013
+ self.search_score(
1014
+ urn=row["urn"],
1015
+ last_update_time=row.get("last_modified_at", 0) or 0,
1016
+ usage_percentile=row.get("viewsCount30Days_rank_percentile", 0)
1017
+ or 0,
1018
+ )
1019
+ if row.get("viewsCount30Days_rank_percentile", 0)
1020
+ else SearchRankingMultipliers()
1008
1021
  )
1009
1022
  logger.debug(f"Urn: {row['urn']} Score: {search_ranking_multipliers}")
1010
1023
 
1011
1024
  usage_feature = UsageFeaturesClass(
1012
- queryCountLast30Days=(
1013
- int(row["totalSqlQueries"])
1014
- if "totalSqlQueries" in row and row["totalSqlQueries"]
1015
- else 0
1016
- ),
1017
- usageCountLast30Days=(
1018
- int(row["totalSqlQueries"])
1019
- if "totalSqlQueries" in row and row["totalSqlQueries"]
1020
- else 0
1021
- ),
1022
- queryCountRankLast30Days=(
1023
- int(row["queries_rank"])
1024
- if "queries_rank" in row and row["queries_rank"] is not None
1025
- else None
1026
- ),
1027
- queryCountPercentileLast30Days=(
1028
- int(row["queries_rank_percentile"])
1029
- if "queries_rank_percentile" in row
1030
- and row["queries_rank_percentile"]
1031
- else 0
1032
- ),
1025
+ queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
1026
+ usageCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
1027
+ queryCountRankLast30Days=int(row.get("queries_rank"))
1028
+ if row.get("queries_rank")
1029
+ else None,
1030
+ queryCountPercentileLast30Days=row.get("queries_rank_percentile", 0)
1031
+ or 0,
1033
1032
  # queryCountPercentileLast30Days=int(
1034
1033
  # row["queries_rank_percentile"]) if "queries_rank_percentile" in row and row[
1035
1034
  # "queries_rank_percentile"] else 0,
1036
1035
  topUsersLast30Days=(
1037
- list(chain.from_iterable(row["top_users"]))
1038
- if row["top_users"]
1039
- else None
1040
- ),
1041
- uniqueUserCountLast30Days=(
1042
- int(row["distinct_user"]) if row["distinct_user"] else 0
1043
- ),
1044
- uniqueUserRankLast30Days=(
1045
- int(row["distinct_user_rank"])
1046
- if "distinct_user_rank" in row
1047
- and row["distinct_user_rank"] is not None
1048
- else None
1049
- ),
1050
- uniqueUserPercentileLast30Days=(
1051
- int(row["distinct_user_rank_percentile"])
1052
- if "distinct_user_rank_percentile" in row
1053
- and row["distinct_user_rank_percentile"]
1054
- else 0
1055
- ),
1056
- writeCountLast30Days=(
1057
- int(row["write_count"])
1058
- if "write_count" in row and row["write_count"]
1059
- else 0
1060
- if not self.config.disable_write_usage
1061
- else None
1062
- ),
1063
- writeCountPercentileLast30Days=(
1064
- int(row["write_rank_percentile"])
1065
- if "write_count" in row and row["write_rank_percentile"]
1066
- else 0
1067
- if not self.config.disable_write_usage
1036
+ list(chain.from_iterable(row.get("top_users")))
1037
+ if row.get("top_users")
1068
1038
  else None
1069
1039
  ),
1070
- writeCountRankLast30Days=(
1071
- int(row["write_rank"])
1072
- if "write_rank" in row and row["write_rank"]
1073
- else None
1074
- ),
1075
- viewCountTotal=(
1076
- int(row["viewsTotal"])
1077
- if "viewsTotal" in row and row["viewsTotal"]
1078
- else 0
1040
+ uniqueUserCountLast30Days=int(row.get("distinct_user", 0) or 0),
1041
+ uniqueUserRankLast30Days=int(row.get("distinct_user_rank"))
1042
+ if row.get("distinct_user_rank")
1043
+ else None,
1044
+ uniqueUserPercentileLast30Days=int(
1045
+ row.get("distinct_user_rank_percentile", 0) or 0
1079
1046
  ),
1080
- viewCountLast30Days=(
1081
- int(row["viewsCount30Days"])
1082
- if "viewsCount30Days" in row and row["viewsCount30Days"]
1083
- else 0
1084
- ),
1085
- viewCountPercentileLast30Days=(
1086
- int(row["viewsCount30Days_rank_percentile"])
1087
- if "viewsCount30Days_rank_percentile" in row
1088
- else 0
1047
+ writeCountLast30Days=int(row.get("write_rank_percentile", 0) or 0)
1048
+ if not self.config.disable_write_usage
1049
+ else None,
1050
+ writeCountPercentileLast30Days=int(
1051
+ row.get("write_rank_percentile", 0) or 0
1052
+ )
1053
+ if not self.config.disable_write_usage
1054
+ else None,
1055
+ writeCountRankLast30Days=int(row.get("write_rank") or 0)
1056
+ if not self.config.disable_write_usage
1057
+ else None,
1058
+ viewCountTotal=int(row.get("viewsTotal", 0) or 0),
1059
+ viewCountLast30Days=int(row.get("viewsCount30Days", 0) or 0),
1060
+ viewCountPercentileLast30Days=int(
1061
+ row.get("viewsCount30Days_rank_percentile", 0) or 0
1089
1062
  ),
1090
1063
  usageSearchScoreMultiplier=search_ranking_multipliers.usageSearchScoreMultiplier,
1091
1064
  usageFreshnessScoreMultiplier=search_ranking_multipliers.usageFreshnessScoreMultiplier,
@@ -1095,11 +1068,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1095
1068
 
1096
1069
  yield from self.generate_usage_feature_mcp(row["urn"], usage_feature)
1097
1070
 
1098
- if (
1099
- "siblings" in row
1100
- and row["siblings"]
1101
- and self.config.sibling_usage_enabled
1102
- ):
1071
+ if row.get("siblings") and self.config.sibling_usage_enabled:
1103
1072
  for sibling in row["siblings"]:
1104
1073
  if dbt_platform_regexp.match(sibling):
1105
1074
  yield from self.generate_usage_feature_mcp(
@@ -1114,26 +1083,15 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1114
1083
  num += 1
1115
1084
 
1116
1085
  query_usage_features = QueryUsageFeaturesClass(
1117
- queryCountLast30Days=(
1118
- int(row["totalSqlQueries"])
1119
- if "totalSqlQueries" in row and row["totalSqlQueries"]
1120
- else 0
1121
- ),
1086
+ queryCountLast30Days=int(row.get("totalSqlQueries", 0) or 0),
1122
1087
  queryCountTotal=None, # This is not implemented
1123
- runsPercentileLast30days=(
1124
- int(row["queries_rank_percentile"])
1125
- if "queries_rank_percentile" in row
1126
- and row["queries_rank_percentile"]
1127
- else 0
1128
- ),
1129
- lastExecutedAt=(
1130
- int(row["last_modified_at"])
1131
- if "last_modified_at" in row and row["last_modified_at"]
1132
- else 0
1088
+ runsPercentileLast30days=int(
1089
+ row.get("queries_rank_percentile", 0) or 0
1133
1090
  ),
1091
+ lastExecutedAt=int(row.get("last_modified_at", 0)),
1134
1092
  topUsersLast30Days=(
1135
- list(chain.from_iterable(row["top_users"]))
1136
- if row["top_users"]
1093
+ list(chain.from_iterable(row.get("top_users", [])))
1094
+ if row.get("top_users")
1137
1095
  else None
1138
1096
  ),
1139
1097
  queryCostLast30Days=None, # Not implemented yet
@@ -1180,16 +1138,17 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1180
1138
  def generate_dashboard_chart_usage(
1181
1139
  self, entity_index: str, usage_index: str
1182
1140
  ) -> polars.LazyFrame:
1183
- soft_deleted_schema = {
1141
+ entity_schema = {
1184
1142
  "entity_urn": polars.Categorical,
1185
1143
  "removed": polars.Boolean,
1186
1144
  "last_modified_at": polars.Int64,
1187
1145
  "siblings": polars.List(polars.String),
1146
+ "combinedSearchRankingMultiplier": polars.Float64,
1188
1147
  "isView": polars.Boolean,
1189
1148
  }
1190
1149
 
1191
- soft_deleted_df = self.load_data_from_es_to_lf(
1192
- schema=soft_deleted_schema,
1150
+ entities_df = self.load_data_from_es_to_lf(
1151
+ schema=entity_schema,
1193
1152
  index=entity_index,
1194
1153
  query=QueryBuilder.get_dataset_entities_query(),
1195
1154
  process_function=self.soft_deleted_batch,
@@ -1220,7 +1179,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1220
1179
  )
1221
1180
 
1222
1181
  lf = (
1223
- lf.join(soft_deleted_df, left_on="urn", right_on="entity_urn", how="inner")
1182
+ lf.join(entities_df, left_on="urn", right_on="entity_urn", how="inner")
1224
1183
  .filter(polars.col("removed") == False) # noqa: E712
1225
1184
  .drop(["removed"])
1226
1185
  )
@@ -1268,8 +1227,10 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1268
1227
  )
1269
1228
  .drop(["first_viewsCount"])
1270
1229
  )
1271
- lf = views_sum_with_top_users.join(incremental_views_sum, on="urn", how="left")
1272
- lf = lf.with_columns(
1230
+ views_with_inceremental_sum = views_sum_with_top_users.join(
1231
+ incremental_views_sum, on="urn", how="left"
1232
+ )
1233
+ total_views = views_with_inceremental_sum.with_columns(
1273
1234
  polars.when(
1274
1235
  polars.col("total_user_count")
1275
1236
  .is_null()
@@ -1280,11 +1241,54 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1280
1241
  .alias("viewsCount30Days")
1281
1242
  )
1282
1243
 
1283
- lf = self.gen_rank_and_percentile(
1284
- lf, "viewsCount30Days", "urn", "platform", "viewsCount30Days_"
1244
+ total_views_with_rank_and_percentiles = self.gen_rank_and_percentile(
1245
+ total_views, "viewsCount30Days", "urn", "platform", "viewsCount30Days_"
1246
+ ).drop(["siblings_right"])
1247
+
1248
+ total_views_with_rank_and_percentiles_with_zeroed_stale_usages = (
1249
+ self.generate_empty_usage_for_stale_entities(
1250
+ entities_df, total_views_with_rank_and_percentiles
1251
+ )
1285
1252
  )
1286
1253
 
1287
- return lf
1254
+ return total_views_with_rank_and_percentiles_with_zeroed_stale_usages
1255
+
1256
+ def generate_empty_usage_for_stale_entities(
1257
+ self, entities_lf: polars.LazyFrame, usages_lf: polars.LazyFrame
1258
+ ) -> polars.LazyFrame:
1259
+ # We need to merge datasets with existing search scores to make sure we can downrank them if there were no usage in the last n days
1260
+ # We drop last_modified_at to not use it in merge because we are getting last_modified_at from the usage index
1261
+ df_with_search_scores = (
1262
+ entities_lf.filter(
1263
+ polars.col("combinedSearchRankingMultiplier")
1264
+ .is_not_null()
1265
+ # We only want to downrank datasets that have a search score multiplier greater than 1. 1 is the minimum score of a dataset
1266
+ .and_(polars.col("combinedSearchRankingMultiplier").ne(1))
1267
+ ) # noqa: E712
1268
+ .filter(polars.col("removed") == False) # noqa: E712
1269
+ .drop(["removed"])
1270
+ .drop(["last_modified_at"])
1271
+ # We set this to 0 because we want to downrank datasets that have no usage
1272
+ .with_columns(polars.lit(0).alias("combinedSearchRankingMultiplier"))
1273
+ .rename({"entity_urn": "urn"})
1274
+ )
1275
+ common_fields = list(
1276
+ set(usages_lf.columns).intersection(set(df_with_search_scores.columns))
1277
+ )
1278
+ usages_lf = df_with_search_scores.join(
1279
+ usages_lf, on="urn", how="full", suffix="_right"
1280
+ )
1281
+ ## Merge all common fields automatically
1282
+ for common_field in common_fields:
1283
+ right_col = f"{common_field}_right"
1284
+ usages_lf = usages_lf.with_columns(
1285
+ [
1286
+ polars.col(common_field)
1287
+ .fill_null(polars.col(right_col))
1288
+ .alias(common_field)
1289
+ ]
1290
+ ).drop(right_col)
1291
+ return usages_lf
1288
1292
 
1289
1293
  def generate_query_usage(self) -> polars.LazyFrame:
1290
1294
  usage_index = "query_queryusagestatisticsaspect_v1"
@@ -1365,16 +1369,21 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1365
1369
 
1366
1370
  # Polaris/pandas join merges the join column into one column and that's why we need to filter based on the removed column
1367
1371
  lf = (
1368
- lf.join(datasets_lf, left_on="urn", right_on="entity_urn", how="inner")
1372
+ lf.join(datasets_lf, left_on="urn", right_on="entity_urn", how="left")
1369
1373
  .filter(polars.col("removed") == False) # noqa: E712
1370
1374
  .drop(["removed"])
1371
1375
  )
1376
+
1372
1377
  total_queries = lf.group_by("urn", "platform").agg(
1373
1378
  polars.col("totalSqlQueries").sum(),
1374
1379
  polars.col("last_modified_at").max().alias("last_modified_at"),
1375
1380
  polars.col("siblings").first().alias("siblings"),
1376
1381
  )
1377
1382
 
1383
+ total_queries = self.generate_empty_usage_for_stale_entities(
1384
+ datasets_lf, total_queries
1385
+ )
1386
+
1378
1387
  top_users = self.generate_top_users(lf)
1379
1388
 
1380
1389
  usage_with_top_users = total_queries.join(top_users, on="urn", how="left")
@@ -1510,6 +1519,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1510
1519
  "removed": polars.Boolean,
1511
1520
  "last_modified_at": polars.Int64,
1512
1521
  "siblings": polars.List(polars.String),
1522
+ "combinedSearchRankingMultiplier": polars.Float64,
1513
1523
  "isView": polars.Boolean,
1514
1524
  }
1515
1525
 
@@ -11147,6 +11147,13 @@
11147
11147
  "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
11148
11148
  },
11149
11149
  {
11150
+ "UrnValidation": {
11151
+ "entityTypes": [
11152
+ "dataType"
11153
+ ],
11154
+ "exist": true,
11155
+ "strict": true
11156
+ },
11150
11157
  "java": {
11151
11158
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
11152
11159
  },
@@ -11235,6 +11242,13 @@
11235
11242
  "fieldName": "entityTypes"
11236
11243
  }
11237
11244
  },
11245
+ "UrnValidation": {
11246
+ "entityTypes": [
11247
+ "entityType"
11248
+ ],
11249
+ "exist": true,
11250
+ "strict": true
11251
+ },
11238
11252
  "Urn": "Urn",
11239
11253
  "urn_is_array": true,
11240
11254
  "type": {
@@ -23,6 +23,13 @@
23
23
  "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
24
24
  },
25
25
  {
26
+ "UrnValidation": {
27
+ "entityTypes": [
28
+ "dataType"
29
+ ],
30
+ "exist": true,
31
+ "strict": true
32
+ },
26
33
  "java": {
27
34
  "class": "com.linkedin.pegasus2avro.common.urn.Urn"
28
35
  },
@@ -111,6 +118,13 @@
111
118
  "fieldName": "entityTypes"
112
119
  }
113
120
  },
121
+ "UrnValidation": {
122
+ "entityTypes": [
123
+ "entityType"
124
+ ],
125
+ "exist": true,
126
+ "strict": true
127
+ },
114
128
  "type": {
115
129
  "type": "array",
116
130
  "items": "string"
@@ -1,90 +1,90 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: acryl-datahub-cloud
3
- Version: 0.3.8.2rc4
3
+ Version: 0.3.8.2rc5
4
4
  Requires-Dist: avro-gen3==0.7.16
5
5
  Requires-Dist: acryl-datahub
6
6
  Provides-Extra: datahub-lineage-features
7
+ Requires-Dist: pyarrow; extra == "datahub-lineage-features"
8
+ Requires-Dist: duckdb; extra == "datahub-lineage-features"
7
9
  Requires-Dist: opensearch-py==2.4.2; extra == "datahub-lineage-features"
8
10
  Requires-Dist: pandas; extra == "datahub-lineage-features"
9
11
  Requires-Dist: pydantic<2; extra == "datahub-lineage-features"
10
- Requires-Dist: duckdb; extra == "datahub-lineage-features"
11
- Requires-Dist: pyarrow; extra == "datahub-lineage-features"
12
12
  Provides-Extra: datahub-reporting-forms
13
- Requires-Dist: termcolor==2.5.0; extra == "datahub-reporting-forms"
13
+ Requires-Dist: pyarrow; extra == "datahub-reporting-forms"
14
+ Requires-Dist: duckdb; extra == "datahub-reporting-forms"
15
+ Requires-Dist: boto3; extra == "datahub-reporting-forms"
14
16
  Requires-Dist: pandas; extra == "datahub-reporting-forms"
17
+ Requires-Dist: termcolor==2.5.0; extra == "datahub-reporting-forms"
15
18
  Requires-Dist: pydantic<2; extra == "datahub-reporting-forms"
16
- Requires-Dist: boto3; extra == "datahub-reporting-forms"
17
- Requires-Dist: duckdb; extra == "datahub-reporting-forms"
18
- Requires-Dist: pyarrow; extra == "datahub-reporting-forms"
19
19
  Provides-Extra: datahub-reporting-extract-graph
20
+ Requires-Dist: pyarrow; extra == "datahub-reporting-extract-graph"
21
+ Requires-Dist: duckdb; extra == "datahub-reporting-extract-graph"
22
+ Requires-Dist: boto3; extra == "datahub-reporting-extract-graph"
20
23
  Requires-Dist: opensearch-py==2.4.2; extra == "datahub-reporting-extract-graph"
21
24
  Requires-Dist: pandas; extra == "datahub-reporting-extract-graph"
22
25
  Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-graph"
23
- Requires-Dist: boto3; extra == "datahub-reporting-extract-graph"
24
- Requires-Dist: duckdb; extra == "datahub-reporting-extract-graph"
25
- Requires-Dist: pyarrow; extra == "datahub-reporting-extract-graph"
26
26
  Provides-Extra: datahub-reporting-extract-sql
27
+ Requires-Dist: pyarrow; extra == "datahub-reporting-extract-sql"
28
+ Requires-Dist: duckdb; extra == "datahub-reporting-extract-sql"
29
+ Requires-Dist: boto3; extra == "datahub-reporting-extract-sql"
27
30
  Requires-Dist: pandas; extra == "datahub-reporting-extract-sql"
28
31
  Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-sql"
29
- Requires-Dist: boto3; extra == "datahub-reporting-extract-sql"
30
- Requires-Dist: duckdb; extra == "datahub-reporting-extract-sql"
31
- Requires-Dist: pyarrow; extra == "datahub-reporting-extract-sql"
32
32
  Provides-Extra: datahub-usage-reporting
33
- Requires-Dist: termcolor==2.5.0; extra == "datahub-usage-reporting"
34
- Requires-Dist: pandas; extra == "datahub-usage-reporting"
35
- Requires-Dist: elasticsearch==7.13.4; extra == "datahub-usage-reporting"
33
+ Requires-Dist: pyarrow; extra == "datahub-usage-reporting"
36
34
  Requires-Dist: duckdb; extra == "datahub-usage-reporting"
37
- Requires-Dist: pyarrow<=18.0.0; extra == "datahub-usage-reporting"
35
+ Requires-Dist: numpy<2; extra == "datahub-usage-reporting"
36
+ Requires-Dist: scipy<=1.14.1; extra == "datahub-usage-reporting"
37
+ Requires-Dist: elasticsearch==7.13.4; extra == "datahub-usage-reporting"
38
+ Requires-Dist: pandas; extra == "datahub-usage-reporting"
38
39
  Requires-Dist: boto3; extra == "datahub-usage-reporting"
39
- Requires-Dist: pyarrow; extra == "datahub-usage-reporting"
40
+ Requires-Dist: polars==1.23.0; extra == "datahub-usage-reporting"
40
41
  Requires-Dist: opensearch-py==2.4.2; extra == "datahub-usage-reporting"
41
- Requires-Dist: numpy<2; extra == "datahub-usage-reporting"
42
+ Requires-Dist: pyarrow<=18.0.0; extra == "datahub-usage-reporting"
43
+ Requires-Dist: termcolor==2.5.0; extra == "datahub-usage-reporting"
42
44
  Requires-Dist: pydantic<2; extra == "datahub-usage-reporting"
43
- Requires-Dist: scipy<=1.14.1; extra == "datahub-usage-reporting"
44
- Requires-Dist: polars==1.19.0; extra == "datahub-usage-reporting"
45
45
  Provides-Extra: datahub-metadata-sharing
46
46
  Requires-Dist: tenacity; extra == "datahub-metadata-sharing"
47
47
  Provides-Extra: acryl-cs-issues
48
- Requires-Dist: zenpy; extra == "acryl-cs-issues"
49
- Requires-Dist: jinja2; extra == "acryl-cs-issues"
50
48
  Requires-Dist: openai; extra == "acryl-cs-issues"
51
49
  Requires-Dist: slack-sdk; extra == "acryl-cs-issues"
50
+ Requires-Dist: jinja2; extra == "acryl-cs-issues"
51
+ Requires-Dist: zenpy; extra == "acryl-cs-issues"
52
52
  Provides-Extra: all
53
- Requires-Dist: tenacity; extra == "all"
54
- Requires-Dist: termcolor==2.5.0; extra == "all"
55
- Requires-Dist: jinja2; extra == "all"
56
- Requires-Dist: elasticsearch==7.13.4; extra == "all"
57
- Requires-Dist: pyarrow<=18.0.0; extra == "all"
58
- Requires-Dist: boto3; extra == "all"
53
+ Requires-Dist: pyarrow; extra == "all"
59
54
  Requires-Dist: numpy<2; extra == "all"
60
55
  Requires-Dist: scipy<=1.14.1; extra == "all"
56
+ Requires-Dist: elasticsearch==7.13.4; extra == "all"
57
+ Requires-Dist: tenacity; extra == "all"
58
+ Requires-Dist: slack-sdk; extra == "all"
61
59
  Requires-Dist: zenpy; extra == "all"
60
+ Requires-Dist: boto3; extra == "all"
61
+ Requires-Dist: jinja2; extra == "all"
62
+ Requires-Dist: pydantic<2; extra == "all"
63
+ Requires-Dist: duckdb; extra == "all"
62
64
  Requires-Dist: pandas; extra == "all"
63
65
  Requires-Dist: openai; extra == "all"
64
- Requires-Dist: duckdb; extra == "all"
65
- Requires-Dist: pyarrow; extra == "all"
66
+ Requires-Dist: polars==1.23.0; extra == "all"
66
67
  Requires-Dist: opensearch-py==2.4.2; extra == "all"
67
- Requires-Dist: pydantic<2; extra == "all"
68
- Requires-Dist: polars==1.19.0; extra == "all"
69
- Requires-Dist: slack-sdk; extra == "all"
68
+ Requires-Dist: pyarrow<=18.0.0; extra == "all"
69
+ Requires-Dist: termcolor==2.5.0; extra == "all"
70
70
  Provides-Extra: dev
71
+ Requires-Dist: pyarrow; extra == "dev"
72
+ Requires-Dist: duckdb; extra == "dev"
73
+ Requires-Dist: numpy<2; extra == "dev"
74
+ Requires-Dist: scipy<=1.14.1; extra == "dev"
75
+ Requires-Dist: elasticsearch==7.13.4; extra == "dev"
71
76
  Requires-Dist: tenacity; extra == "dev"
72
- Requires-Dist: termcolor==2.5.0; extra == "dev"
77
+ Requires-Dist: slack-sdk; extra == "dev"
78
+ Requires-Dist: zenpy; extra == "dev"
73
79
  Requires-Dist: pandas; extra == "dev"
74
- Requires-Dist: jinja2; extra == "dev"
75
80
  Requires-Dist: openai; extra == "dev"
76
- Requires-Dist: elasticsearch==7.13.4; extra == "dev"
77
- Requires-Dist: zenpy; extra == "dev"
78
- Requires-Dist: duckdb; extra == "dev"
79
- Requires-Dist: pyarrow<=18.0.0; extra == "dev"
80
81
  Requires-Dist: boto3; extra == "dev"
81
- Requires-Dist: acryl-datahub[dev]; extra == "dev"
82
- Requires-Dist: pyarrow; extra == "dev"
82
+ Requires-Dist: polars==1.23.0; extra == "dev"
83
83
  Requires-Dist: opensearch-py==2.4.2; extra == "dev"
84
- Requires-Dist: numpy<2; extra == "dev"
84
+ Requires-Dist: pyarrow<=18.0.0; extra == "dev"
85
+ Requires-Dist: jinja2; extra == "dev"
86
+ Requires-Dist: acryl-datahub[dev]; extra == "dev"
87
+ Requires-Dist: termcolor==2.5.0; extra == "dev"
85
88
  Requires-Dist: pydantic<2; extra == "dev"
86
- Requires-Dist: scipy<=1.14.1; extra == "dev"
87
- Requires-Dist: polars==1.19.0; extra == "dev"
88
- Requires-Dist: slack-sdk; extra == "dev"
89
89
  Dynamic: provides-extra
90
90
  Dynamic: requires-dist
@@ -1,5 +1,5 @@
1
1
  acryl_datahub_cloud/__init__.py,sha256=axrMXkn0RW80YmuZgwUP_YQImcv6L28duZLWnW-gaNM,521
2
- acryl_datahub_cloud/_codegen_config.json,sha256=XRL-98-B46wLKuqWqW9fTNbwj6FLkaF4WAqOqczAe4A,557
2
+ acryl_datahub_cloud/_codegen_config.json,sha256=bYIuxLP1GvkXG4fP20J2YVCVGNOvJ1EEVRFVbP6BgnA,557
3
3
  acryl_datahub_cloud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  acryl_datahub_cloud/acryl_cs_issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py,sha256=uFjR2SqGS34y09-S9WqOqNGY8nOq6ptGf4y9781i8Z4,25230
@@ -27,16 +27,16 @@ acryl_datahub_cloud/datahub_restore/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQ
27
27
  acryl_datahub_cloud/datahub_restore/do_restore.py,sha256=Pjd3qE1lYXltKhpkI1KvO-7fM_ksnisFvmJ8bqGcT8Q,2284
28
28
  acryl_datahub_cloud/datahub_restore/source.py,sha256=i4NJ3os4mzAnOHnmR-OaHxVUe4rMeLle2mucCT_-8yA,5339
29
29
  acryl_datahub_cloud/datahub_usage_reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- acryl_datahub_cloud/datahub_usage_reporting/query_builder.py,sha256=ft2PRg_YHTYWnZOVznOEx3PSSqAfiaH0YJtkPcS-NJM,5837
30
+ acryl_datahub_cloud/datahub_usage_reporting/query_builder.py,sha256=hBHJRbsPJBeVpbu_QgCrFHQAR0cxAep2fGYkbFPahpc,5892
31
31
  acryl_datahub_cloud/datahub_usage_reporting/usage_feature_patch_builder.py,sha256=gR9neaHfi0JMQmAKMlgJCEuZIni7cdPFApGOKa5Pn4Y,14406
32
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py,sha256=t6dW-sfvjpqAO4Y1cwRl8xkKSZ5B1CmlD-EEhIIZq3g,64277
32
+ acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py,sha256=qJNrXu6fwPePenSCLMkiRSGyQgROcju54fULVPPq3VA,65875
33
33
  acryl_datahub_cloud/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  acryl_datahub_cloud/elasticsearch/config.py,sha256=6QNBOmoQZu1cJrDIBZyvZgdQt0QLfP82hdQkPtP-4HE,1220
35
35
  acryl_datahub_cloud/elasticsearch/graph_service.py,sha256=K4ykcSMxlrhlDrchhte3vEb1mcw8QkOmdIFSVSX4OVU,2788
36
36
  acryl_datahub_cloud/lineage_features/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
37
  acryl_datahub_cloud/lineage_features/source.py,sha256=Edve1oBoR87RTloAfjAuxgULlMI_HNSFuQfbiVjkac4,6412
38
38
  acryl_datahub_cloud/metadata/__init__.py,sha256=AjhXPjI6cnpdcrBRrE5gOWo15vv2TTl2ctU4UAnUN7A,238
39
- acryl_datahub_cloud/metadata/schema.avsc,sha256=VZhHa4KP2Rr2W-DTKfombxwD0PM9dum7_Th_F0Za3B0,1002817
39
+ acryl_datahub_cloud/metadata/schema.avsc,sha256=IptmiRUVlqmv1wQRvvzt2xfBOpx8-X1SpZpVt_6RSKY,1003121
40
40
  acryl_datahub_cloud/metadata/schema_classes.py,sha256=-lEIbtHuajGDV9tseTy-OVhAFt76U99_-QHibq8DGk8,1388729
41
41
  acryl_datahub_cloud/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
42
42
  acryl_datahub_cloud/metadata/_urns/urn_defs.py,sha256=UB7a-SusVpQfxgyBiUZ4uaHn8xIhIYOPybOMbhhUSds,125081
@@ -375,7 +375,7 @@ acryl_datahub_cloud/metadata/schemas/SourceCode.avsc,sha256=tUgo2rczO5x1fxw3fYNW
375
375
  acryl_datahub_cloud/metadata/schemas/Status.avsc,sha256=rPZSXSJdwnNywqNx2qll8cdt54aYgI-YUbRr3GK7h78,522
376
376
  acryl_datahub_cloud/metadata/schemas/StorageFeatures.avsc,sha256=F5LFc4P05YrKReB8ZsxJNXSLg2FJaE7vk63NM4GN0dE,1752
377
377
  acryl_datahub_cloud/metadata/schemas/StructuredProperties.avsc,sha256=N0NNDrkqbIgEHrb5uz1ynwZh3mb_ICVK7tDcnBLMfjI,4032
378
- acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=yLmH1SNHL4c7J6aIA5GBrc1rI3aAyWPlT47yYFnZZYk,12295
378
+ acryl_datahub_cloud/metadata/schemas/StructuredPropertyDefinition.avsc,sha256=xxtbACqH6OpJgW1gtP6lswkQnG__J_esH7HKCOdqUNs,12571
379
379
  acryl_datahub_cloud/metadata/schemas/StructuredPropertyKey.avsc,sha256=RpAH8fW-64C6yVU8_D1h5vYeg8fNp5t2S6VLpOEcMZM,649
380
380
  acryl_datahub_cloud/metadata/schemas/StructuredPropertySettings.avsc,sha256=EDNlXfT1TqogfulCanIc-nuYO9ZxRFOGzD9tl3ZJdB8,3732
381
381
  acryl_datahub_cloud/metadata/schemas/SubTypes.avsc,sha256=bhXbzK020zDyQno97Xp05vmoMeZ82IGu2jz7pWDo3RQ,655
@@ -398,8 +398,8 @@ acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh
398
398
  acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc,sha256=yrhhVNioD11nFlDO7IfUbxAQjhA9Tr-4wnAYH5I9W74,1172
399
399
  acryl_datahub_cloud/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
400
400
  acryl_datahub_cloud/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
401
- acryl_datahub_cloud-0.3.8.2rc4.dist-info/METADATA,sha256=3rzD51KpcP4vW9YooklUMG6dszc5Y8lTQ0Mf5PaeS8I,4432
402
- acryl_datahub_cloud-0.3.8.2rc4.dist-info/WHEEL,sha256=nn6H5-ilmfVryoAQl3ZQ2l8SH5imPWFpm1A5FgEuFV4,91
403
- acryl_datahub_cloud-0.3.8.2rc4.dist-info/entry_points.txt,sha256=HpypFu4cwe0kT3zXFcqbOG-QTNjzYaV6NiCX0Pgy9LI,1086
404
- acryl_datahub_cloud-0.3.8.2rc4.dist-info/top_level.txt,sha256=EwgCxfX-DzJANwxj-Mx_j4TOfAFhmc_FgMbRPzWsoZs,20
405
- acryl_datahub_cloud-0.3.8.2rc4.dist-info/RECORD,,
401
+ acryl_datahub_cloud-0.3.8.2rc5.dist-info/METADATA,sha256=xBnAcZP3jeFENrzm2fvLnm35Fc7Y6Nsrj3hVc5Tq7Wo,4432
402
+ acryl_datahub_cloud-0.3.8.2rc5.dist-info/WHEEL,sha256=jB7zZ3N9hIM9adW7qlTAyycLYW9npaWKLRzaoVcLKcM,91
403
+ acryl_datahub_cloud-0.3.8.2rc5.dist-info/entry_points.txt,sha256=HpypFu4cwe0kT3zXFcqbOG-QTNjzYaV6NiCX0Pgy9LI,1086
404
+ acryl_datahub_cloud-0.3.8.2rc5.dist-info/top_level.txt,sha256=EwgCxfX-DzJANwxj-Mx_j4TOfAFhmc_FgMbRPzWsoZs,20
405
+ acryl_datahub_cloud-0.3.8.2rc5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.8.1)
2
+ Generator: setuptools (75.8.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5