acryl-datahub-cloud 0.3.9.1__py3-none-any.whl → 0.3.9.2rc1__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub-cloud might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "acryl-datahub-cloud",
3
- "version": "0.3.9.1",
3
+ "version": "0.3.9.2rc1",
4
4
  "install_requires": [
5
5
  "avro-gen3==0.7.16",
6
6
  "acryl-datahub"
@@ -286,41 +286,53 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
286
286
  def soft_deleted_batch(self, results: Iterable) -> Iterable[Dict]:
287
287
  with PerfTimer() as timer:
288
288
  for doc in results:
289
- yield {
290
- "entity_urn": doc["_source"]["urn"],
291
- "last_modified_at": (
292
- doc["_source"]["lastModifiedAt"]
293
- if "lastModifiedAt" in doc["_source"]
294
- and doc["_source"]["lastModifiedAt"]
295
- else (
289
+ try:
290
+ if "urn" not in doc["_source"]:
291
+ logger.warning(f"Urn not found in ES doc {doc}. Skipping...")
292
+ continue
293
+
294
+ yield {
295
+ "entity_urn": doc["_source"]["urn"],
296
+ "last_modified_at": (
296
297
  doc["_source"]["lastModifiedAt"]
297
298
  if "lastModifiedAt" in doc["_source"]
298
299
  and doc["_source"]["lastModifiedAt"]
300
+ else (
301
+ doc["_source"]["lastModifiedAt"]
302
+ if "lastModifiedAt" in doc["_source"]
303
+ and doc["_source"]["lastModifiedAt"]
304
+ else None
305
+ )
306
+ ),
307
+ "removed": (
308
+ doc["_source"]["removed"]
309
+ if "removed" in doc["_source"] and doc["_source"]["removed"]
310
+ else False
311
+ ),
312
+ "siblings": (
313
+ doc["_source"]["siblings"]
314
+ if "siblings" in doc["_source"]
315
+ and doc["_source"]["siblings"]
316
+ else []
317
+ ),
318
+ "combinedSearchRankingMultiplier": (
319
+ doc["_source"]["combinedSearchRankingMultiplier"]
320
+ if "combinedSearchRankingMultiplier" in doc["_source"]
321
+ and doc["_source"]["combinedSearchRankingMultiplier"]
299
322
  else None
300
- )
301
- ),
302
- "removed": (
303
- doc["_source"]["removed"]
304
- if "removed" in doc["_source"] and doc["_source"]["removed"]
305
- else False
306
- ),
307
- "siblings": (
308
- doc["_source"]["siblings"]
309
- if "siblings" in doc["_source"] and doc["_source"]["siblings"]
310
- else []
311
- ),
312
- "combinedSearchRankingMultiplier": (
313
- doc["_source"]["combinedSearchRankingMultiplier"]
314
- if "combinedSearchRankingMultiplier" in doc["_source"]
315
- and doc["_source"]["combinedSearchRankingMultiplier"]
316
- else None
317
- ),
318
- "isView": (
319
- "View" in doc["_source"]["typeNames"]
320
- if "typeNames" in doc["_source"] and doc["_source"]["typeNames"]
321
- else False
322
- ),
323
- }
323
+ ),
324
+ "isView": (
325
+ "View" in doc["_source"]["typeNames"]
326
+ if "typeNames" in doc["_source"]
327
+ and doc["_source"]["typeNames"]
328
+ else False
329
+ ),
330
+ }
331
+ except KeyError as e:
332
+ logger.warning(
333
+ f"Unable to process row {doc} from ES. It failed with {e}"
334
+ )
335
+ continue
324
336
  time_taken = timer.elapsed_seconds()
325
337
  logger.info(f"Entities processing took {time_taken:.3f} seconds")
326
338
 
@@ -477,27 +489,38 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
477
489
  def process_batch(self, results: Iterable) -> Iterable[Dict]:
478
490
  with PerfTimer() as timer:
479
491
  for doc in results:
492
+ if "urn" not in doc["_source"]:
493
+ logger.warning(f"Urn not found in ES doc {doc}. Skipping...")
494
+ continue
480
495
  match = re.match(platform_regexp, doc["_source"]["urn"])
481
496
  if match:
482
497
  platform = match.group(1)
483
498
  self.report.dataset_platforms_count[platform] += 1
484
499
  else:
485
- logger.warning("Platform not found in urn. Skipping...")
500
+ logger.warning(
501
+ f"Platform not found in urn {doc['_source']['urn']} in doc {doc}. Skipping..."
502
+ )
486
503
  continue
487
504
 
488
- yield {
489
- "timestampMillis": doc["_source"]["timestampMillis"],
490
- "urn": doc["_source"]["urn"],
491
- "eventGranularity": doc["_source"]["eventGranularity"],
492
- "totalSqlQueries": doc["_source"]["totalSqlQueries"],
493
- "uniqueUserCount": doc["_source"]["uniqueUserCount"],
494
- "userCounts": (
495
- doc["_source"]["event"]["userCounts"]
496
- if "userCounts" in doc["_source"]["event"]
497
- else None
498
- ),
499
- "platform": platform,
500
- }
505
+ try:
506
+ yield {
507
+ "timestampMillis": doc["_source"]["timestampMillis"],
508
+ "urn": doc["_source"]["urn"],
509
+ "eventGranularity": doc["_source"].get("eventGranularity"),
510
+ "totalSqlQueries": doc["_source"].get("totalSqlQueries", 0),
511
+ "uniqueUserCount": doc["_source"].get("uniqueUserCount", 0),
512
+ "userCounts": (
513
+ doc["_source"]["event"]["userCounts"]
514
+ if "userCounts" in doc["_source"]["event"]
515
+ else None
516
+ ),
517
+ "platform": platform,
518
+ }
519
+ except KeyError as e:
520
+ logger.warning(
521
+ f"Unable to process row {doc} from ES. The error was: {e}"
522
+ )
523
+ continue
501
524
 
502
525
  time_taken = timer.elapsed_seconds()
503
526
  logger.info(f"DatasetUsage processing took {time_taken:.3f} seconds")
@@ -1088,7 +1111,7 @@ class DataHubUsageFeatureReportingSource(StatefulIngestionSourceBase):
1088
1111
  runsPercentileLast30days=int(
1089
1112
  row.get("queries_rank_percentile", 0) or 0
1090
1113
  ),
1091
- lastExecutedAt=int(row.get("last_modified_at", 0)),
1114
+ lastExecutedAt=int(row.get("last_modified_at", 0) or 0),
1092
1115
  topUsersLast30Days=(
1093
1116
  list(chain.from_iterable(row.get("top_users", [])))
1094
1117
  if row.get("top_users")
@@ -1,90 +1,90 @@
1
- Metadata-Version: 2.2
1
+ Metadata-Version: 2.4
2
2
  Name: acryl-datahub-cloud
3
- Version: 0.3.9.1
3
+ Version: 0.3.9.2rc1
4
4
  Requires-Dist: avro-gen3==0.7.16
5
5
  Requires-Dist: acryl-datahub
6
6
  Provides-Extra: datahub-lineage-features
7
- Requires-Dist: pydantic<2; extra == "datahub-lineage-features"
8
- Requires-Dist: pyarrow; extra == "datahub-lineage-features"
9
- Requires-Dist: pandas; extra == "datahub-lineage-features"
10
7
  Requires-Dist: duckdb; extra == "datahub-lineage-features"
8
+ Requires-Dist: pyarrow; extra == "datahub-lineage-features"
11
9
  Requires-Dist: opensearch-py==2.4.2; extra == "datahub-lineage-features"
10
+ Requires-Dist: pandas; extra == "datahub-lineage-features"
11
+ Requires-Dist: pydantic<2; extra == "datahub-lineage-features"
12
12
  Provides-Extra: datahub-reporting-forms
13
- Requires-Dist: pydantic<2; extra == "datahub-reporting-forms"
14
- Requires-Dist: pyarrow; extra == "datahub-reporting-forms"
15
- Requires-Dist: pandas; extra == "datahub-reporting-forms"
16
- Requires-Dist: termcolor==2.5.0; extra == "datahub-reporting-forms"
17
13
  Requires-Dist: duckdb; extra == "datahub-reporting-forms"
14
+ Requires-Dist: termcolor==2.5.0; extra == "datahub-reporting-forms"
15
+ Requires-Dist: pyarrow; extra == "datahub-reporting-forms"
18
16
  Requires-Dist: boto3; extra == "datahub-reporting-forms"
17
+ Requires-Dist: pandas; extra == "datahub-reporting-forms"
18
+ Requires-Dist: pydantic<2; extra == "datahub-reporting-forms"
19
19
  Provides-Extra: datahub-reporting-extract-graph
20
- Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-graph"
21
- Requires-Dist: opensearch-py==2.4.2; extra == "datahub-reporting-extract-graph"
22
- Requires-Dist: pyarrow; extra == "datahub-reporting-extract-graph"
23
- Requires-Dist: pandas; extra == "datahub-reporting-extract-graph"
24
20
  Requires-Dist: duckdb; extra == "datahub-reporting-extract-graph"
21
+ Requires-Dist: pyarrow; extra == "datahub-reporting-extract-graph"
22
+ Requires-Dist: opensearch-py==2.4.2; extra == "datahub-reporting-extract-graph"
25
23
  Requires-Dist: boto3; extra == "datahub-reporting-extract-graph"
24
+ Requires-Dist: pandas; extra == "datahub-reporting-extract-graph"
25
+ Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-graph"
26
26
  Provides-Extra: datahub-reporting-extract-sql
27
- Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-sql"
28
- Requires-Dist: pyarrow; extra == "datahub-reporting-extract-sql"
29
- Requires-Dist: pandas; extra == "datahub-reporting-extract-sql"
30
27
  Requires-Dist: duckdb; extra == "datahub-reporting-extract-sql"
28
+ Requires-Dist: pyarrow; extra == "datahub-reporting-extract-sql"
31
29
  Requires-Dist: boto3; extra == "datahub-reporting-extract-sql"
30
+ Requires-Dist: pandas; extra == "datahub-reporting-extract-sql"
31
+ Requires-Dist: pydantic<2; extra == "datahub-reporting-extract-sql"
32
32
  Provides-Extra: datahub-usage-reporting
33
- Requires-Dist: pydantic<2; extra == "datahub-usage-reporting"
33
+ Requires-Dist: duckdb; extra == "datahub-usage-reporting"
34
34
  Requires-Dist: pyarrow; extra == "datahub-usage-reporting"
35
+ Requires-Dist: boto3; extra == "datahub-usage-reporting"
35
36
  Requires-Dist: pandas; extra == "datahub-usage-reporting"
36
37
  Requires-Dist: elasticsearch==7.13.4; extra == "datahub-usage-reporting"
37
- Requires-Dist: termcolor==2.5.0; extra == "datahub-usage-reporting"
38
- Requires-Dist: boto3; extra == "datahub-usage-reporting"
38
+ Requires-Dist: pydantic<2; extra == "datahub-usage-reporting"
39
39
  Requires-Dist: numpy<2; extra == "datahub-usage-reporting"
40
- Requires-Dist: scipy<=1.14.1; extra == "datahub-usage-reporting"
40
+ Requires-Dist: termcolor==2.5.0; extra == "datahub-usage-reporting"
41
+ Requires-Dist: opensearch-py==2.4.2; extra == "datahub-usage-reporting"
41
42
  Requires-Dist: pyarrow<=18.0.0; extra == "datahub-usage-reporting"
43
+ Requires-Dist: scipy<=1.14.1; extra == "datahub-usage-reporting"
42
44
  Requires-Dist: polars==1.23.0; extra == "datahub-usage-reporting"
43
- Requires-Dist: duckdb; extra == "datahub-usage-reporting"
44
- Requires-Dist: opensearch-py==2.4.2; extra == "datahub-usage-reporting"
45
45
  Provides-Extra: datahub-metadata-sharing
46
46
  Requires-Dist: tenacity; extra == "datahub-metadata-sharing"
47
47
  Provides-Extra: acryl-cs-issues
48
- Requires-Dist: slack-sdk; extra == "acryl-cs-issues"
49
48
  Requires-Dist: zenpy; extra == "acryl-cs-issues"
50
- Requires-Dist: jinja2; extra == "acryl-cs-issues"
51
49
  Requires-Dist: openai; extra == "acryl-cs-issues"
50
+ Requires-Dist: jinja2; extra == "acryl-cs-issues"
51
+ Requires-Dist: slack-sdk; extra == "acryl-cs-issues"
52
52
  Provides-Extra: all
53
- Requires-Dist: pydantic<2; extra == "all"
54
- Requires-Dist: tenacity; extra == "all"
53
+ Requires-Dist: duckdb; extra == "all"
55
54
  Requires-Dist: pyarrow; extra == "all"
56
- Requires-Dist: elasticsearch==7.13.4; extra == "all"
57
- Requires-Dist: termcolor==2.5.0; extra == "all"
58
55
  Requires-Dist: boto3; extra == "all"
56
+ Requires-Dist: elasticsearch==7.13.4; extra == "all"
57
+ Requires-Dist: slack-sdk; extra == "all"
58
+ Requires-Dist: pydantic<2; extra == "all"
59
59
  Requires-Dist: numpy<2; extra == "all"
60
+ Requires-Dist: jinja2; extra == "all"
60
61
  Requires-Dist: scipy<=1.14.1; extra == "all"
62
+ Requires-Dist: polars==1.23.0; extra == "all"
63
+ Requires-Dist: tenacity; extra == "all"
61
64
  Requires-Dist: openai; extra == "all"
62
- Requires-Dist: pyarrow<=18.0.0; extra == "all"
63
- Requires-Dist: duckdb; extra == "all"
64
- Requires-Dist: jinja2; extra == "all"
65
65
  Requires-Dist: pandas; extra == "all"
66
- Requires-Dist: slack-sdk; extra == "all"
67
- Requires-Dist: zenpy; extra == "all"
68
- Requires-Dist: polars==1.23.0; extra == "all"
66
+ Requires-Dist: termcolor==2.5.0; extra == "all"
69
67
  Requires-Dist: opensearch-py==2.4.2; extra == "all"
68
+ Requires-Dist: zenpy; extra == "all"
69
+ Requires-Dist: pyarrow<=18.0.0; extra == "all"
70
70
  Provides-Extra: dev
71
- Requires-Dist: pydantic<2; extra == "dev"
71
+ Requires-Dist: duckdb; extra == "dev"
72
72
  Requires-Dist: tenacity; extra == "dev"
73
73
  Requires-Dist: pyarrow; extra == "dev"
74
- Requires-Dist: jinja2; extra == "dev"
74
+ Requires-Dist: openai; extra == "dev"
75
+ Requires-Dist: boto3; extra == "dev"
75
76
  Requires-Dist: pandas; extra == "dev"
76
- Requires-Dist: termcolor==2.5.0; extra == "dev"
77
77
  Requires-Dist: elasticsearch==7.13.4; extra == "dev"
78
- Requires-Dist: boto3; extra == "dev"
79
- Requires-Dist: pyarrow<=18.0.0; extra == "dev"
80
78
  Requires-Dist: slack-sdk; extra == "dev"
79
+ Requires-Dist: pydantic<2; extra == "dev"
81
80
  Requires-Dist: numpy<2; extra == "dev"
82
- Requires-Dist: scipy<=1.14.1; extra == "dev"
83
- Requires-Dist: openai; extra == "dev"
84
- Requires-Dist: acryl-datahub[dev]; extra == "dev"
81
+ Requires-Dist: termcolor==2.5.0; extra == "dev"
82
+ Requires-Dist: opensearch-py==2.4.2; extra == "dev"
85
83
  Requires-Dist: polars==1.23.0; extra == "dev"
84
+ Requires-Dist: jinja2; extra == "dev"
85
+ Requires-Dist: pyarrow<=18.0.0; extra == "dev"
86
86
  Requires-Dist: zenpy; extra == "dev"
87
- Requires-Dist: duckdb; extra == "dev"
88
- Requires-Dist: opensearch-py==2.4.2; extra == "dev"
87
+ Requires-Dist: acryl-datahub[dev]; extra == "dev"
88
+ Requires-Dist: scipy<=1.14.1; extra == "dev"
89
89
  Dynamic: provides-extra
90
90
  Dynamic: requires-dist
@@ -1,5 +1,5 @@
1
1
  acryl_datahub_cloud/__init__.py,sha256=axrMXkn0RW80YmuZgwUP_YQImcv6L28duZLWnW-gaNM,521
2
- acryl_datahub_cloud/_codegen_config.json,sha256=qPIBM0LZX6zaqmH8BPrAYXG9Y84mWGJsm2uLTf2YrXE,554
2
+ acryl_datahub_cloud/_codegen_config.json,sha256=cN73Nk_5TbZZ6BomWWCcuZo764z8JxKyC--gJZhzFPk,557
3
3
  acryl_datahub_cloud/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  acryl_datahub_cloud/acryl_cs_issues/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
5
  acryl_datahub_cloud/acryl_cs_issues/acryl_customer.py,sha256=2pAA7GrRfVUvVCxPAgxi4BVgpwq9F_bvVWfjtkjAHVg,25231
@@ -30,7 +30,7 @@ acryl_datahub_cloud/datahub_restore/source.py,sha256=i4NJ3os4mzAnOHnmR-OaHxVUe4r
30
30
  acryl_datahub_cloud/datahub_usage_reporting/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
31
  acryl_datahub_cloud/datahub_usage_reporting/query_builder.py,sha256=hBHJRbsPJBeVpbu_QgCrFHQAR0cxAep2fGYkbFPahpc,5892
32
32
  acryl_datahub_cloud/datahub_usage_reporting/usage_feature_patch_builder.py,sha256=SOYl3xaZbWoY-V9oanLgzMTSCUB2AAayL3sCVGlymXY,14448
33
- acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py,sha256=zOqNcorv4kCaLQQy1W2jDB6CDb-qypLmIO9N7HvHf8M,65798
33
+ acryl_datahub_cloud/datahub_usage_reporting/usage_feature_reporter.py,sha256=vBVDN8h_5P59Weqln0hDOJOJbHrN3_1AdzAKVf3iF2o,66943
34
34
  acryl_datahub_cloud/elasticsearch/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  acryl_datahub_cloud/elasticsearch/config.py,sha256=6QNBOmoQZu1cJrDIBZyvZgdQt0QLfP82hdQkPtP-4HE,1220
36
36
  acryl_datahub_cloud/elasticsearch/graph_service.py,sha256=K4ykcSMxlrhlDrchhte3vEb1mcw8QkOmdIFSVSX4OVU,2788
@@ -408,8 +408,8 @@ acryl_datahub_cloud/metadata/schemas/VersionSetKey.avsc,sha256=psjGNNcFua3Zs9Xlh
408
408
  acryl_datahub_cloud/metadata/schemas/VersionSetProperties.avsc,sha256=fxNxEMxGdUDi_-T0sd6KJks5BWEo5AzboQxpZYKLbiQ,1434
409
409
  acryl_datahub_cloud/metadata/schemas/ViewProperties.avsc,sha256=3HhcbH5493dJUnEUtFMYMVfbYQ52aDedm5L4j77Nym4,1032
410
410
  acryl_datahub_cloud/metadata/schemas/__init__.py,sha256=uvLNC3VyCkWA_v8e9FdA1leFf46NFKDD0AajCfihepI,581
411
- acryl_datahub_cloud-0.3.9.1.dist-info/METADATA,sha256=4e5-52xPHWsanBGgSkRyXKjVG6uP0wloX2dGcQU_73k,4429
412
- acryl_datahub_cloud-0.3.9.1.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
413
- acryl_datahub_cloud-0.3.9.1.dist-info/entry_points.txt,sha256=oXC4Hq3nxUKi9LR5NwsY17q6anLmYJKbapTx9Ij6vuc,1205
414
- acryl_datahub_cloud-0.3.9.1.dist-info/top_level.txt,sha256=EwgCxfX-DzJANwxj-Mx_j4TOfAFhmc_FgMbRPzWsoZs,20
415
- acryl_datahub_cloud-0.3.9.1.dist-info/RECORD,,
411
+ acryl_datahub_cloud-0.3.9.2rc1.dist-info/METADATA,sha256=a1BczUnfAkNmFA5DgZud5xM5kJlOBaF2LmL0bgff9yo,4432
412
+ acryl_datahub_cloud-0.3.9.2rc1.dist-info/WHEEL,sha256=CmyFI0kx5cdEMTLiONQRbGQwjIoR1aIYB7eCAQ4KPJ0,91
413
+ acryl_datahub_cloud-0.3.9.2rc1.dist-info/entry_points.txt,sha256=oXC4Hq3nxUKi9LR5NwsY17q6anLmYJKbapTx9Ij6vuc,1205
414
+ acryl_datahub_cloud-0.3.9.2rc1.dist-info/top_level.txt,sha256=EwgCxfX-DzJANwxj-Mx_j4TOfAFhmc_FgMbRPzWsoZs,20
415
+ acryl_datahub_cloud-0.3.9.2rc1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (76.1.0)
2
+ Generator: setuptools (78.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5