acryl-datahub 0.15.0rc9__py3-none-any.whl → 0.15.0rc11__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to their public registry. It is provided for informational purposes only.

This version of acryl-datahub has been flagged as potentially problematic.

@@ -1,4 +1,4 @@
- datahub/__init__.py,sha256=-XVob0PhZLue6g_4duveVmhOndcfh_8ifCuVB_YBxkk,574
+ datahub/__init__.py,sha256=RtQFMiAfUcKAAt_1AITPz1UOKRx2gIW0yLbLYUEChqU,575
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
  datahub/entrypoints.py,sha256=3-qSfXAx3Z0FEkBV5tlO8fQr4xk4ySeDRMVTpS5Xd6A,7793
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -61,7 +61,7 @@ datahub/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/cli/check_cli.py,sha256=9dXNyzZayHeoFjwFjLkMVyx6DiCZfeESyI-sYtGA6bE,12850
  datahub/cli/cli_utils.py,sha256=xMEK4tmoX2f_5BihxWrApOkayVLwhpPvMUU842x6FsI,13111
  datahub/cli/config_utils.py,sha256=yuXw7RzpRY5x_-MAoqWbv46qUkIeRNAJL4_OeJpYdBE,4879
- datahub/cli/delete_cli.py,sha256=YEbPDFcMtbIHDE0yNB9opfVI5r8f7kFOnfyc0rlyIqU,20906
+ datahub/cli/delete_cli.py,sha256=Z7iXNr4ZMmghCnldU8laK4SwTNrhQEEnnUH_TeaBKog,21838
  datahub/cli/docker_check.py,sha256=rED4wHXqxcQ_qNFyIgFEZ85BHT9ZTE5YC-oUKqbRqi0,9432
  datahub/cli/docker_cli.py,sha256=QGoWFp8ZZsXOSMbgu0Q4snMmMmtP3epWAN-fYglUNEc,36491
  datahub/cli/env_utils.py,sha256=RQzjg4JE29hjPt4v7p-RuqoOr99w8E3DBHWiN2Sm7T4,252
@@ -97,7 +97,7 @@ datahub/configuration/git.py,sha256=s55eUHxKqVZgtVsISaDyS-1F4iZBiybbjYsjbp5LU5o,
  datahub/configuration/import_resolver.py,sha256=b4Ie9L7knN1LALEVMxTcNFSklDD6CVE-4Ipy4ZYhNYA,369
  datahub/configuration/json_loader.py,sha256=vIDnjwXWi9yHDO8KW64EupOzOb_sspehGCD7xGHzg84,302
  datahub/configuration/kafka.py,sha256=MlIwpd5FFyOyjdDXW_X9JTLNk7f988sPMgevkcZYVgI,2579
- datahub/configuration/kafka_consumer_config.py,sha256=14UWK6kTAnKTgHM43aeWjg67fddACsSLzhO6wgp6cm4,1175
+ datahub/configuration/kafka_consumer_config.py,sha256=LivsObTt9yC3WoGnslJbF_x4ojfNdxMIMEhb8vvJfcA,2133
  datahub/configuration/pattern_utils.py,sha256=Q5IB9RfWOOo5FvRVBU7XkhiwHCxSQ1NTMfUlWtWI9qc,699
  datahub/configuration/pydantic_migration_helpers.py,sha256=4C_COAVZ5iJ8yxcWNgXZNWsY7ULogICNZ368oNF7zWg,1462
  datahub/configuration/source_common.py,sha256=68LZOuB23zSEcfgQJE1wZQnyYQHVVnEZK3Sniv_nEQs,2107
@@ -201,7 +201,7 @@ datahub/ingestion/source/nifi.py,sha256=ttsjZ9aRUvINmewvKFIQD8Rwa4jcl35WFG-F-jPG
  datahub/ingestion/source/openapi.py,sha256=3ea2ORz1cuq4e7L2hSjxG9Cw3__pVoJ5UNYTJS3EnKU,17386
  datahub/ingestion/source/openapi_parser.py,sha256=1_68wHWe_SzWYEyC1YVDw9vxoadKjW1yv8DecvyIhwY,13606
  datahub/ingestion/source/preset.py,sha256=eq7h1qKs8nfSBVot1ofN-YgZhw_rzq8DG4cKOGfDHko,3948
- datahub/ingestion/source/pulsar.py,sha256=bKtBzBTPuKwdTN4nSs7M0FYfA-q6FDc554W2NhnuWEY,19475
+ datahub/ingestion/source/pulsar.py,sha256=H8XJC7xIX8Kdkd7006PxllAGVO_Pjza8Xx9VUBOvpPc,19827
  datahub/ingestion/source/redash.py,sha256=E-a14X19zppPun7_-S-pZ2lRiw1-68QiT-jL7bDzG10,32057
  datahub/ingestion/source/salesforce.py,sha256=S6LSM6mzl8-zKbrJPoINhM1SCpYfM244Xb74pbEI-J0,31792
  datahub/ingestion/source/source_registry.py,sha256=a2mLjJPLkSI-gYCTb_7U7Jo4D8jGknNQ_yScPIihXFk,1208
@@ -301,7 +301,7 @@ datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=EAak3hJpe75WZSgz6wP
  datahub/ingestion/source/fivetran/fivetran_query.py,sha256=vLrTj7e-0NxZ2U4bWTB57pih42WirqPlUvwtIRfStlQ,5275
  datahub/ingestion/source/gc/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  datahub/ingestion/source/gc/datahub_gc.py,sha256=f6Erj3KfD0Hx3ydwL5MUVCZgFzS9c6U2Pkr54JLIUOA,12394
- datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=rGIwUKPlNu2XR0YT8DuJGg2pPGIr3MM-YDa5Slo2vNY,14470
+ datahub/ingestion/source/gc/dataprocess_cleanup.py,sha256=GOdLVYYfmmHV7Xtr-cwCTiQiRSVwq80Nu0EmARyoEeo,15323
  datahub/ingestion/source/gc/execution_request_cleanup.py,sha256=cHJmxz4NmA7VjTX2iGEo3wZ_SDrjC_rCQcnRxKgfUVI,8713
  datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py,sha256=_tms5AqNAJRDRzQmyN_VydzXbdME2lkvTwa5u1La5z8,7353
  datahub/ingestion/source/gcs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -331,7 +331,7 @@ datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKf
  datahub/ingestion/source/looker/looker_dataclasses.py,sha256=ULWLFWsV2cKmTuOFavD8QjEBmnXmvjyr8RbUB62DwJQ,12178
  datahub/ingestion/source/looker/looker_file_loader.py,sha256=c1ewDrIb9VJg1o-asbwX9gL83kgL01vIETzzbmZIhmw,4267
  datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=QTTCW-rPNUoazQG_sTJbCARXJzQ7NKS-XKURp2AAWls,11106
- datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=GxK-NkP0Evzv_se3l5f-z-HZZNQRjYr9py_h0T6ReHY,2902
+ datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=mO4G4MNA4YZFvZaDBpdiJ2vP3irC82kY34RdaK4Pbfs,3100
  datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
  datahub/ingestion/source/looker/looker_source.py,sha256=AByQxWVfOBqOtZPaR_cw9SB-tFZtfppiKRkFSbcK1GA,65346
  datahub/ingestion/source/looker/looker_template_language.py,sha256=EG4ZfVZ0x53lgaYh2ohzL4ZCy9KsX0TA51XqCmsCd2Q,14328
@@ -485,7 +485,7 @@ datahub/ingestion/source/state_provider/datahub_ingestion_checkpointing_provider
  datahub/ingestion/source/state_provider/file_ingestion_checkpointing_provider.py,sha256=xsH7Ao_05VTjqpkzLkhdf5B1ULMzFoD8vkJJIJU9w-U,4077
  datahub/ingestion/source/state_provider/state_provider_registry.py,sha256=SVq4mIyGNmLXE9OZx1taOiNPqDoQp03-Ot9rYnB5F3k,401
  datahub/ingestion/source/tableau/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- datahub/ingestion/source/tableau/tableau.py,sha256=LgsVZPMRiJiyyB9-ljj53T8WGUyBSiQwByW_1TSuxC4,130417
+ datahub/ingestion/source/tableau/tableau.py,sha256=AFlDng8EfvBvZL692hMf_sfzGwpHpUU6FW_ElR4uitQ,131551
  datahub/ingestion/source/tableau/tableau_common.py,sha256=Dy_2pvkPucZJsG_LvQZLlxNEkjh-yOXHlZ4jurq9opM,26069
  datahub/ingestion/source/tableau/tableau_constant.py,sha256=nWElhtDo5kj5mWivZFmtVF_4Ugw0-EatBYWyDVzu5hE,2501
  datahub/ingestion/source/unity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -974,8 +974,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
- acryl_datahub-0.15.0rc9.dist-info/METADATA,sha256=lTHIKVVIg5N9xzphcWZ_fhOb0yL1MEx4lRidtr2GvJU,172484
- acryl_datahub-0.15.0rc9.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
- acryl_datahub-0.15.0rc9.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
- acryl_datahub-0.15.0rc9.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
- acryl_datahub-0.15.0rc9.dist-info/RECORD,,
+ acryl_datahub-0.15.0rc11.dist-info/METADATA,sha256=0GnLLvLM_Fvdw4XNusJLRrDZ0vemeu_B0ftz8XQXmbs,174408
+ acryl_datahub-0.15.0rc11.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+ acryl_datahub-0.15.0rc11.dist-info/entry_points.txt,sha256=Yj0PWB0LQOq4Rj2fyR6ETx4BUGw4TOcNL0ZNoAZ9kQg,9504
+ acryl_datahub-0.15.0rc11.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
+ acryl_datahub-0.15.0rc11.dist-info/RECORD,,
datahub/__init__.py CHANGED
@@ -3,7 +3,7 @@ import warnings
 
  # Published at https://pypi.org/project/acryl-datahub/.
  __package_name__ = "acryl-datahub"
- __version__ = "0.15.0rc9"
+ __version__ = "0.15.0rc11"
 
 
  def is_dev_mode() -> bool:
datahub/cli/delete_cli.py CHANGED
@@ -214,14 +214,47 @@ def references(urn: str, dry_run: bool, force: bool) -> None:
 
 
  @delete.command()
- @click.option("--urn", required=True, type=str, help="the urn of the entity")
- def undo_by_filter(urn: str) -> None:
+ @click.option("--urn", required=False, type=str, help="the urn of the entity")
+ @click.option(
+     "-p",
+     "--platform",
+     required=False,
+     type=str,
+     help="Platform filter (e.g. snowflake)",
+ )
+ @click.option(
+     "-b",
+     "--batch-size",
+     required=False,
+     default=3000,
+     type=int,
+     help="Batch size when querying for entities to un-soft delete."
+     "Maximum 10000. Large batch sizes may cause timeouts.",
+ )
+ def undo_by_filter(
+     urn: Optional[str], platform: Optional[str], batch_size: int
+ ) -> None:
      """
-     Undo a soft deletion of an entity
+     Undo soft deletion by filters
      """
      graph = get_default_graph()
      logger.info(f"Using {graph}")
-     graph.set_soft_delete_status(urn=urn, delete=False)
+     if urn:
+         graph.set_soft_delete_status(urn=urn, delete=False)
+     else:
+         urns = list(
+             graph.get_urns_by_filter(
+                 platform=platform,
+                 query="*",
+                 status=RemovedStatusFilter.ONLY_SOFT_DELETED,
+                 batch_size=batch_size,
+             )
+         )
+         logger.info(f"Going to un-soft delete {len(urns)} urns")
+         urns_iter = progressbar.progressbar(urns, redirect_stdout=True)
+         for urn in urns_iter:
+             assert urn
+             graph.set_soft_delete_status(urn=urn, delete=False)
 
 
  @delete.command(no_args_is_help=True)
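Note: the new `undo_by_filter` path above reduces to two graph-client calls — list soft-deleted URNs, then flip their status back. A minimal standalone sketch of that flow is below (not part of the package; the import paths and the `get_default_graph()` bootstrap are assumptions to verify against the installed datahub version):

```python
# Sketch: un-soft-delete every soft-deleted entity for one platform,
# mirroring the CLI logic in the diff above.
from datahub.ingestion.graph.client import get_default_graph
from datahub.ingestion.graph.filters import RemovedStatusFilter

graph = get_default_graph()  # uses the server/token already configured for the datahub CLI

soft_deleted_urns = list(
    graph.get_urns_by_filter(
        platform="snowflake",  # plays the role of the new --platform option
        query="*",
        status=RemovedStatusFilter.ONLY_SOFT_DELETED,
        batch_size=3000,  # default of the new --batch-size option
    )
)

for urn in soft_deleted_urns:
    graph.set_soft_delete_status(urn=urn, delete=False)
```

From the command line the same path should be reachable as `datahub delete undo-by-filter --platform snowflake`, since Click derives the subcommand name from the `undo_by_filter` function name.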
datahub/configuration/kafka_consumer_config.py CHANGED
@@ -1,3 +1,4 @@
+ import inspect
  import logging
  from typing import Any, Dict, Optional
 
@@ -34,5 +35,34 @@ class CallableConsumerConfig:
              "oauth_cb must be a string representing python function reference "
              "in the format <python-module>:<function-name>."
          )
+
+         call_back_fn = import_path(call_back)
+         self._validate_call_back_fn_signature(call_back_fn)
+
          # Set the callback
-         self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = import_path(call_back)
+         self._config[CallableConsumerConfig.CALLBACK_ATTRIBUTE] = call_back_fn
+
+     def _validate_call_back_fn_signature(self, call_back_fn: Any) -> None:
+         sig = inspect.signature(call_back_fn)
+
+         num_positional_args = len(
+             [
+                 param
+                 for param in sig.parameters.values()
+                 if param.kind
+                 in (
+                     inspect.Parameter.POSITIONAL_ONLY,
+                     inspect.Parameter.POSITIONAL_OR_KEYWORD,
+                 )
+                 and param.default == inspect.Parameter.empty
+             ]
+         )
+
+         has_variadic_args = any(
+             param.kind == inspect.Parameter.VAR_POSITIONAL
+             for param in sig.parameters.values()
+         )
+
+         assert num_positional_args == 1 or (
+             has_variadic_args and num_positional_args <= 1
+         ), "oauth_cb function must accept single positional argument."
datahub/ingestion/source/gc/dataprocess_cleanup.py CHANGED
@@ -208,22 +208,28 @@ class DataProcessCleanup:
          dpis = []
          start = 0
          while True:
-             job_query_result = self.ctx.graph.execute_graphql(
-                 DATA_PROCESS_INSTANCES_QUERY,
-                 {"dataJobUrn": job_urn, "start": start, "count": batch_size},
-             )
-             job_data = job_query_result.get("dataJob")
-             if not job_data:
-                 raise ValueError(f"Error getting job {job_urn}")
-
-             runs_data = job_data.get("runs")
-             if not runs_data:
-                 raise ValueError(f"Error getting runs for {job_urn}")
-
-             runs = runs_data.get("runs")
-             dpis.extend(runs)
-             start += batch_size
-             if len(runs) < batch_size:
+             try:
+                 job_query_result = self.ctx.graph.execute_graphql(
+                     DATA_PROCESS_INSTANCES_QUERY,
+                     {"dataJobUrn": job_urn, "start": start, "count": batch_size},
+                 )
+                 job_data = job_query_result.get("dataJob")
+                 if not job_data:
+                     logger.error(f"Error getting job {job_urn}")
+                     break
+
+                 runs_data = job_data.get("runs")
+                 if not runs_data:
+                     logger.error(f"Error getting runs for {job_urn}")
+                     break
+
+                 runs = runs_data.get("runs")
+                 dpis.extend(runs)
+                 start += batch_size
+                 if len(runs) < batch_size:
+                     break
+             except Exception as e:
+                 logger.error(f"Exception while fetching DPIs for job {job_urn}: {e}")
                  break
          return dpis
 
@@ -243,8 +249,12 @@
              futures[future] = dpi
 
          for future in as_completed(futures):
-             deleted_count_last_n += 1
-             futures[future]["deleted"] = True
+             try:
+                 future.result()
+                 deleted_count_last_n += 1
+                 futures[future]["deleted"] = True
+             except Exception as e:
+                 logger.error(f"Exception while deleting DPI: {e}")
 
              if deleted_count_last_n % self.config.batch_size == 0:
                  logger.info(f"Deleted {deleted_count_last_n} DPIs from {job.urn}")
@@ -279,7 +289,7 @@
          dpis = self.fetch_dpis(job.urn, self.config.batch_size)
          dpis.sort(
              key=lambda x: x["created"]["time"]
-             if x["created"] and x["created"]["time"]
+             if "created" in x and "time" in x["created"]
              else 0,
              reverse=True,
          )
@@ -314,15 +324,23 @@
              if dpi.get("deleted"):
                  continue
 
-             if dpi["created"]["time"] < retention_time * 1000:
+             if (
+                 "created" not in dpi
+                 or "time" not in dpi["created"]
+                 or dpi["created"]["time"] < retention_time * 1000
+             ):
                  future = executor.submit(
                      self.delete_entity, dpi["urn"], "dataprocessInstance"
                  )
                  futures[future] = dpi
 
          for future in as_completed(futures):
-             deleted_count_retention += 1
-             futures[future]["deleted"] = True
+             try:
+                 future.result()
+                 deleted_count_retention += 1
+                 futures[future]["deleted"] = True
+             except Exception as e:
+                 logger.error(f"Exception while deleting DPI: {e}")
 
              if deleted_count_retention % self.config.batch_size == 0:
                  logger.info(
@@ -378,8 +396,11 @@
              dataFlows[flow.urn] = flow
 
          scroll_id: Optional[str] = None
+         previous_scroll_id: Optional[str] = None
+
          dataJobs: Dict[str, List[DataJobEntity]] = defaultdict(list)
          deleted_jobs: int = 0
+
          while True:
              result = self.ctx.graph.execute_graphql(
                  DATAJOB_QUERY,
@@ -426,9 +447,11 @@
                  else:
                      dataJobs[datajob_entity.flow_urn].append(datajob_entity)
 
-             if not scroll_id:
+             if not scroll_id or previous_scroll_id == scroll_id:
                  break
 
+             previous_scroll_id = scroll_id
+
          logger.info(f"Deleted {deleted_jobs} DataJobs")
          # Delete empty dataflows if needed
          if self.config.delete_empty_data_flows:
@@ -443,4 +466,5 @@
                  if deleted_jobs % self.config.batch_size == 0:
                      logger.info(f"Deleted {deleted_data_flows} DataFlows")
          logger.info(f"Deleted {deleted_data_flows} DataFlows")
+
          return []
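The `previous_scroll_id` bookkeeping added above is a generic guard against paginated APIs whose cursor stops advancing. A minimal standalone sketch of the same pattern (the `fetch_page` callable is a placeholder, not a DataHub API):

```python
from typing import Callable, List, Optional, Tuple

Page = Tuple[List[dict], Optional[str]]  # (items, next scroll id)


def fetch_all(fetch_page: Callable[[Optional[str]], Page]) -> List[dict]:
    items: List[dict] = []
    scroll_id: Optional[str] = None
    previous_scroll_id: Optional[str] = None

    while True:
        page_items, scroll_id = fetch_page(scroll_id)
        items.extend(page_items)

        # Stop on an exhausted cursor, or on a cursor that stopped advancing
        # (which would otherwise loop forever).
        if not scroll_id or previous_scroll_id == scroll_id:
            break
        previous_scroll_id = scroll_id

    return items
```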
datahub/ingestion/source/looker/looker_liquid_tag.py CHANGED
@@ -4,6 +4,7 @@ from typing import ClassVar, Optional, TextIO
  from liquid import Environment
  from liquid.ast import Node
  from liquid.context import Context
+ from liquid.filter import string_filter
  from liquid.parse import expect, get_parser
  from liquid.stream import TokenStream
  from liquid.tag import Tag
@@ -81,12 +82,18 @@ class ConditionTag(Tag):
  custom_tags = [ConditionTag]
 
 
+ @string_filter
+ def sql_quote_filter(variable: str) -> str:
+     return f"'{variable}'"
+
+
  @lru_cache(maxsize=1)
  def _create_env() -> Environment:
-     env: Environment = Environment()
+     env: Environment = Environment(strict_filters=False)
      # register tag. One time activity
      for custom_tag in custom_tags:
          env.add_tag(custom_tag)
+     env.add_filter("sql_quote", sql_quote_filter)
      return env
 
 
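For reference, a small standalone sketch of what the newly registered `sql_quote` filter does, using the same python-liquid APIs imported above; the template string is illustrative and not taken from any LookML model:

```python
from liquid import Environment
from liquid.filter import string_filter


@string_filter
def sql_quote_filter(variable: str) -> str:
    # Same behaviour as the filter registered in _create_env: wrap the value in single quotes.
    return f"'{variable}'"


env = Environment(strict_filters=False)  # unknown filters are ignored instead of raising
env.add_filter("sql_quote", sql_quote_filter)

template = env.from_string("WHERE region = {{ region | sql_quote }}")
print(template.render(region="emea"))  # -> WHERE region = 'emea'
```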
datahub/ingestion/source/pulsar.py CHANGED
@@ -78,7 +78,17 @@ class PulsarSchema:
      def __init__(self, schema):
          self.schema_version = schema.get("version")
 
-         avro_schema = json.loads(schema.get("data"))
+         schema_data = schema.get("data")
+         if not schema_data:
+             logger.warning("Schema data is empty or None. Using default empty schema.")
+             schema_data = "{}"
+
+         try:
+             avro_schema = json.loads(schema_data)
+         except json.JSONDecodeError as e:
+             logger.error(f"Invalid JSON schema: {schema_data}. Error: {str(e)}")
+             avro_schema = {}
+
          self.schema_name = avro_schema.get("namespace") + "." + avro_schema.get("name")
          self.schema_description = avro_schema.get("doc")
          self.schema_type = schema.get("type")
datahub/ingestion/source/tableau/tableau.py CHANGED
@@ -68,6 +68,7 @@ from datahub.ingestion.api.source import (
      CapabilityReport,
      MetadataWorkUnitProcessor,
      Source,
+     StructuredLogLevel,
      TestableSource,
      TestConnectionReport,
  )
@@ -289,16 +290,12 @@ class TableauConnectionConfig(ConfigModel):
              server.auth.sign_in(authentication)
              return server
          except ServerResponseError as e:
+             message = f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
              if isinstance(authentication, PersonalAccessTokenAuth):
                  # Docs on token expiry in Tableau:
                  # https://help.tableau.com/current/server/en-us/security_personal_access_tokens.htm#token-expiry
-                 logger.info(
-                     "Error authenticating with Tableau. Note that Tableau personal access tokens "
-                     "expire if not used for 15 days or if over 1 year old"
-                 )
-                 raise ValueError(
-                     f"Unable to login (invalid/expired credentials or missing permissions): {str(e)}"
-                 ) from e
+                 message = f"Error authenticating with Tableau. Note that Tableau personal access tokens expire if not used for 15 days or if over 1 year old: {str(e)}"
+             raise ValueError(message) from e
          except Exception as e:
              raise ValueError(
                  f"Unable to login (check your Tableau connection and credentials): {str(e)}"
@@ -700,6 +697,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
                      config=self.config,
                      ctx=self.ctx,
                      site=site,
+                     site_id=site.id,
                      report=self.report,
                      server=self.server,
                      platform=self.platform,
@@ -707,11 +705,19 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
                  logger.info(f"Ingesting assets of site '{site.content_url}'.")
                  yield from site_source.ingest_tableau_site()
          else:
-             site = self.server.sites.get_by_id(self.server.site_id)
+             site = None
+             with self.report.report_exc(
+                 title="Unable to fetch site details. Site hierarchy may be incomplete and external urls may be missing.",
+                 message="This usually indicates missing permissions. Ensure that you have all necessary permissions.",
+                 level=StructuredLogLevel.WARN,
+             ):
+                 site = self.server.sites.get_by_id(self.server.site_id)
+
              site_source = TableauSiteSource(
                  config=self.config,
                  ctx=self.ctx,
                  site=site,
+                 site_id=self.server.site_id,
                  report=self.report,
                  server=self.server,
                  platform=self.platform,
@@ -722,6 +728,7 @@ class TableauSource(StatefulIngestionSourceBase, TestableSource):
                  title="Failed to Retrieve Tableau Metadata",
                  message="Unable to retrieve metadata from tableau.",
                  context=str(md_exception),
+                 exc=md_exception,
              )
 
      def close(self) -> None:
@@ -743,7 +750,8 @@ class TableauSiteSource:
          self,
          config: TableauConfig,
          ctx: PipelineContext,
-         site: SiteItem,
+         site: Optional[SiteItem],
+         site_id: Optional[str],
          report: TableauSourceReport,
          server: Server,
          platform: str,
@@ -752,9 +760,16 @@
          self.report = report
          self.server: Server = server
          self.ctx: PipelineContext = ctx
-         self.site: SiteItem = site
          self.platform = platform
 
+         self.site: Optional[SiteItem] = site
+         if site_id is not None:
+             self.site_id: str = site_id
+         else:
+             assert self.site is not None, "site or site_id is required"
+             assert self.site.id is not None, "site_id is required when site is provided"
+             self.site_id = self.site.id
+
          self.database_tables: Dict[str, DatabaseTable] = {}
          self.tableau_stat_registry: Dict[str, UsageStat] = {}
          self.tableau_project_registry: Dict[str, TableauProject] = {}
@@ -808,7 +823,7 @@
      def _re_authenticate(self):
          tableau_auth: Union[
              TableauAuth, PersonalAccessTokenAuth
-         ] = self.config.get_tableau_auth(self.site.content_url)
+         ] = self.config.get_tableau_auth(self.site_id)
          self.server.auth.sign_in(tableau_auth)
 
      @property
@@ -826,6 +841,7 @@
              if not view.id:
                  continue
              self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views)
+         logger.info(f"Got Tableau stats for {len(self.tableau_stat_registry)} assets")
          logger.debug("Tableau stats %s", self.tableau_stat_registry)
 
      def _populate_database_server_hostname_map(self) -> None:
@@ -876,7 +892,7 @@
          ancestors = [cur_proj.name]
          while cur_proj.parent_id is not None:
              if cur_proj.parent_id not in all_project_map:
-                 self.report.report_warning(
+                 self.report.warning(
                      "project-issue",
                      f"Parent project {cur_proj.parent_id} not found. We need Site Administrator Explorer permissions.",
                  )
@@ -974,8 +990,11 @@
                  self.datasource_project_map[ds.id] = ds.project_id
          except Exception as e:
              self.report.get_all_datasources_query_failed = True
-             logger.info(f"Get all datasources query failed due to error {e}")
-             logger.debug("Error stack trace", exc_info=True)
+             self.report.warning(
+                 title="Unexpected Query Error",
+                 message="Get all datasources query failed due to error",
+                 exc=e,
+             )
 
      def _init_workbook_registry(self) -> None:
          if self.server is None:
@@ -1141,7 +1160,6 @@
          )
 
          if node_limit_errors:
-             logger.debug(f"Node Limit Error. query_data {query_data}")
              self.report.warning(
                  title="Tableau Data Exceed Predefined Limit",
                  message="The numbers of record in result set exceeds a predefined limit. Increase the tableau "
@@ -1257,9 +1275,10 @@
                  wrk_id: Optional[str] = workbook.get(c.ID)
                  prj_name: Optional[str] = workbook.get(c.PROJECT_NAME)
 
-                 logger.debug(
-                     f"Skipping workbook {wrk_name}({wrk_id}) as it is project {prj_name}({project_luid}) not "
-                     f"present in project registry"
+                 self.report.warning(
+                     title="Skipping Missing Workbook",
+                     message="Skipping workbook as its project is not present in project registry",
+                     context=f"workbook={wrk_name}({wrk_id}), project={prj_name}({project_luid})",
                  )
                  continue
 
@@ -1453,7 +1472,7 @@
                  c.COLUMNS_CONNECTION
              ].get("totalCount")
              if not is_custom_sql and not num_tbl_cols:
-                 logger.debug(
+                 logger.warning(
                      f"Skipping upstream table with id {table[c.ID]}, no columns: {table}"
                  )
                  continue
@@ -1469,7 +1488,12 @@
                  table, default_schema_map=self.config.default_schema_map
              )
          except Exception as e:
-             logger.info(f"Failed to generate upstream reference for {table}: {e}")
+             self.report.warning(
+                 title="Potentially Missing Lineage Issue",
+                 message="Failed to generate upstream reference",
+                 exc=e,
+                 context=f"table={table}",
+             )
              continue
 
          table_urn = ref.make_dataset_urn(
@@ -1917,10 +1941,12 @@
              self.datasource_project_map[ds_result.id] = ds_result.project_id
          except Exception as e:
              self.report.num_get_datasource_query_failures += 1
-             logger.warning(
-                 f"Failed to get datasource project_luid for {ds_luid} due to error {e}"
+             self.report.warning(
+                 title="Unexpected Query Error",
+                 message="Failed to get datasource details",
+                 exc=e,
+                 context=f"ds_luid={ds_luid}",
              )
-             logger.debug("Error stack trace", exc_info=True)
 
      def _get_workbook_project_luid(self, wb: dict) -> Optional[str]:
          if wb.get(c.LUID) and self.workbook_project_map.get(wb[c.LUID]):
@@ -3181,10 +3207,10 @@
              else:
                  # This is a root Tableau project since the parent_project_id is None.
                  # For a root project, either the site is the parent, or the platform is the default parent.
-                 if self.config.add_site_container and self.site and self.site.id:
+                 if self.config.add_site_container:
                      # The site containers have already been generated by emit_site_container, so we
                      # don't need to emit them again here.
-                     parent_project_key = self.gen_site_key(self.site.id)
+                     parent_project_key = self.gen_site_key(self.site_id)
 
              yield from gen_containers(
                  container_key=project_key,
@@ -3201,12 +3227,12 @@
              yield from emit_project_in_topological_order(project)
 
      def emit_site_container(self):
-         if not self.site or not self.site.id:
+         if not self.site:
              logger.warning("Can not ingest site container. No site information found.")
              return
 
          yield from gen_containers(
-             container_key=self.gen_site_key(self.site.id),
+             container_key=self.gen_site_key(self.site_id),
              name=self.site.name or "Default",
              sub_types=[c.SITE],
          )