acryl-datahub 1.1.0.1rc6__py3-none-any.whl → 1.1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (25)
  1. {acryl_datahub-1.1.0.1rc6.dist-info → acryl_datahub-1.1.0.2.dist-info}/METADATA +2522 -2522
  2. {acryl_datahub-1.1.0.1rc6.dist-info → acryl_datahub-1.1.0.2.dist-info}/RECORD +25 -23
  3. datahub/_version.py +1 -1
  4. datahub/api/entities/external/lake_formation_external_entites.py +161 -0
  5. datahub/api/entities/external/restricted_text.py +10 -10
  6. datahub/api/entities/external/unity_catalog_external_entites.py +5 -2
  7. datahub/emitter/rest_emitter.py +29 -4
  8. datahub/ingestion/graph/client.py +2 -0
  9. datahub/ingestion/graph/config.py +1 -0
  10. datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +20 -10
  11. datahub/ingestion/sink/datahub_rest.py +1 -0
  12. datahub/ingestion/source/aws/aws_common.py +4 -0
  13. datahub/ingestion/source/aws/glue.py +488 -243
  14. datahub/ingestion/source/aws/tag_entities.py +292 -0
  15. datahub/ingestion/source/sql/mssql/source.py +207 -18
  16. datahub/ingestion/source/unity/source.py +2 -3
  17. datahub/metadata/_internal_schema_classes.py +499 -499
  18. datahub/metadata/_urns/urn_defs.py +1766 -1766
  19. datahub/metadata/schema.avsc +17480 -17093
  20. datahub/metadata/schemas/__init__.py +3 -3
  21. datahub/sdk/main_client.py +3 -3
  22. {acryl_datahub-1.1.0.1rc6.dist-info → acryl_datahub-1.1.0.2.dist-info}/WHEEL +0 -0
  23. {acryl_datahub-1.1.0.1rc6.dist-info → acryl_datahub-1.1.0.2.dist-info}/entry_points.txt +0 -0
  24. {acryl_datahub-1.1.0.1rc6.dist-info → acryl_datahub-1.1.0.2.dist-info}/licenses/LICENSE +0 -0
  25. {acryl_datahub-1.1.0.1rc6.dist-info → acryl_datahub-1.1.0.2.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/aws/tag_entities.py (new file)
@@ -0,0 +1,292 @@
+import logging
+from typing import List, Optional
+
+from pydantic import BaseModel
+
+from datahub.api.entities.external.external_entities import (
+    ExternalEntity,
+    ExternalEntityId,
+    LinkedResourceSet,
+    PlatformResourceRepository,
+)
+from datahub.api.entities.external.lake_formation_external_entites import (
+    LakeFormationTag,
+)
+from datahub.api.entities.platformresource.platform_resource import (
+    PlatformResource,
+    PlatformResourceKey,
+    PlatformResourceSearchFields,
+)
+from datahub.metadata.urns import TagUrn
+from datahub.utilities.search_utils import ElasticDocumentQuery
+from datahub.utilities.urns.urn import Urn
+
+logger = logging.getLogger(__name__)
+
+
+class LakeFormationTagSyncContext(BaseModel):
+    # it is intentionally empty
+    platform_instance: Optional[str] = None
+    catalog: Optional[str] = None
+
+
+class LakeFormationTagPlatformResourceId(BaseModel, ExternalEntityId):
+    """
+    A LakeFormationTag is a unique identifier for a Lakeformation tag.
+    """
+
+    tag_key: str
+    tag_value: Optional[str] = None
+    platform_instance: Optional[str]
+    catalog: Optional[str] = None
+    exists_in_lake_formation: bool = False
+    persisted: bool = False
+
+    def __hash__(self) -> int:
+        return hash(self.to_platform_resource_key().id)
+
+    # this is a hack to make sure the property is a string and not private pydantic field
+    @staticmethod
+    def _RESOURCE_TYPE() -> str:
+        return "LakeFormationTagPlatformResource"
+
+    def to_platform_resource_key(self) -> PlatformResourceKey:
+        return PlatformResourceKey(
+            platform="glue",
+            resource_type=str(LakeFormationTagPlatformResourceId._RESOURCE_TYPE()),
+            primary_key=f"{self.catalog}.{self.tag_key}:{self.tag_value}"
+            if self.catalog
+            else f"{self.tag_key}:{self.tag_value}",
+            platform_instance=self.platform_instance,
+        )
+
+    @classmethod
+    def from_tag(
+        cls,
+        tag: LakeFormationTag,
+        platform_instance: Optional[str],
+        platform_resource_repository: PlatformResourceRepository,
+        catalog: Optional[str] = None,
+        exists_in_lake_formation: bool = False,
+    ) -> "LakeFormationTagPlatformResourceId":
+        """
+        Creates a LakeFormationTagPlatformResourceId from a LakeFormationTag.
+        """
+
+        existing_platform_resource = cls.search_by_urn(
+            tag.to_datahub_tag_urn().urn(),
+            platform_resource_repository=platform_resource_repository,
+            tag_sync_context=LakeFormationTagSyncContext(
+                platform_instance=platform_instance,
+                catalog=catalog,
+            ),
+        )
+        if existing_platform_resource:
+            logger.info(
+                f"Found existing LakeFormationTagPlatformResourceId for tag {tag.key}: {existing_platform_resource}"
+            )
+            return existing_platform_resource
+
+        return LakeFormationTagPlatformResourceId(
+            tag_key=tag.key,
+            tag_value=tag.value if tag.value is not None else None,
+            platform_instance=platform_instance,
+            exists_in_lake_formation=exists_in_lake_formation,
+            catalog=catalog,
+            persisted=False,
+        )
+
+    @classmethod
+    def search_by_urn(
+        cls,
+        urn: str,
+        platform_resource_repository: PlatformResourceRepository,
+        tag_sync_context: LakeFormationTagSyncContext,
+    ) -> Optional["LakeFormationTagPlatformResourceId"]:
+        mapped_tags = [
+            t
+            for t in platform_resource_repository.search_by_filter(
+                ElasticDocumentQuery.create_from(
+                    (
+                        PlatformResourceSearchFields.RESOURCE_TYPE,
+                        str(LakeFormationTagPlatformResourceId._RESOURCE_TYPE()),
+                    ),
+                    (PlatformResourceSearchFields.SECONDARY_KEYS, urn),
+                )
+            )
+        ]
+        logger.info(
+            f"Found {len(mapped_tags)} mapped tags for URN {urn}. {mapped_tags}"
+        )
+        if len(mapped_tags) > 0:
+            for platform_resource in mapped_tags:
+                if (
+                    platform_resource.resource_info
+                    and platform_resource.resource_info.value
+                ):
+                    lake_formation_tag_platform_resource = (
+                        LakeFormationTagPlatformResource(
+                            **platform_resource.resource_info.value.as_pydantic_object(
+                                LakeFormationTagPlatformResource
+                            ).dict()
+                        )
+                    )
+                    if (
+                        lake_formation_tag_platform_resource.id.platform_instance
+                        == tag_sync_context.platform_instance
+                        and lake_formation_tag_platform_resource.id.catalog
+                        == tag_sync_context.catalog
+                    ):
+                        lake_formation_tag_id = lake_formation_tag_platform_resource.id
+                        lake_formation_tag_id.exists_in_lake_formation = True
+                        lake_formation_tag_id.persisted = True
+                        return lake_formation_tag_id
+                else:
+                    logger.warning(
+                        f"Platform resource {platform_resource} does not have a resource_info value"
+                    )
+                    continue
+
+        # If we reach here, it means we did not find a mapped tag for the URN
+        logger.info(
+            f"No mapped tag found for URN {urn} with platform instance {tag_sync_context.platform_instance}. Creating a new LakeFormationTagPlatformResourceId."
+        )
+        return None
+
+    @classmethod
+    def from_datahub_urn(
+        cls,
+        urn: str,
+        platform_resource_repository: PlatformResourceRepository,
+        tag_sync_context: LakeFormationTagSyncContext,
+    ) -> "LakeFormationTagPlatformResourceId":
+        """
+        Creates a UnityCatalogTagPlatformResourceId from a DataHub URN.
+        """
+        # First we check if we already have a mapped platform resource for this
+        # urn that is of the type UnityCatalogTagPlatformResource
+        # If we do, we can use it to create the UnityCatalogTagPlatformResourceId
+        # Else, we need to generate a new UnityCatalogTagPlatformResourceId
+        existing_platform_resource_id = cls.search_by_urn(
+            urn, platform_resource_repository, tag_sync_context
+        )
+        if existing_platform_resource_id:
+            logger.info(
+                f"Found existing LakeFormationTagPlatformResourceId for URN {urn}: {existing_platform_resource_id}"
+            )
+            return existing_platform_resource_id
+
+        # Otherwise, we need to create a new UnityCatalogTagPlatformResourceId
+        new_tag_id = cls.generate_tag_id(tag_sync_context, urn)
+        if new_tag_id:
+            # we then check if this tag has already been ingested as a platform
+            # resource in the platform resource repository
+            resource_key = platform_resource_repository.get(
+                new_tag_id.to_platform_resource_key()
+            )
+            if resource_key:
+                logger.info(
+                    f"Tag {new_tag_id} already exists in platform resource repository with {resource_key}"
+                )
+                new_tag_id.exists_in_lake_formation = (
+                    True  # TODO: Check if this is a safe assumption
+                )
+            return new_tag_id
+        raise ValueError(f"Unable to create SnowflakeTagId from DataHub URN: {urn}")
+
+    @classmethod
+    def generate_tag_id(
+        cls, tag_sync_context: LakeFormationTagSyncContext, urn: str
+    ) -> "LakeFormationTagPlatformResourceId":
+        parsed_urn = Urn.from_string(urn)
+        entity_type = parsed_urn.entity_type
+        if entity_type == "tag":
+            new_tag_id = LakeFormationTagPlatformResourceId.from_datahub_tag(
+                TagUrn.from_string(urn), tag_sync_context
+            )
+        else:
+            raise ValueError(f"Unsupported entity type {entity_type} for URN {urn}")
+        return new_tag_id
+
+    @classmethod
+    def from_datahub_tag(
+        cls, tag_urn: TagUrn, tag_sync_context: LakeFormationTagSyncContext
+    ) -> "LakeFormationTagPlatformResourceId":
+        tag = LakeFormationTag.from_urn(tag_urn)
+
+        return LakeFormationTagPlatformResourceId(
+            tag_key=str(tag.key),
+            tag_value=str(tag.value),
+            platform_instance=tag_sync_context.platform_instance,
+            catalog=tag_sync_context.catalog,
+            exists_in_lake_formation=False,
+        )
+
+
+class LakeFormationTagPlatformResource(BaseModel, ExternalEntity):
+    datahub_urns: LinkedResourceSet
+    managed_by_datahub: bool
+    id: LakeFormationTagPlatformResourceId
+    allowed_values: Optional[List[str]]
+
+    def get_id(self) -> ExternalEntityId:
+        return self.id
+
+    def is_managed_by_datahub(self) -> bool:
+        return self.managed_by_datahub
+
+    def datahub_linked_resources(self) -> LinkedResourceSet:
+        return self.datahub_urns
+
+    def as_platform_resource(self) -> PlatformResource:
+        return PlatformResource.create(
+            key=self.id.to_platform_resource_key(),
+            secondary_keys=[u for u in self.datahub_urns.urns],
+            value=self,
+        )
+
+    @classmethod
+    def get_from_datahub(
+        cls,
+        lake_formation_tag_id: LakeFormationTagPlatformResourceId,
+        platform_resource_repository: PlatformResourceRepository,
+        managed_by_datahub: bool = False,
+    ) -> "LakeFormationTagPlatformResource":
+        # Search for linked DataHub URNs
+        platform_resources = [
+            r
+            for r in platform_resource_repository.search_by_filter(
+                ElasticDocumentQuery.create_from(
+                    (
+                        PlatformResourceSearchFields.RESOURCE_TYPE,
+                        str(LakeFormationTagPlatformResourceId._RESOURCE_TYPE()),
+                    ),
+                    (
+                        PlatformResourceSearchFields.PRIMARY_KEY,
+                        f"{lake_formation_tag_id.tag_key}/{lake_formation_tag_id.tag_value}",
+                    ),
+                )
+            )
+        ]
+        for platform_resource in platform_resources:
+            if (
+                platform_resource.resource_info
+                and platform_resource.resource_info.value
+            ):
+                lf_tag = LakeFormationTagPlatformResource(
+                    **platform_resource.resource_info.value.as_pydantic_object(
+                        LakeFormationTagPlatformResource
+                    ).dict()
+                )
+                if (
+                    lf_tag.id.platform_instance
+                    == lake_formation_tag_id.platform_instance
+                    and lf_tag.id.catalog == lake_formation_tag_id.catalog
+                ):
+                    return lf_tag
+        return cls(
+            id=lake_formation_tag_id,
+            datahub_urns=LinkedResourceSet(urns=[]),
+            managed_by_datahub=managed_by_datahub,
+            allowed_values=None,
+        )
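
For orientation, a minimal usage sketch of the new classes above. This is illustrative only: how a PlatformResourceRepository instance is obtained, and the example tag URN "urn:li:tag:pii", are assumptions rather than part of this diff.

from datahub.api.entities.external.external_entities import PlatformResourceRepository
from datahub.ingestion.source.aws.tag_entities import (
    LakeFormationTagPlatformResource,
    LakeFormationTagPlatformResourceId,
    LakeFormationTagSyncContext,
)

repo: PlatformResourceRepository = ...  # assumed: wired up by the Glue ingestion source
context = LakeFormationTagSyncContext(platform_instance=None, catalog=None)

# Resolve (or mint) the Lake Formation-side id for a DataHub tag URN.
tag_id = LakeFormationTagPlatformResourceId.from_datahub_urn(
    "urn:li:tag:pii", repo, context
)

# Load the linked platform resource, or an empty, unmanaged one if none exists yet.
resource = LakeFormationTagPlatformResource.get_from_datahub(tag_id, repo)
print(resource.as_platform_resource())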
datahub/ingestion/source/sql/mssql/source.py
@@ -323,9 +323,11 @@ class SQLServerSource(SQLAlchemySource):
             try:
                 yield from self.loop_jobs(inspector, self.config)
             except Exception as e:
-                self.report.report_failure(
-                    "jobs",
-                    f"Failed to list jobs due to error {e}",
+                self.report.failure(
+                    message="Failed to list jobs",
+                    title="SQL Server Jobs Extraction",
+                    context="Error occurred during database-level job extraction",
+                    exc=e,
                 )

     def get_schema_level_workunits(
@@ -343,12 +345,158 @@ class SQLServerSource(SQLAlchemySource):
             try:
                 yield from self.loop_stored_procedures(inspector, schema, self.config)
             except Exception as e:
-                self.report.report_failure(
-                    "jobs",
-                    f"Failed to list jobs due to error {e}",
+                self.report.failure(
+                    message="Failed to list stored procedures",
+                    title="SQL Server Stored Procedures Extraction",
+                    context="Error occurred during schema-level stored procedure extraction",
+                    exc=e,
                 )

+    def _detect_rds_environment(self, conn: Connection) -> bool:
+        """
+        Detect if we're running in an RDS/managed environment vs on-premises.
+        Returns True if RDS/managed, False if on-premises.
+        """
+        try:
+            # Try to access system tables directly - this typically fails in RDS
+            conn.execute("SELECT TOP 1 * FROM msdb.dbo.sysjobs")
+            logger.debug(
+                "Direct table access successful - likely on-premises environment"
+            )
+            return False
+        except Exception:
+            logger.debug("Direct table access failed - likely RDS/managed environment")
+            return True
+
     def _get_jobs(self, conn: Connection, db_name: str) -> Dict[str, Dict[str, Any]]:
+        """
+        Get job information with environment detection to choose optimal method first.
+        """
+        jobs: Dict[str, Dict[str, Any]] = {}
+
+        # Detect environment to choose optimal method first
+        is_rds = self._detect_rds_environment(conn)
+
+        if is_rds:
+            # Managed environment - try stored procedures first
+            try:
+                jobs = self._get_jobs_via_stored_procedures(conn, db_name)
+                logger.info(
+                    "Successfully retrieved jobs using stored procedures (managed environment)"
+                )
+                return jobs
+            except Exception as sp_error:
+                logger.warning(
+                    f"Failed to retrieve jobs via stored procedures in managed environment: {sp_error}"
+                )
+                # Try direct query as fallback (might work in some managed environments)
+                try:
+                    jobs = self._get_jobs_via_direct_query(conn, db_name)
+                    logger.info(
+                        "Successfully retrieved jobs using direct query fallback in managed environment"
+                    )
+                    return jobs
+                except Exception as direct_error:
+                    self.report.failure(
+                        message="Failed to retrieve jobs in managed environment",
+                        title="SQL Server Jobs Extraction",
+                        context="Both stored procedures and direct query methods failed",
+                        exc=direct_error,
+                    )
+        else:
+            # On-premises environment - try direct query first (usually faster)
+            try:
+                jobs = self._get_jobs_via_direct_query(conn, db_name)
+                logger.info(
+                    "Successfully retrieved jobs using direct query (on-premises environment)"
+                )
+                return jobs
+            except Exception as direct_error:
+                logger.warning(
+                    f"Failed to retrieve jobs via direct query in on-premises environment: {direct_error}"
+                )
+                # Try stored procedures as fallback
+                try:
+                    jobs = self._get_jobs_via_stored_procedures(conn, db_name)
+                    logger.info(
+                        "Successfully retrieved jobs using stored procedures fallback in on-premises environment"
+                    )
+                    return jobs
+                except Exception as sp_error:
+                    self.report.failure(
+                        message="Failed to retrieve jobs in on-premises environment",
+                        title="SQL Server Jobs Extraction",
+                        context="Both direct query and stored procedures methods failed",
+                        exc=sp_error,
+                    )
+
+        return jobs
+
+    def _get_jobs_via_stored_procedures(
+        self, conn: Connection, db_name: str
+    ) -> Dict[str, Dict[str, Any]]:
+        jobs: Dict[str, Dict[str, Any]] = {}
+
+        # First, get all jobs
+        jobs_result = conn.execute("EXEC msdb.dbo.sp_help_job")
+        jobs_data = {}
+
+        for row in jobs_result:
+            job_id = str(row["job_id"])
+            jobs_data[job_id] = {
+                "job_id": job_id,
+                "name": row["name"],
+                "description": row.get("description", ""),
+                "date_created": row.get("date_created"),
+                "date_modified": row.get("date_modified"),
+                "enabled": row.get("enabled", 1),
+            }
+
+        # Now get job steps for each job, filtering by database
+        for job_id, job_info in jobs_data.items():
+            try:
+                # Get steps for this specific job
+                steps_result = conn.execute(
+                    f"EXEC msdb.dbo.sp_help_jobstep @job_id = '{job_id}'"
+                )
+
+                job_steps = {}
+                for step_row in steps_result:
+                    # Only include steps that run against our target database
+                    step_database = step_row.get("database_name", "")
+                    if step_database.lower() == db_name.lower() or not step_database:
+                        step_data = {
+                            "job_id": job_id,
+                            "job_name": job_info["name"],
+                            "description": job_info["description"],
+                            "date_created": job_info["date_created"],
+                            "date_modified": job_info["date_modified"],
+                            "step_id": step_row["step_id"],
+                            "step_name": step_row["step_name"],
+                            "subsystem": step_row.get("subsystem", ""),
+                            "command": step_row.get("command", ""),
+                            "database_name": step_database,
+                        }
+                        job_steps[step_row["step_id"]] = step_data
+
+                # Only add job if it has relevant steps
+                if job_steps:
+                    jobs[job_info["name"]] = job_steps
+
+            except Exception as step_error:
+                logger.warning(
+                    f"Failed to get steps for job {job_info['name']}: {step_error}"
+                )
+                continue
+
+        return jobs
+
+    def _get_jobs_via_direct_query(
+        self, conn: Connection, db_name: str
+    ) -> Dict[str, Dict[str, Any]]:
+        """
+        Original method using direct table access for on-premises SQL Server.
+        """
         jobs_data = conn.execute(
             f"""
             SELECT
@@ -371,6 +519,7 @@ class SQLServerSource(SQLAlchemySource):
             where database_name = '{db_name}'
             """
         )
+
         jobs: Dict[str, Dict[str, Any]] = {}
         for row in jobs_data:
             step_data = dict(
@@ -383,11 +532,13 @@ class SQLServerSource(SQLAlchemySource):
                 step_name=row["step_name"],
                 subsystem=row["subsystem"],
                 command=row["command"],
+                database_name=row["database_name"],
             )
             if row["name"] in jobs:
                 jobs[row["name"]][row["step_id"]] = step_data
             else:
                 jobs[row["name"]] = {row["step_id"]: step_data}
+
         return jobs

     def loop_jobs(
@@ -397,21 +548,59 @@ class SQLServerSource(SQLAlchemySource):
     ) -> Iterable[MetadataWorkUnit]:
         """
         Loop MS SQL jobs as dataFlow-s.
-        :return:
+        Now supports both managed and on-premises SQL Server.
         """
         db_name = self.get_db_name(inspector)
-        with inspector.engine.connect() as conn:
-            jobs = self._get_jobs(conn, db_name)
-            for job_name, job_steps in jobs.items():
-                job = MSSQLJob(
-                    name=job_name,
-                    env=sql_config.env,
-                    db=db_name,
-                    platform_instance=sql_config.platform_instance,
+
+        try:
+            with inspector.engine.connect() as conn:
+                jobs = self._get_jobs(conn, db_name)
+
+                if not jobs:
+                    logger.info(f"No jobs found for database: {db_name}")
+                    return
+
+                logger.info(f"Found {len(jobs)} jobs for database: {db_name}")
+
+                for job_name, job_steps in jobs.items():
+                    try:
+                        job = MSSQLJob(
+                            name=job_name,
+                            env=sql_config.env,
+                            db=db_name,
+                            platform_instance=sql_config.platform_instance,
+                        )
+                        data_flow = MSSQLDataFlow(entity=job)
+                        yield from self.construct_flow_workunits(data_flow=data_flow)
+                        yield from self.loop_job_steps(job, job_steps)
+
+                    except Exception as job_error:
+                        logger.warning(f"Failed to process job {job_name}: {job_error}")
+                        self.report.warning(
+                            message=f"Failed to process job {job_name}",
+                            title="SQL Server Jobs Extraction",
+                            context="Error occurred while processing individual job",
+                            exc=job_error,
+                        )
+                        continue
+
+        except Exception as e:
+            error_message = f"Failed to retrieve jobs for database {db_name}: {e}"
+            logger.error(error_message)
+
+            # Provide specific guidance for permission issues
+            if "permission" in str(e).lower() or "denied" in str(e).lower():
+                permission_guidance = (
+                    "For managed SQL Server services, ensure the following permissions are granted:\n"
+                    "GRANT EXECUTE ON msdb.dbo.sp_help_job TO datahub_read;\n"
+                    "GRANT EXECUTE ON msdb.dbo.sp_help_jobstep TO datahub_read;\n"
+                    "For on-premises SQL Server, you may also need:\n"
+                    "GRANT SELECT ON msdb.dbo.sysjobs TO datahub_read;\n"
                    "GRANT SELECT ON msdb.dbo.sysjobsteps TO datahub_read;"
                 )
-            data_flow = MSSQLDataFlow(entity=job)
-            yield from self.construct_flow_workunits(data_flow=data_flow)
-            yield from self.loop_job_steps(job, job_steps)
+                logger.info(permission_guidance)
+
+            raise e

     def loop_job_steps(
         self, job: MSSQLJob, job_steps: Dict[str, Any]
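
The SQL Server changes above make job extraction fall back between direct msdb table reads and the msdb stored procedures depending on the detected environment. A rough sketch of the two probes the connector now relies on follows; the connection string is a placeholder, not taken from this diff.

from sqlalchemy import create_engine, text

# Placeholder DSN; substitute real host, credentials, and ODBC driver.
engine = create_engine(
    "mssql+pyodbc://user:pass@host/msdb?driver=ODBC+Driver+17+for+SQL+Server"
)

with engine.connect() as conn:
    try:
        # On-premises path: direct SELECT against the msdb job tables.
        conn.execute(text("SELECT TOP 1 job_id FROM msdb.dbo.sysjobs"))
    except Exception:
        # Managed/RDS path: stored procedures, which need EXECUTE grants on
        # msdb.dbo.sp_help_job and msdb.dbo.sp_help_jobstep (see the guidance above).
        conn.execute(text("EXEC msdb.dbo.sp_help_job"))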
datahub/ingestion/source/unity/source.py
@@ -785,7 +785,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             description=schema.comment,
             owner_urn=self.get_owner_urn(schema.owner),
             external_url=f"{self.external_url_base}/{schema.catalog.name}/{schema.name}",
-            tags=[tag.to_datahub_tag_urn().urn() for tag in schema_tags]
+            tags=[tag.to_datahub_tag_urn().name for tag in schema_tags]
             if schema_tags
             else None,
         )
@@ -830,7 +830,7 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             description=catalog.comment,
             owner_urn=self.get_owner_urn(catalog.owner),
             external_url=f"{self.external_url_base}/{catalog.name}",
-            tags=[tag.to_datahub_tag_urn().urn() for tag in catalog_tags]
+            tags=[tag.to_datahub_tag_urn().name for tag in catalog_tags]
             if catalog_tags
             else None,
         )
@@ -1083,7 +1083,6 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
             )
         else:
             if tags is not None:
-                logger.debug(f"Column tags are: {tags}")
                 attribution = MetadataAttribution(
                     source="urn:li:dataPlatform:unity-catalog",
                     actor="urn:li:corpuser:datahub",