acryl-datahub 1.0.0rc2__py3-none-any.whl → 1.0.0rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (44)
  1. {acryl_datahub-1.0.0rc2.dist-info → acryl_datahub-1.0.0rc4.dist-info}/METADATA +2446 -2446
  2. {acryl_datahub-1.0.0rc2.dist-info → acryl_datahub-1.0.0rc4.dist-info}/RECORD +44 -43
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_cli.py +2 -2
  5. datahub/cli/ingest_cli.py +27 -92
  6. datahub/configuration/common.py +1 -1
  7. datahub/ingestion/api/decorators.py +1 -1
  8. datahub/ingestion/graph/client.py +1 -1
  9. datahub/ingestion/run/pipeline.py +1 -1
  10. datahub/ingestion/source/identity/okta.py +13 -2
  11. datahub/ingestion/source/looker/looker_common.py +3 -3
  12. datahub/ingestion/source/looker/looker_source.py +1 -1
  13. datahub/ingestion/source/mode.py +3 -3
  14. datahub/ingestion/source/nifi.py +2 -2
  15. datahub/ingestion/source/openapi.py +1 -1
  16. datahub/ingestion/source/openapi_parser.py +1 -1
  17. datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
  18. datahub/ingestion/source/sql/athena.py +2 -2
  19. datahub/ingestion/source/sql/hive_metastore.py +1 -1
  20. datahub/ingestion/source/sql/mssql/source.py +1 -1
  21. datahub/ingestion/source/sql/sql_common.py +1 -1
  22. datahub/ingestion/source/sql/teradata.py +3 -3
  23. datahub/ingestion/source/tableau/tableau.py +19 -0
  24. datahub/ingestion/source/unity/source.py +11 -1
  25. datahub/metadata/schema.avsc +30 -3
  26. datahub/metadata/schemas/AssertionInfo.avsc +3 -1
  27. datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
  28. datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
  29. datahub/metadata/schemas/ChartInfo.avsc +1 -0
  30. datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
  31. datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
  32. datahub/metadata/schemas/InputFields.avsc +3 -1
  33. datahub/metadata/schemas/MetadataChangeEvent.avsc +7 -2
  34. datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
  35. datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
  36. datahub/sql_parsing/schema_resolver.py +1 -1
  37. datahub/sql_parsing/sqlglot_lineage.py +1 -1
  38. datahub/upgrade/upgrade.py +2 -2
  39. datahub/utilities/ingest_utils.py +106 -0
  40. datahub/utilities/mapping.py +1 -1
  41. {acryl_datahub-1.0.0rc2.dist-info → acryl_datahub-1.0.0rc4.dist-info}/LICENSE +0 -0
  42. {acryl_datahub-1.0.0rc2.dist-info → acryl_datahub-1.0.0rc4.dist-info}/WHEEL +0 -0
  43. {acryl_datahub-1.0.0rc2.dist-info → acryl_datahub-1.0.0rc4.dist-info}/entry_points.txt +0 -0
  44. {acryl_datahub-1.0.0rc2.dist-info → acryl_datahub-1.0.0rc4.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/mode.py
@@ -759,9 +759,9 @@ class ModeSource(StatefulIngestionSourceBase):
                 return platform, database
             else:
                 self.report.report_warning(
-                    title="Failed to create Data Platform Urn",
-                    message=f"Cannot create datasource urn for datasource id: "
-                    f"{data_source_id}",
+                    title="Unable to construct upstream lineage",
+                    message="We did not find a data source / connection with a matching ID, meaning that we do not know the platform/database to use in lineage.",
+                    context=f"Data Source ID: {data_source_id}",
                 )
                 return None, None

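The rewritten warning follows the structured reporting convention used elsewhere in this release: `title` and `message` stay constant while the variable identifier moves into `context`, so repeated occurrences can aggregate under one logical warning instead of producing one entry per datasource. A minimal stand-in sketch of that grouping idea (the `StubReport` class below is illustrative, not DataHub's actual `SourceReport`):

# Illustrative only: a stand-in report object showing why the variable
# part belongs in `context`: warnings keyed on (title, message)
# collapse into one entry with many contexts.
from collections import defaultdict
from typing import DefaultDict, List, Tuple

class StubReport:
    def __init__(self) -> None:
        self.warnings: DefaultDict[Tuple[str, str], List[str]] = defaultdict(list)

    def report_warning(self, title: str, message: str, context: str) -> None:
        # Grouping key excludes context, so per-entity details collect
        # under a single logical warning instead of flooding the report.
        self.warnings[(title, message)].append(context)

report = StubReport()
for data_source_id in ("ds-1", "ds-2", "ds-3"):
    report.report_warning(
        title="Unable to construct upstream lineage",
        message="No data source / connection with a matching ID was found.",
        context=f"Data Source ID: {data_source_id}",
    )
assert len(report.warnings) == 1  # one logical warning, three contexts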
datahub/ingestion/source/nifi.py
@@ -488,7 +488,7 @@ class NifiSource(Source):
     def get_report(self) -> SourceReport:
         return self.report

-    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:  # noqa: C901
+    def update_flow(self, pg_flow_dto: Dict, recursion_level: int = 0) -> None:
         """
         Update self.nifi_flow with contents of the input process group `pg_flow_dto`
         """

@@ -894,7 +894,7 @@ class NifiSource(Source):
         if not delete_response.ok:
             logger.error("failed to delete provenance ", provenance_uri)

-    def construct_workunits(self) -> Iterable[MetadataWorkUnit]:  # noqa: C901
+    def construct_workunits(self) -> Iterable[MetadataWorkUnit]:
         rootpg = self.nifi_flow.root_process_group
         flow_name = rootpg.name  # self.config.site_name
         flow_urn = self.make_flow_urn()
datahub/ingestion/source/openapi.py
@@ -270,7 +270,7 @@ class APISource(Source, ABC):
         mce = MetadataChangeEvent(proposedSnapshot=dataset_snapshot)
         return ApiWorkUnit(id=dataset_name, mce=mce)

-    def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:  # noqa: C901
+    def get_workunits_internal(self) -> Iterable[ApiWorkUnit]:
         config = self.config

         sw_dict = self.config.get_swagger()
datahub/ingestion/source/openapi_parser.py
@@ -111,7 +111,7 @@ def check_sw_version(sw_dict: dict) -> None:
     )


-def get_endpoints(sw_dict: dict) -> dict:  # noqa: C901
+def get_endpoints(sw_dict: dict) -> dict:
     """
     Get all the URLs, together with their description and the tags
     """
datahub/ingestion/source/powerbi_report_server/report_server_domain.py
@@ -33,7 +33,7 @@ class CatalogItem(BaseModel):
     )

     @validator("display_name", always=True)
-    def validate_diplay_name(cls, value, values):  # noqa: N805
+    def validate_diplay_name(cls, value, values):
         if values["created_by"]:
             return values["created_by"].split("\\")[-1]
         return ""
datahub/ingestion/source/sql/athena.py
@@ -55,7 +55,7 @@ try:
 except ImportError:
     _F = typing.TypeVar("_F", bound=typing.Callable[..., typing.Any])

-    def override(f: _F, /) -> _F:  # noqa: F811
+    def override(f: _F, /) -> _F:
         return f


@@ -104,7 +104,7 @@ class CustomAthenaRestDialect(AthenaRestDialect):
         return "\n".join([r for r in res])

     @typing.no_type_check
-    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:  # noqa: C901
+    def _get_column_type(self, type_: Union[str, Dict[str, Any]]) -> TypeEngine:
         """Derives the data type of the Athena column.

         This method is overwritten to extend the behavior of PyAthena.
datahub/ingestion/source/sql/hive_metastore.py
@@ -67,7 +67,7 @@ TableKey = namedtuple("TableKey", ["schema", "table"])


 class HiveMetastoreConfigMode(StrEnum):
-    hive: str = "hive"  # noqa: F811
+    hive: str = "hive"
     presto: str = "presto"
     presto_on_hive: str = "presto-on-hive"
     trino: str = "trino"
datahub/ingestion/source/sql/mssql/source.py
@@ -401,7 +401,7 @@ class SQLServerSource(SQLAlchemySource):
             data_job.add_property(name=data_name, value=str(data_value))
         yield from self.construct_job_workunits(data_job)

-    def loop_stored_procedures(  # noqa: C901
+    def loop_stored_procedures(
         self,
         inspector: Inspector,
         schema: str,
datahub/ingestion/source/sql/sql_common.py
@@ -635,7 +635,7 @@ class SQLAlchemySource(StatefulIngestionSourceBase, TestableSource):

         return None

-    def loop_tables(  # noqa: C901
+    def loop_tables(
         self,
         inspector: Inspector,
         schema: str,
datahub/ingestion/source/sql/teradata.py
@@ -649,7 +649,7 @@ ORDER by DataBaseName, TableName;
     )

     # Disabling the below because the cached view definition is not the view definition the column in tablesv actually holds the last statement executed against the object... not necessarily the view definition
-    # setattr(  # noqa: B010
+    # setattr(
     #     TeradataDialect,
     #     "get_view_definition",
     #     lambda self, connection, view_name, schema=None, **kw: optimized_get_view_definition(

@@ -746,7 +746,7 @@ ORDER by DataBaseName, TableName;
         else:
             raise Exception("Unable to get database name from Sqlalchemy inspector")

-    def cached_loop_tables(  # noqa: C901
+    def cached_loop_tables(
         self,
         inspector: Inspector,
         schema: str,

@@ -782,7 +782,7 @@ ORDER by DataBaseName, TableName;
                 break
         return description, properties, location

-    def cached_loop_views(  # noqa: C901
+    def cached_loop_views(
         self,
         inspector: Inspector,
         schema: str,
datahub/ingestion/source/tableau/tableau.py
@@ -2190,6 +2190,10 @@ class TableauSiteSource:
                 dataset_snapshot.aspects.append(browse_paths)
             else:
                 logger.debug(f"Browse path not set for Custom SQL table {csql_id}")
+                logger.warning(
+                    f"Skipping Custom SQL table {csql_id} due to filtered downstream"
+                )
+                continue

             dataset_properties = DatasetPropertiesClass(
                 name=csql.get(c.NAME),

@@ -2628,6 +2632,15 @@ class TableauSiteSource:
             datasource_info = datasource

         browse_path = self._get_project_browse_path_name(datasource)
+        if (
+            not is_embedded_ds
+            and self._get_published_datasource_project_luid(datasource) is None
+        ):
+            logger.warning(
+                f"Skip ingesting published datasource {datasource.get(c.NAME)} because of filtered project"
+            )
+            return
+
         logger.debug(f"datasource {datasource.get(c.NAME)} browse-path {browse_path}")
         datasource_id = datasource[c.ID]
         datasource_urn = builder.make_dataset_urn_with_platform_instance(

@@ -2851,6 +2864,11 @@ class TableauSiteSource:
             query_filter=tables_filter,
             page_size=self.config.effective_database_table_page_size,
         ):
+            if tableau_database_table_id_to_urn_map.get(tableau_table[c.ID]) is None:
+                logger.warning(
+                    f"Skipping table {tableau_table[c.ID]} due to filtered out published datasource"
+                )
+                continue
             database_table = self.database_tables[
                 tableau_database_table_id_to_urn_map[tableau_table[c.ID]]
             ]

@@ -2905,6 +2923,7 @@ class TableauSiteSource:
             dataset_snapshot.aspects.append(browse_paths)
         else:
             logger.debug(f"Browse path not set for table {database_table.urn}")
+            return

         schema_metadata = self.get_schema_metadata_for_table(
             tableau_columns, database_table.parsed_columns
datahub/ingestion/source/unity/source.py
@@ -464,7 +464,17 @@ class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):

             with self.report.new_stage(f"Ingest schema {schema.id}"):
                 yield from self.gen_schema_containers(schema)
-                yield from self.process_tables(schema)
+                try:
+                    yield from self.process_tables(schema)
+                except Exception as e:
+                    logger.exception(f"Error parsing schema {schema}")
+                    self.report.report_warning(
+                        message="Missed schema because of parsing issues",
+                        context=str(schema),
+                        title="Error parsing schema",
+                        exc=e,
+                    )
+                    continue

                 self.report.schemas.processed(schema.id)

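The Unity Catalog change wraps per-schema processing in a try/except so one unparseable schema is logged, reported as a warning, and skipped, rather than aborting the entire run. A self-contained sketch of the same fail-soft generator pattern (names here are illustrative stand-ins, not the DataHub API):

import logging
from typing import Iterable, Iterator

logger = logging.getLogger(__name__)

def process_tables(schema: str) -> Iterator[str]:
    # Stand-in for UnityCatalogSource.process_tables.
    if schema == "broken":
        raise ValueError("unparseable schema")
    yield f"workunit:{schema}"

def ingest(schemas: Iterable[str]) -> Iterator[str]:
    for schema in schemas:
        try:
            yield from process_tables(schema)
        except Exception:
            # Log and continue: one bad schema no longer kills the
            # whole generator-driven ingestion loop.
            logger.exception(f"Error parsing schema {schema}")
            continue

assert list(ingest(["a", "broken", "b"])) == ["workunit:a", "workunit:b"]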
datahub/metadata/schema.avsc
@@ -3504,7 +3504,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"

@@ -10339,7 +10341,9 @@
       "/terms/*/urn": {
         "boostScore": 0.5,
         "fieldName": "editedFieldGlossaryTerms",
-        "fieldType": "URN"
+        "fieldType": "URN",
+        "includeSystemModifiedAt": true,
+        "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
       }
     },
     "type": [

@@ -10477,6 +10481,12 @@
     "namespace": "com.linkedin.pegasus2avro.businessattribute",
     "fields": [
       {
+        "Searchable": {
+          "fieldName": "schemaFieldBusinessAttribute",
+          "includeSystemModifiedAt": true,
+          "queryByDefault": false,
+          "systemModifiedAtFieldName": "schemaFieldBusinessAttributeModifiedAt"
+        },
         "java": {
           "class": "com.linkedin.pegasus2avro.common.urn.BusinessAttributeUrn"
         },

@@ -15077,6 +15087,7 @@
     },
     {
       "Searchable": {
+        "boostScore": 10.0,
         "enableAutocomplete": true,
         "fieldNameAliases": [
           "_entityName"

@@ -16546,7 +16557,9 @@
       "/terms/*/urn": {
         "boostScore": 0.5,
         "fieldName": "editedFieldGlossaryTerms",
-        "fieldType": "URN"
+        "fieldType": "URN",
+        "includeSystemModifiedAt": true,
+        "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
       }
     },
     "type": [

@@ -19423,6 +19436,13 @@
     "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
   },
   {
+    "UrnValidation": {
+      "entityTypes": [
+        "dataType"
+      ],
+      "exist": true,
+      "strict": true
+    },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.Urn"
     },

@@ -19511,6 +19531,13 @@
       "fieldName": "entityTypes"
     }
   },
+  "UrnValidation": {
+    "entityTypes": [
+      "entityType"
+    ],
+    "exist": true,
+    "strict": true
+  },
   "Urn": "Urn",
   "urn_is_array": true,
   "type": {
datahub/metadata/schemas/AssertionInfo.avsc
@@ -2010,7 +2010,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/BusinessAttributeInfo.avsc
@@ -221,7 +221,9 @@
       "/terms/*/urn": {
         "boostScore": 0.5,
         "fieldName": "editedFieldGlossaryTerms",
-        "fieldType": "URN"
+        "fieldType": "URN",
+        "includeSystemModifiedAt": true,
+        "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
       }
     },
     "type": [

@@ -254,7 +256,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/BusinessAttributes.avsc
@@ -31,6 +31,12 @@
   "namespace": "com.linkedin.pegasus2avro.businessattribute",
   "fields": [
     {
+      "Searchable": {
+        "fieldName": "schemaFieldBusinessAttribute",
+        "includeSystemModifiedAt": true,
+        "queryByDefault": false,
+        "systemModifiedAtFieldName": "schemaFieldBusinessAttributeModifiedAt"
+      },
       "java": {
         "class": "com.linkedin.pegasus2avro.common.urn.BusinessAttributeUrn"
       },
datahub/metadata/schemas/ChartInfo.avsc
@@ -39,6 +39,7 @@
   },
   {
     "Searchable": {
+      "boostScore": 10.0,
       "enableAutocomplete": true,
       "fieldNameAliases": [
         "_entityName"
datahub/metadata/schemas/EditableSchemaMetadata.avsc
@@ -303,7 +303,9 @@
       "/terms/*/urn": {
         "boostScore": 0.5,
         "fieldName": "editedFieldGlossaryTerms",
-        "fieldType": "URN"
+        "fieldType": "URN",
+        "includeSystemModifiedAt": true,
+        "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
       }
     },
     "type": [

@@ -336,7 +338,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/GlossaryTerms.avsc
@@ -26,7 +26,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/InputFields.avsc
@@ -553,7 +553,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/MetadataChangeEvent.avsc
@@ -183,6 +183,7 @@
   },
   {
     "Searchable": {
+      "boostScore": 10.0,
       "enableAutocomplete": true,
       "fieldNameAliases": [
         "_entityName"

@@ -994,7 +995,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"

@@ -4644,7 +4647,9 @@
       "/terms/*/urn": {
         "boostScore": 0.5,
         "fieldName": "editedFieldGlossaryTerms",
-        "fieldType": "URN"
+        "fieldType": "URN",
+        "includeSystemModifiedAt": true,
+        "systemModifiedAtFieldName": "schemaFieldTermsModifiedAt"
       }
     },
     "type": [
datahub/metadata/schemas/SchemaMetadata.avsc
@@ -777,7 +777,9 @@
       "fieldName": "glossaryTerms",
       "fieldType": "URN",
       "filterNameOverride": "Glossary Term",
-      "hasValuesFieldName": "hasGlossaryTerms"
+      "hasValuesFieldName": "hasGlossaryTerms",
+      "includeSystemModifiedAt": true,
+      "systemModifiedAtFieldName": "termsModifiedAt"
     },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.GlossaryTermUrn"
datahub/metadata/schemas/StructuredPropertyDefinition.avsc
@@ -23,6 +23,13 @@
     "doc": "The display name of the property. This is the name that will be shown in the UI and can be used to look up the property id."
   },
   {
+    "UrnValidation": {
+      "entityTypes": [
+        "dataType"
+      ],
+      "exist": true,
+      "strict": true
+    },
     "java": {
       "class": "com.linkedin.pegasus2avro.common.urn.Urn"
     },

@@ -111,6 +118,13 @@
       "fieldName": "entityTypes"
     }
   },
+  "UrnValidation": {
+    "entityTypes": [
+      "entityType"
+    ],
+    "exist": true,
+    "strict": true
+  },
   "type": {
     "type": "array",
     "items": "string"
datahub/sql_parsing/schema_resolver.py
@@ -13,7 +13,7 @@ from datahub.ingestion.graph.client import DataHubGraph
 from datahub.ingestion.source.bigquery_v2.bigquery_audit import BigqueryTableIdentifier
 from datahub.metadata.schema_classes import SchemaFieldClass, SchemaMetadataClass
 from datahub.metadata.urns import DataPlatformUrn
-from datahub.sql_parsing._models import _TableName as _TableName  # noqa: I250
+from datahub.sql_parsing._models import _TableName as _TableName
 from datahub.sql_parsing.sql_parsing_common import PLATFORMS_WITH_CASE_SENSITIVE_TABLES
 from datahub.utilities.file_backed_collections import ConnectionWrapper, FileBackedDict
 from datahub.utilities.urns.field_paths import get_simple_field_path_from_v2_field_path
datahub/sql_parsing/sqlglot_lineage.py
@@ -473,7 +473,7 @@ def _create_table_ddl_cll(
     return column_lineage


-def _select_statement_cll(  # noqa: C901
+def _select_statement_cll(
     statement: _SupportedColumnLineageTypes,
     dialect: sqlglot.Dialect,
     root_scope: sqlglot.optimizer.Scope,
datahub/upgrade/upgrade.py
@@ -293,9 +293,9 @@ def is_client_server_compatible(client: VersionStats, server: VersionStats) -> int:
     return server.version.micro - client.version.micro


-def _maybe_print_upgrade_message(  # noqa: C901
+def _maybe_print_upgrade_message(
     version_stats: Optional[DataHubVersionStats],
-) -> None:  # noqa: C901
+) -> None:
     days_before_cli_stale = 7
     days_before_quickstart_stale = 7

datahub/utilities/ingest_utils.py (new file)
@@ -0,0 +1,106 @@
+import json
+import logging
+from typing import Optional
+
+import click
+
+from datahub.configuration.common import ConfigModel
+from datahub.configuration.config_loader import load_config_file
+from datahub.emitter.mce_builder import datahub_guid
+
+logger = logging.getLogger(__name__)
+
+
+def _make_ingestion_urn(name: str) -> str:
+    guid = datahub_guid(
+        {
+            "name": name,
+        }
+    )
+    return f"urn:li:dataHubIngestionSource:deploy-{guid}"
+
+
+class DeployOptions(ConfigModel):
+    name: str
+    schedule: Optional[str] = None
+    time_zone: str = "UTC"
+    cli_version: Optional[str] = None
+    executor_id: str = "default"
+
+
+def deploy_source_vars(
+    name: Optional[str],
+    config: str,
+    urn: Optional[str],
+    executor_id: str,
+    cli_version: Optional[str],
+    schedule: Optional[str],
+    time_zone: str,
+    extra_pip: Optional[str],
+    debug: bool = False,
+) -> dict:
+    pipeline_config = load_config_file(
+        config,
+        allow_stdin=True,
+        allow_remote=True,
+        resolve_env_vars=False,
+    )
+
+    deploy_options_raw = pipeline_config.pop("deployment", None)
+    if deploy_options_raw is not None:
+        deploy_options = DeployOptions.parse_obj(deploy_options_raw)
+
+        if name:
+            logger.info(f"Overriding deployment name {deploy_options.name} with {name}")
+            deploy_options.name = name
+    else:
+        if not name:
+            raise click.UsageError(
+                "Either --name must be set or deployment_name specified in the config"
+            )
+        deploy_options = DeployOptions(name=name)
+
+    # Use remaining CLI args to override deploy_options
+    if schedule:
+        deploy_options.schedule = schedule
+    if time_zone:
+        deploy_options.time_zone = time_zone
+    if cli_version:
+        deploy_options.cli_version = cli_version
+    if executor_id:
+        deploy_options.executor_id = executor_id
+
+    logger.info(f"Using {repr(deploy_options)}")
+
+    if not urn:
+        # When urn/name is not specified, we will generate a unique urn based on the deployment name.
+        urn = _make_ingestion_urn(deploy_options.name)
+        logger.info(f"Using recipe urn: {urn}")
+
+    variables: dict = {
+        "urn": urn,
+        "input": {
+            "name": deploy_options.name,
+            "type": pipeline_config["source"]["type"],
+            "config": {
+                "recipe": json.dumps(pipeline_config),
+                "executorId": deploy_options.executor_id,
+                "debugMode": debug,
+                "version": deploy_options.cli_version,
+            },
+        },
+    }
+
+    if deploy_options.schedule is not None:
+        variables["input"]["schedule"] = {
+            "interval": deploy_options.schedule,
+            "timezone": deploy_options.time_zone,
+        }
+    if extra_pip is not None:
+        extra_args_list = (
+            variables.get("input", {}).get("config", {}).get("extraArgs", [])
+        )
+        extra_args_list.append({"key": "extra_pip_requirements", "value": extra_pip})
+        variables["input"]["config"]["extraArgs"] = extra_args_list
+
+    return variables
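The new `deploy_source_vars` helper factors recipe-handling logic out of `ingest_cli.py` (which shrinks by 65 lines in this release), presumably so `datahub ingest deploy` builds its GraphQL variables payload in one place. A hedged usage sketch, assuming a local recipe.yml containing a `source` section:

from datahub.utilities.ingest_utils import deploy_source_vars

variables = deploy_source_vars(
    name="my-ingestion",   # used to derive a stable ingestion-source urn
    config="recipe.yml",   # hypothetical local recipe file
    urn=None,              # None => urn:li:dataHubIngestionSource:deploy-<guid>
    executor_id="default",
    cli_version=None,
    schedule="0 5 * * *",  # optional cron schedule
    time_zone="UTC",
    extra_pip=None,
    debug=False,
)
print(variables["urn"])
print(variables["input"]["schedule"])  # {'interval': '0 5 * * *', 'timezone': 'UTC'}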
datahub/utilities/mapping.py
@@ -171,7 +171,7 @@ class OperationProcessor:
         self.owner_source_type = owner_source_type
         self.match_nested_props = match_nested_props

-    def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:  # noqa: C901
+    def process(self, raw_props: Mapping[str, Any]) -> Dict[str, Any]:
         # Defining the following local variables -
         # operations_map - the final resulting map when operations are processed.
         # Against each operation the values to be applied are stored.