acryl-datahub 1.2.0.8rc2__py3-none-any.whl → 1.2.0.8rc4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.8rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.8rc4.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=61ZxWUlQVKM0CF2BBOi-9OpFZENqh_B4oxFCZYQSJBc,323
4
+ datahub/_version.py,sha256=uu8Uaqh56VQOGHwbtlo282g9v9lvEV1QwtbdKz41OI0,323
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -148,10 +148,10 @@ datahub/ingestion/api/incremental_properties_helper.py,sha256=KzdxdrQtaMV2XMHfPs
148
148
  datahub/ingestion/api/ingestion_job_checkpointing_provider_base.py,sha256=3lLdkkxVqE9MVc26cdXImPeWy16az5BwgcorWxeBV50,1759
149
149
  datahub/ingestion/api/pipeline_run_listener.py,sha256=5uBP__LbMQxJ2utlf07cIzQINqPbUOKiZyOJta6a0og,713
150
150
  datahub/ingestion/api/registry.py,sha256=LbdZr89465Lj7ptQRVB4vI1JR1igWABvQFj9-WX63bI,7454
151
- datahub/ingestion/api/report.py,sha256=OuVZAgNkzSGkKhpOhpqebd9_bEsBCTeoWR1VcanPeD4,18509
151
+ datahub/ingestion/api/report.py,sha256=1w63Y2yN49IaDLZaIvXEjRU3yVb_9t3wzymSI-fumZM,18959
152
152
  datahub/ingestion/api/report_helpers.py,sha256=WbUC1kQeaKqIagGV3XzfPmPs7slAT1mfNY4og2BH2A8,994
153
- datahub/ingestion/api/sink.py,sha256=GZt48PV56FAhNoma-V5EwwRZvezhb40YH_zprm8_Yo0,4961
154
- datahub/ingestion/api/source.py,sha256=uf0fNbiOy0bS_aKFOcNv6NvuZe0LSDIDdNza9hraP7s,21857
153
+ datahub/ingestion/api/sink.py,sha256=nfal7nsYY1AT2WQRjqO48uAHitpjax7TsRVzYXnqbeM,4918
154
+ datahub/ingestion/api/source.py,sha256=JASs7WygVB6g-tcwtchaftzv3lNtlVM31lEa242pn44,21853
155
155
  datahub/ingestion/api/source_helpers.py,sha256=XT9y5HgfVeF52jrX39vlLn1SdXpLVyT2Su8oGNsddYo,21148
156
156
  datahub/ingestion/api/transform.py,sha256=X0GpjMJzYkLuZx8MTWxH50cWGm9rGsnn3k188mmC8J8,582
157
157
  datahub/ingestion/api/workunit.py,sha256=e8n8RfSjHZZm2R4ShNH0UuMtUkMjyqqM2j2t7oL74lo,6327
@@ -193,7 +193,7 @@ datahub/ingestion/reporting/file_reporter.py,sha256=tiWukmMxHrTQI3rOAumsq6lRlw8T
193
193
  datahub/ingestion/reporting/reporting_provider_registry.py,sha256=jTYSh3T4sensjnHQfPLiIcbA2dG8w0px9ghChAJjGdU,310
194
194
  datahub/ingestion/run/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
195
195
  datahub/ingestion/run/connection.py,sha256=mngNzr5aRLUDa5Izqxa0xkdDEqEqcDuacWSKIlkdvPc,1483
196
- datahub/ingestion/run/pipeline.py,sha256=xKjH5jUTCwJ-rCAnOEXLmZqVLKz_qxcX2JJxcO5AGmE,32046
196
+ datahub/ingestion/run/pipeline.py,sha256=WO9K4yUihmoL6crtilO_cpRAeE6yVa1jd2tzf6spb-A,32541
197
197
  datahub/ingestion/run/pipeline_config.py,sha256=joG1j9OlwJhb8zqv4TY6_FSzOaKOx6xsBu255A5lP8g,4101
198
198
  datahub/ingestion/run/sink_callback.py,sha256=xZAzaKkR0dcQP838pMJWsA52oaQXV5BiqXkpxEpJ_9U,2856
199
199
  datahub/ingestion/sink/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -201,7 +201,7 @@ datahub/ingestion/sink/blackhole.py,sha256=-jYcWo4i8q7312bCIoHrGr7nT9JdPvA7c4jvS
201
201
  datahub/ingestion/sink/console.py,sha256=TZfhA0Ec2eNCrMH7RRy2JOdUE-U-hkoIQrPm1CmKLQs,591
202
202
  datahub/ingestion/sink/datahub_kafka.py,sha256=_cjuXu5I6G0zJ2UK7hMbaKjMPZXeIwRMgm7CVeTiNtc,2578
203
203
  datahub/ingestion/sink/datahub_lite.py,sha256=7u2aWm7ENLshKHl-PkjJg6Mrw4bWs8sTfKIBz4mm8Ak,1879
204
- datahub/ingestion/sink/datahub_rest.py,sha256=QrtR-hJ6yljN1quXcjoUHdAmJueZclrFZFrhU7c4YJM,13563
204
+ datahub/ingestion/sink/datahub_rest.py,sha256=xw4JIaXOyFTLN3_wYOd3-dKv2ZwrcSxY_oXbeWpsXhs,13189
205
205
  datahub/ingestion/sink/file.py,sha256=SxXJPJpkIGoaqRjCcSmj2ZE3xE4rLlBABBGwpTj5LWI,3271
206
206
  datahub/ingestion/sink/sink_registry.py,sha256=JRBWx8qEYg0ubSTyhqwgSWctgxwyp6fva9GoN2LwBao,490
207
207
  datahub/ingestion/source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -278,8 +278,8 @@ datahub/ingestion/source/bigquery_v2/queries_extractor.py,sha256=_5cAXVU8b8T_nAP
278
278
  datahub/ingestion/source/bigquery_v2/usage.py,sha256=A9c-ofclaRk0NSnc4IRaqJYqMPv6ecCld_TPy3V2qFs,40748
279
279
  datahub/ingestion/source/cassandra/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
280
280
  datahub/ingestion/source/cassandra/cassandra.py,sha256=pNy61Z4kTqL_wGcWIYee5fnZiuJDseDcRcQwsxeAssk,14487
281
- datahub/ingestion/source/cassandra/cassandra_api.py,sha256=b7MApc3_tEfHoj-6ub6snkcv_DweL1wi_TGJjAA1-yU,13516
282
- datahub/ingestion/source/cassandra/cassandra_config.py,sha256=Ga9915cDZukR5-u2tMNx5Jkf8eza2oAE5YS_sQIVEVQ,4222
281
+ datahub/ingestion/source/cassandra/cassandra_api.py,sha256=wCJx-1ZByGMgPkORBO420sGucKkxXXE4pOLWXxdpMIw,14222
282
+ datahub/ingestion/source/cassandra/cassandra_config.py,sha256=w9LBiT8XrGvXlrvpcAU_xm82GiE4nUfEg-VKIX6MRMY,4446
283
283
  datahub/ingestion/source/cassandra/cassandra_profiling.py,sha256=FdgPK_8s8otTOJDqNM4rpF6Mn4lFWbnjTaKEChzn2iE,11011
284
284
  datahub/ingestion/source/cassandra/cassandra_utils.py,sha256=j-LidYkaCTmGnpUVNLsax_c3z32PsQbsbHeYojygd1s,5105
285
285
  datahub/ingestion/source/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -333,7 +333,7 @@ datahub/ingestion/source/excel/report.py,sha256=oEkeI8J6is7zB9iz4RqASu_-Q5xl36lA
333
333
  datahub/ingestion/source/excel/source.py,sha256=w_vOz4UD7BcXBBDKoo81_6-QFeOPITuXqkfjIMHCQj4,23827
334
334
  datahub/ingestion/source/excel/util.py,sha256=YYmadYuCiT-4_MfQM0YSE7wuDcE0k8o2KrlOKM9Z6eI,406
335
335
  datahub/ingestion/source/fivetran/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
336
- datahub/ingestion/source/fivetran/config.py,sha256=00jc9srBZUQ18RSSCnHiOoJ4_F6I_rp--z-V7d9wXNY,9031
336
+ datahub/ingestion/source/fivetran/config.py,sha256=A_ZFZ1PAfBeSY2k7wU-ixCFg1FOO7rgPGTd2wxKqbEg,9032
337
337
  datahub/ingestion/source/fivetran/data_classes.py,sha256=ecdUJH5BEze0yv-uFpKWPNaNmV1gORDA2XMFk0zhcBw,595
338
338
  datahub/ingestion/source/fivetran/fivetran.py,sha256=s8wcECtmuugUoZ0Zdthq0SIPpTLvziZXuhhUX9bJ5N4,14492
339
339
  datahub/ingestion/source/fivetran/fivetran_log_api.py,sha256=PNzuykiiFTU8FhBIfUbW6udURZpz_35aq7rfffbpIfA,13010
@@ -366,7 +366,7 @@ datahub/ingestion/source/hex/mapper.py,sha256=IyDAE-TzZUji3ICI_9gkYC3dQN3gl6kERR
366
366
  datahub/ingestion/source/hex/model.py,sha256=eri4aRo1eXcE2SWjzCnPFMhzPTiJ8w8zC4GN7Lgpr74,1864
367
367
  datahub/ingestion/source/hex/query_fetcher.py,sha256=r9UvF_qwswkRlNY7AI8p46eqAYSxVtjVE2e7eO4XagA,13384
368
368
  datahub/ingestion/source/iceberg/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
369
- datahub/ingestion/source/iceberg/iceberg.py,sha256=2E3mhvsIDSHDUd1Prb0nlZnGIsQLIuwNeFRxJPYyS-0,37042
369
+ datahub/ingestion/source/iceberg/iceberg.py,sha256=KM9IDdWxW2VwOv0Iv3sMV6a60FNkgDEMS8vZaFHTOyA,37040
370
370
  datahub/ingestion/source/iceberg/iceberg_common.py,sha256=CD_yHQ_wEgivyLQUTRO9BZJB29S7j5fUVllki-BPwUU,12292
371
371
  datahub/ingestion/source/iceberg/iceberg_profiler.py,sha256=9iwp2vpQTi4OMbIKoDZV5lAdvjMR0ls6Llpck9grJIE,9875
372
372
  datahub/ingestion/source/identity/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -640,7 +640,7 @@ datahub/metadata/schema.avsc,sha256=HluHCVmYg7RpOaw9xUMigEJBxlHF5WLdNcqVBKPeoOU,
640
640
  datahub/metadata/schema_classes.py,sha256=tPT8iHCak4IsZi_oL0nirbPpI8ETTPTZzapqLRpeKU4,1326
641
641
  datahub/metadata/urns.py,sha256=nfrCTExR-k2P9w272WVtWSN3xW1VUJngPwP3xnvULjU,1217
642
642
  datahub/metadata/_urns/__init__.py,sha256=cOF3GHMDgPhmbLKbN02NPpuLGHSu0qNgQyBRv08eqF0,243
643
- datahub/metadata/_urns/urn_defs.py,sha256=Rl2wjTuHxpOk4rDAfqUHeBxCxTFA2JGFg1oCc9yR10s,143154
643
+ datahub/metadata/_urns/urn_defs.py,sha256=tBlEg7f0jaIWVQfpgzTe2gjkthP4janfAwJO7yx6-cw,143257
644
644
  datahub/metadata/com/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
645
645
  datahub/metadata/com/linkedin/__init__.py,sha256=gsAIuTxzfJdI7a9ybZlgMIHMAYksM1SxGxXjtySgKSc,202
646
646
  datahub/metadata/com/linkedin/events/__init__.py,sha256=s_dR0plZF-rOxxIbE8ojekJqwiHzl2WYR-Z3kW6kKS0,298
@@ -1093,7 +1093,7 @@ datahub/utilities/urns/field_paths.py,sha256=ra-o_fMGkBRLgzMewAJN5-HqAyo3PIpXQ0K
1093
1093
  datahub/utilities/urns/notebook_urn.py,sha256=CHqGrV45ReVODlFx7js2WUxjcXxt8B63-xsBZpujmtY,73
1094
1094
  datahub/utilities/urns/structured_properties_urn.py,sha256=fjA1Ysg7IQSly8IVYx1R8HnwnojQz6jZWbqfk_XVvno,271
1095
1095
  datahub/utilities/urns/tag_urn.py,sha256=MqEJdIaCnAyjYe_8VdNnUjOVV4TS8xMlv4pRsy8wwXY,63
1096
- datahub/utilities/urns/urn.py,sha256=B4nYxiFT8s5DLA2NJsWg0KoiUDp9UWg1nvL0j7Sx-h8,218
1096
+ datahub/utilities/urns/urn.py,sha256=CDtOtVccQW2yj5MBNtR3aO1yEInTnAmSDMbGbjheGJY,1279
1097
1097
  datahub/utilities/urns/urn_iter.py,sha256=3LtmobKksKFbnNCUCjFxm8qqFLCPPHUW_Q3zc4PE5nY,4736
1098
1098
  datahub_provider/__init__.py,sha256=qyPbz00f8pgtLVyqHG1TSnTqBfXb1x-kUH10zOLoq2U,53
1099
1099
  datahub_provider/_airflow_compat.py,sha256=unmFDGP57xKHPIhkdw_qo1vW1GAYpZ1yCvCrkMdGJXM,98
@@ -1114,8 +1114,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1114
1114
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1115
1115
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1116
1116
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1117
- acryl_datahub-1.2.0.8rc2.dist-info/METADATA,sha256=Q8mmqp92zb_C5PbYaI7zQiAwkw9QrX0FUiCAGxtbzzg,186651
1118
- acryl_datahub-1.2.0.8rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
- acryl_datahub-1.2.0.8rc2.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
- acryl_datahub-1.2.0.8rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
- acryl_datahub-1.2.0.8rc2.dist-info/RECORD,,
1117
+ acryl_datahub-1.2.0.8rc4.dist-info/METADATA,sha256=w5nzJ9UysAV5hpiguzwq09DBTlLJwLyRbSj3sEEWa8Q,186651
1118
+ acryl_datahub-1.2.0.8rc4.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1119
+ acryl_datahub-1.2.0.8rc4.dist-info/entry_points.txt,sha256=qopCAD6qrsijaZ9mTw3UlPCKsE00C3t9MbkkWow7pi4,9943
1120
+ acryl_datahub-1.2.0.8rc4.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1121
+ acryl_datahub-1.2.0.8rc4.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.8rc2"
3
+ __version__ = "1.2.0.8rc4"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -29,6 +29,7 @@ from datahub.metadata.schema_classes import (
29
29
  )
30
30
  from datahub.utilities.file_backed_collections import FileBackedDict
31
31
  from datahub.utilities.lossy_collections import LossyList
32
+ from datahub.utilities.urns.urn import guess_platform_name
32
33
 
33
34
  logger = logging.getLogger(__name__)
34
35
  LogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"]
@@ -41,6 +42,15 @@ class SupportsAsObj(Protocol):
41
42
 
42
43
  @dataclass
43
44
  class Report(SupportsAsObj):
45
+ def __post_init__(self) -> None:
46
+ self.platform: Optional[str] = None
47
+
48
+ def set_platform(self, platform: str) -> None:
49
+ self.platform = platform
50
+
51
+ def get_platform(self) -> Optional[str]:
52
+ return self.platform
53
+
44
54
  @staticmethod
45
55
  def to_str(some_val: Any) -> str:
46
56
  if isinstance(some_val, Enum):
@@ -213,6 +223,7 @@ class ExamplesReport(Report, Closeable):
213
223
  _lineage_aspects_seen: Set[str] = field(default_factory=set)
214
224
 
215
225
  def __post_init__(self) -> None:
226
+ super().__post_init__()
216
227
  self._file_based_dict = FileBackedDict(
217
228
  tablename="urn_aspects",
218
229
  extra_columns={
@@ -347,6 +358,9 @@ class ExamplesReport(Report, Closeable):
347
358
  aspectName: str,
348
359
  mcp: Union[MetadataChangeProposalClass, MetadataChangeProposalWrapper],
349
360
  ) -> None:
361
+ platform_name = guess_platform_name(urn)
362
+ if platform_name != self.get_platform():
363
+ return
350
364
  if is_lineage_aspect(entityType, aspectName):
351
365
  self._lineage_aspects_seen.add(aspectName)
352
366
  has_fine_grained_lineage = self._has_fine_grained_lineage(mcp)
@@ -147,9 +147,6 @@ class Sink(Generic[SinkConfig, SinkReportType], Closeable, metaclass=ABCMeta):
147
147
  def close(self) -> None:
148
148
  pass
149
149
 
150
- def flush(self) -> None:
151
- pass
152
-
153
150
  def configured(self) -> str:
154
151
  """Override this method to output a human-readable and scrubbed version of the configured sink"""
155
152
  return ""
@@ -531,9 +531,9 @@ class Source(Closeable, metaclass=ABCMeta):
531
531
  auto_status_aspect,
532
532
  auto_materialize_referenced_tags_terms,
533
533
  partial(
534
- auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
534
+ auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
535
535
  ),
536
- partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
536
+ partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
537
537
  browse_path_processor,
538
538
  partial(auto_workunit_reporter, self.get_report()),
539
539
  auto_patch_last_modified,
@@ -583,7 +583,7 @@ class Source(Closeable, metaclass=ABCMeta):
583
583
  def close(self) -> None:
584
584
  self.get_report().close()
585
585
 
586
- def _infer_platform(self) -> Optional[str]:
586
+ def infer_platform(self) -> Optional[str]:
587
587
  config = self.get_config()
588
588
  platform = (
589
589
  getattr(config, "platform_name", None)
@@ -598,7 +598,7 @@ class Source(Closeable, metaclass=ABCMeta):
598
598
  def _get_browse_path_processor(self, dry_run: bool) -> MetadataWorkUnitProcessor:
599
599
  config = self.get_config()
600
600
 
601
- platform = self._infer_platform()
601
+ platform = self.infer_platform()
602
602
  env = getattr(config, "env", None)
603
603
  browse_path_drop_dirs = [
604
604
  platform,
@@ -373,11 +373,13 @@ class Pipeline:
373
373
  )
374
374
  current_version = version_stats.client.current.version
375
375
 
376
- logger.debug(f"""
376
+ logger.debug(
377
+ f"""
377
378
  client_version: {current_version}
378
379
  server_default_version: {server_default_version}
379
380
  server_default_cli_ahead: True
380
- """)
381
+ """
382
+ )
381
383
 
382
384
  self.source.get_report().warning(
383
385
  title="Server default CLI version is ahead of CLI version",
@@ -440,7 +442,19 @@ class Pipeline:
440
442
  return True
441
443
  return False
442
444
 
445
+ def _set_platform(self) -> None:
446
+ platform = self.source.infer_platform()
447
+ if platform:
448
+ self.source.get_report().set_platform(platform)
449
+ else:
450
+ self.source.get_report().warning(
451
+ message="Platform not found",
452
+ title="Platform not found",
453
+ context="Platform not found",
454
+ )
455
+
443
456
  def run(self) -> None:
457
+ self._set_platform()
444
458
  self._warn_old_cli_version()
445
459
  with self.exit_stack, self.inner_exit_stack:
446
460
  if self.config.flags.generate_memory_profiles:
@@ -548,8 +562,9 @@ class Pipeline:
548
562
  self._handle_uncaught_pipeline_exception(exc)
549
563
  finally:
550
564
  clear_global_warnings()
551
- self.sink.flush()
552
- self._notify_reporters_on_ingestion_completion()
565
+
566
+ # This can't be in the finally part because this should happen after context manager exists
567
+ self._notify_reporters_on_ingestion_completion()
553
568
 
554
569
  def transform(self, records: Iterable[RecordEnvelope]) -> Iterable[RecordEnvelope]:
555
570
  """
@@ -5,7 +5,6 @@ import functools
5
5
  import logging
6
6
  import os
7
7
  import threading
8
- import time
9
8
  import uuid
10
9
  from enum import auto
11
10
  from typing import List, Optional, Tuple, Union
@@ -349,17 +348,6 @@ class DatahubRestSink(Sink[DatahubRestSinkConfig, DataHubRestSinkReport]):
349
348
  RecordEnvelope(item, metadata={}), NoopWriteCallback()
350
349
  )
351
350
 
352
- def flush(self) -> None:
353
- """Wait for all pending records to be written."""
354
- i = 0
355
- while self.report.pending_requests > 0:
356
- time.sleep(0.1)
357
- i += 1
358
- if i % 1000 == 0:
359
- logger.info(
360
- f"Waiting for {self.report.pending_requests} records to be written"
361
- )
362
-
363
351
  def close(self):
364
352
  with self.report.main_thread_blocking_timer:
365
353
  self.executor.shutdown()
@@ -132,7 +132,23 @@ class CassandraAPI:
132
132
 
133
133
  ssl_context = None
134
134
  if self.config.ssl_ca_certs:
135
- ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
135
+ # Map SSL version string to ssl module constant
136
+ ssl_version_map = {
137
+ "TLS_CLIENT": ssl.PROTOCOL_TLS_CLIENT,
138
+ "TLSv1": ssl.PROTOCOL_TLSv1,
139
+ "TLSv1_1": ssl.PROTOCOL_TLSv1_1,
140
+ "TLSv1_2": ssl.PROTOCOL_TLSv1_2,
141
+ "TLSv1_3": ssl.PROTOCOL_TLSv1_2, # Python's ssl module uses TLSv1_2 for TLS 1.3
142
+ }
143
+
144
+ ssl_protocol = (
145
+ ssl_version_map.get(
146
+ self.config.ssl_version, ssl.PROTOCOL_TLS_CLIENT
147
+ )
148
+ if self.config.ssl_version
149
+ else ssl.PROTOCOL_TLS_CLIENT
150
+ )
151
+ ssl_context = ssl.SSLContext(ssl_protocol)
136
152
  ssl_context.load_verify_locations(self.config.ssl_ca_certs)
137
153
  if self.config.ssl_certfile and self.config.ssl_keyfile:
138
154
  ssl_context.load_cert_chain(
@@ -94,6 +94,11 @@ class CassandraSourceConfig(
94
94
  description="Path to the SSL key file for SSL connections.",
95
95
  )
96
96
 
97
+ ssl_version: Optional[str] = Field(
98
+ default="TLS_CLIENT",
99
+ description="SSL protocol version to use for connections. Options: TLS_CLIENT, TLSv1, TLSv1_1, TLSv1_2, TLSv1_3. Defaults to TLS_CLIENT.",
100
+ )
101
+
97
102
  keyspace_pattern: AllowDenyPattern = Field(
98
103
  default=AllowDenyPattern.allow_all(),
99
104
  description="Regex patterns to filter keyspaces for ingestion.",
@@ -194,7 +194,7 @@ class FivetranSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
194
194
 
195
195
  # Configuration for stateful ingestion
196
196
  stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = pydantic.Field(
197
- default=None, description="Airbyte Stateful Ingestion Config."
197
+ default=None, description="Fivetran Stateful Ingestion Config."
198
198
  )
199
199
 
200
200
  # Fivetran connector all sources to platform instance mapping
@@ -200,9 +200,9 @@ class IcebergSource(StatefulIngestionSourceBase):
200
200
  auto_lowercase_dataset_urns,
201
201
  auto_materialize_referenced_tags_terms,
202
202
  partial(
203
- auto_fix_duplicate_schema_field_paths, platform=self._infer_platform()
203
+ auto_fix_duplicate_schema_field_paths, platform=self.infer_platform()
204
204
  ),
205
- partial(auto_fix_empty_field_paths, platform=self._infer_platform()),
205
+ partial(auto_fix_empty_field_paths, platform=self.infer_platform()),
206
206
  partial(auto_workunit_reporter, self.get_report()),
207
207
  auto_patch_last_modified,
208
208
  EnsureAspectSizeProcessor(self.get_report()).ensure_aspect_size,
@@ -2904,6 +2904,10 @@ class DataJobUrn(_SpecificUrn):
2904
2904
  def get_data_flow_urn(self) -> "DataFlowUrn":
2905
2905
  return DataFlowUrn.from_string(self.flow)
2906
2906
 
2907
+ @property
2908
+ def orchestrator(self) -> str:
2909
+ return self.get_data_flow_urn().orchestrator
2910
+
2907
2911
  @deprecated(reason="Use .job_id instead")
2908
2912
  def get_job_id(self) -> str:
2909
2913
  return self.job_id
@@ -1,8 +1,47 @@
1
- from datahub.metadata.urns import Urn
1
+ from typing import Optional
2
2
 
3
- __all__ = ["Urn", "guess_entity_type"]
3
+ from datahub.metadata.urns import (
4
+ DataPlatformUrn,
5
+ Urn,
6
+ )
7
+
8
+ __all__ = ["Urn", "guess_entity_type", "guess_platform_name"]
4
9
 
5
10
 
6
11
  def guess_entity_type(urn: str) -> str:
7
12
  assert urn.startswith("urn:li:"), "urns must start with urn:li:"
8
13
  return urn.split(":")[2]
14
+
15
+
16
+ def guess_platform_name(urn: str) -> Optional[str]:
17
+ """Extract platform from URN using a mapping dictionary."""
18
+ urn_obj = Urn.from_string(urn)
19
+
20
+ try:
21
+ platform = None
22
+ try:
23
+ platform = urn_obj.platform # type: ignore[attr-defined]
24
+ platform_name = DataPlatformUrn.from_string(
25
+ platform
26
+ ).get_entity_id_as_string()
27
+ return platform_name
28
+ except AttributeError:
29
+ pass
30
+ try:
31
+ return urn_obj.orchestrator # type: ignore[attr-defined]
32
+ except AttributeError:
33
+ pass
34
+ try:
35
+ return urn_obj.dashboard_tool # type: ignore[attr-defined]
36
+ except AttributeError:
37
+ pass
38
+ try:
39
+ return urn_obj.ml_model_tool # type: ignore[attr-defined]
40
+ except AttributeError:
41
+ pass
42
+
43
+ if platform is None:
44
+ return None
45
+ except AttributeError:
46
+ pass
47
+ return None