acryl-datahub 1.1.0rc4__py3-none-any.whl → 1.1.1rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub has been flagged as possibly problematic.

@@ -0,0 +1,644 @@
+from abc import ABC, abstractmethod
+
+# Add imports for source customization
+from typing import Any, Callable, Dict, Optional, Type, TypeVar
+from urllib.parse import unquote
+
+# Don't import TableData at the module level to avoid circular imports
+# from datahub.ingestion.source.s3.source import TableData
+
+T = TypeVar("T")
+
+
+class ObjectStoreInterface(ABC):
+    """
+    Abstract interface for object store operations.
+
+    This interface defines the operations that any object store connector
+    (S3, GCS, ABS, etc.) should implement to provide a consistent API.
+    """
+
+    @classmethod
+    @abstractmethod
+    def is_uri(cls, uri: str) -> bool:
+        """
+        Check if the given URI is for this object store.
+
+        Args:
+            uri: The URI to check
+
+        Returns:
+            True if the URI is for this object store, False otherwise
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def get_prefix(cls, uri: str) -> Optional[str]:
+        """
+        Get the prefix for this object store URI (e.g., 's3://', 'gs://').
+
+        Args:
+            uri: The URI to get the prefix from
+
+        Returns:
+            The prefix if the URI starts with it, None otherwise
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def strip_prefix(cls, uri: str) -> str:
+        """
+        Remove the object store prefix from the URI.
+
+        Args:
+            uri: The URI to strip the prefix from
+
+        Returns:
+            The URI without the prefix
+
+        Raises:
+            ValueError: If the URI does not start with the expected prefix
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def get_bucket_name(cls, uri: str) -> str:
+        """
+        Get the bucket name from the URI.
+
+        Args:
+            uri: The URI to get the bucket name from
+
+        Returns:
+            The bucket name
+
+        Raises:
+            ValueError: If the URI is not valid for this object store
+        """
+        pass
+
+    @classmethod
+    @abstractmethod
+    def get_object_key(cls, uri: str) -> str:
+        """
+        Get the object key/path (excluding the bucket) from the URI.
+
+        Args:
+            uri: The URI to get the object key from
+
+        Returns:
+            The object key
+
+        Raises:
+            ValueError: If the URI is not valid for this object store
+        """
+        pass
+
+    @classmethod
+    def get_object_store_bucket_name(cls, uri: str) -> str:
+        """
+        Get the bucket name from the URI, handling foreign URIs if supported.
+
+        The default implementation just calls get_bucket_name, but subclasses
+        can override this to handle URIs from other object stores.
+
+        Args:
+            uri: The URI to get the bucket name from
+
+        Returns:
+            The bucket name
+
+        Raises:
+            ValueError: If the URI is not supported
+        """
+        return cls.get_bucket_name(uri)
+
+
+class S3ObjectStore(ObjectStoreInterface):
+    """Implementation of ObjectStoreInterface for Amazon S3."""
+
+    PREFIXES = ["s3://", "s3n://", "s3a://"]
+
+    @classmethod
+    def is_uri(cls, uri: str) -> bool:
+        return any(uri.startswith(prefix) for prefix in cls.PREFIXES)
+
+    @classmethod
+    def get_prefix(cls, uri: str) -> Optional[str]:
+        for prefix in cls.PREFIXES:
+            if uri.startswith(prefix):
+                return prefix
+        return None
+
+    @classmethod
+    def strip_prefix(cls, uri: str) -> str:
+        prefix = cls.get_prefix(uri)
+        if not prefix:
+            raise ValueError(
+                f"Not an S3 URI. Must start with one of the following prefixes: {str(cls.PREFIXES)}"
+            )
+        return uri[len(prefix) :]
+
+    @classmethod
+    def get_bucket_name(cls, uri: str) -> str:
+        if not cls.is_uri(uri):
+            raise ValueError(
+                f"Not an S3 URI. Must start with one of the following prefixes: {str(cls.PREFIXES)}"
+            )
+        return cls.strip_prefix(uri).split("/")[0]
+
+    @classmethod
+    def get_object_key(cls, uri: str) -> str:
+        if not cls.is_uri(uri):
+            raise ValueError(
+                f"Not an S3 URI. Must start with one of the following prefixes: {str(cls.PREFIXES)}"
+            )
+        parts = cls.strip_prefix(uri).split("/", 1)
+        if len(parts) < 2:
+            return ""
+        return parts[1]
+
+    @classmethod
+    def get_object_store_bucket_name(cls, uri: str) -> str:
+        """
+        Get the bucket name from an S3 URI.
+
+        Args:
+            uri: The URI to get the bucket name from
+
+        Returns:
+            The bucket name
+
+        Raises:
+            ValueError: If the URI is not an S3 URI
+        """
+        return cls.get_bucket_name(uri)
+
+
+class GCSObjectStore(ObjectStoreInterface):
+    """Implementation of ObjectStoreInterface for Google Cloud Storage."""
+
+    PREFIX = "gs://"
+
+    @classmethod
+    def is_uri(cls, uri: str) -> bool:
+        return uri.startswith(cls.PREFIX)
+
+    @classmethod
+    def get_prefix(cls, uri: str) -> Optional[str]:
+        if uri.startswith(cls.PREFIX):
+            return cls.PREFIX
+        return None
+
+    @classmethod
+    def strip_prefix(cls, uri: str) -> str:
+        prefix = cls.get_prefix(uri)
+        if not prefix:
+            raise ValueError(f"Not a GCS URI. Must start with prefix: {cls.PREFIX}")
+        return uri[len(prefix) :]
+
+    @classmethod
+    def get_bucket_name(cls, uri: str) -> str:
+        if not cls.is_uri(uri):
+            raise ValueError(f"Not a GCS URI. Must start with prefix: {cls.PREFIX}")
+        return cls.strip_prefix(uri).split("/")[0]
+
+    @classmethod
+    def get_object_key(cls, uri: str) -> str:
+        if not cls.is_uri(uri):
+            raise ValueError(f"Not a GCS URI. Must start with prefix: {cls.PREFIX}")
+        parts = cls.strip_prefix(uri).split("/", 1)
+        if len(parts) < 2:
+            return ""
+        return parts[1]
+
+    @classmethod
+    def get_object_store_bucket_name(cls, uri: str) -> str:
+        """
+        Get the bucket name from a GCS URI.
+
+        Args:
+            uri: The URI to get the bucket name from
+
+        Returns:
+            The bucket name
+
+        Raises:
+            ValueError: If the URI is not a GCS URI
+        """
+        return cls.get_bucket_name(uri)
+
+
+class ABSObjectStore(ObjectStoreInterface):
+    """Implementation of ObjectStoreInterface for Azure Blob Storage."""
+
+    PREFIX = "abfss://"
+
+    @classmethod
+    def is_uri(cls, uri: str) -> bool:
+        return uri.startswith(cls.PREFIX)
+
+    @classmethod
+    def get_prefix(cls, uri: str) -> Optional[str]:
+        if uri.startswith(cls.PREFIX):
+            return cls.PREFIX
+        return None
+
+    @classmethod
+    def strip_prefix(cls, uri: str) -> str:
+        prefix = cls.get_prefix(uri)
+        if not prefix:
+            raise ValueError(f"Not an ABS URI. Must start with prefix: {cls.PREFIX}")
+        return uri[len(prefix) :]
+
+    @classmethod
+    def get_bucket_name(cls, uri: str) -> str:
+        if not cls.is_uri(uri):
+            raise ValueError(f"Not an ABS URI. Must start with prefix: {cls.PREFIX}")
+        return cls.strip_prefix(uri).split("@")[0]
+
+    @classmethod
+    def get_object_key(cls, uri: str) -> str:
+        if not cls.is_uri(uri):
+            raise ValueError(f"Not an ABS URI. Must start with prefix: {cls.PREFIX}")
+        parts = cls.strip_prefix(uri).split("@", 1)
+        if len(parts) < 2:
+            return ""
+        account_path = parts[1]
+        path_parts = account_path.split("/", 1)
+        if len(path_parts) < 2:
+            return ""
+        return path_parts[1]
+
+
+# Registry of all object store implementations
+OBJECT_STORE_REGISTRY: Dict[str, Type[ObjectStoreInterface]] = {
+    "s3": S3ObjectStore,
+    "gcs": GCSObjectStore,
+    "abs": ABSObjectStore,
+}
+
+
+def get_object_store_for_uri(uri: str) -> Optional[Type[ObjectStoreInterface]]:
+    """
+    Get the appropriate object store implementation for the given URI.
+
+    Args:
+        uri: The URI to get the object store for
+
+    Returns:
+        The object store implementation, or None if no matching implementation is found
+    """
+    for object_store in OBJECT_STORE_REGISTRY.values():
+        if object_store.is_uri(uri):
+            return object_store
+    return None
+
+
+def get_object_store_bucket_name(uri: str) -> str:
+    """
+    Get the bucket name from any supported object store URI.
+
+    This function acts as a central dispatcher that:
+    1. Identifies the appropriate object store implementation for the URI
+    2. Uses that implementation to extract the bucket name
+    3. Falls back to specific URI format parsing if needed
+
+    Args:
+        uri: The URI to get the bucket name from
+
+    Returns:
+        The bucket name
+
+    Raises:
+        ValueError: If the URI is not supported by any registered object store
+    """
+    # First try to find the native implementation for this URI
+    object_store = get_object_store_for_uri(uri)
+    if object_store:
+        return object_store.get_bucket_name(uri)
+
+    # If no native implementation, handle specific URI formats directly
+    if uri.startswith("gs://"):
+        return uri[5:].split("/")[0]
+    elif any(uri.startswith(prefix) for prefix in S3ObjectStore.PREFIXES):
+        prefix_length = next(
+            len(prefix) for prefix in S3ObjectStore.PREFIXES if uri.startswith(prefix)
+        )
+        return uri[prefix_length:].split("/")[0]
+    elif uri.startswith(ABSObjectStore.PREFIX):
+        return uri[len(ABSObjectStore.PREFIX) :].split("@")[0]
+
+    raise ValueError(f"Unsupported URI format: {uri}")
+
+
+def get_object_key(uri: str) -> str:
+    """
+    Get the object key from any supported object store URI.
+
+    Args:
+        uri: The URI to get the object key from
+
+    Returns:
+        The object key
+
+    Raises:
+        ValueError: If the URI is not supported by any registered object store
+    """
+    object_store = get_object_store_for_uri(uri)
+    if object_store:
+        return object_store.get_object_key(uri)
+
+    raise ValueError(f"Unsupported URI format: {uri}")
+
+
+class ObjectStoreSourceAdapter:
+    """
+    Adapter for customizing object store source implementations.
+
+    This class provides a way to customize source implementations for different
+    object stores (S3, GCS, etc.) without having to directly modify those classes.
+    Instead, adapters register customizations that are applied to the source instance.
+    """
+
+    @staticmethod
+    def create_s3_path(bucket_name: str, key: str) -> str:
+        """
+        Create a default S3 path.
+
+        Args:
+            bucket_name: The bucket name
+            key: The object key
+
+        Returns:
+            A properly formatted S3 URI
+        """
+        return unquote(f"s3://{bucket_name}/{key}")
+
+    @staticmethod
+    def create_gcs_path(bucket_name: str, key: str) -> str:
+        """
+        Create a default GCS path.
+
+        Args:
+            bucket_name: The bucket name
+            key: The object key
+
+        Returns:
+            A properly formatted GCS URI
+        """
+        return unquote(f"gs://{bucket_name}/{key}")
+
+    @staticmethod
+    def create_abs_path(container_name: str, key: str, account_name: str) -> str:
+        """
+        Create a default Azure Blob Storage path.
+
+        Args:
+            container_name: The container name
+            key: The object key
+            account_name: The storage account name
+
+        Returns:
+            A properly formatted ABS URI
+        """
+        return unquote(
+            f"abfss://{container_name}@{account_name}.dfs.core.windows.net/{key}"
+        )
+
+    @staticmethod
+    def get_s3_external_url(
+        table_data: Any, region: Optional[str] = None
+    ) -> Optional[str]:
+        """
+        Get the AWS S3 console URL for the given table.
+
+        Args:
+            table_data: Table data containing path information
+            region: AWS region for the S3 console URL, defaults to us-east-1 if not specified
+
+        Returns:
+            The AWS console URL, or None if not applicable
+        """
+        if not S3ObjectStore.is_uri(table_data.table_path):
+            return None
+
+        # Get the bucket name and key from the S3 URI
+        bucket_name = get_object_store_bucket_name(table_data.table_path)
+        key = get_object_key(table_data.table_path)
+
+        # Use the provided region or default to us-east-1
+        aws_region = region or "us-east-1"
+
+        return f"https://{aws_region}.console.aws.amazon.com/s3/buckets/{bucket_name}?prefix={key}"
+
+    @staticmethod
+    def get_gcs_external_url(table_data: Any) -> Optional[str]:
+        """
+        Get the GCS console URL for the given table.
+
+        Args:
+            table_data: Table data containing path information
+
+        Returns:
+            The GCS console URL, or None if not applicable
+        """
+        if not GCSObjectStore.is_uri(table_data.table_path):
+            return None
+
+        # Get the bucket name and key from the GCS URI
+        bucket_name = get_object_store_bucket_name(table_data.table_path)
+        key = get_object_key(table_data.table_path)
+
+        # Return the basic GCS console URL
+        return f"https://console.cloud.google.com/storage/browser/{bucket_name}/{key}"
+
+    @staticmethod
+    def get_abs_external_url(table_data: Any) -> Optional[str]:
+        """
+        Get the Azure Storage browser URL for the given table.
+
+        Args:
+            table_data: Table data containing path information
+
+        Returns:
+            The Azure Storage URL, or None if not applicable
+        """
+        if not ABSObjectStore.is_uri(table_data.table_path):
+            return None
+
+        # Parse the ABS URI
+        try:
+            # URI format: abfss://container@account.dfs.core.windows.net/path
+            path_without_prefix = ABSObjectStore.strip_prefix(table_data.table_path)
+            parts = path_without_prefix.split("@", 1)
+            if len(parts) < 2:
+                return None
+
+            container_name = parts[0]
+            account_parts = parts[1].split("/", 1)
+            account_domain = account_parts[0]
+            account_name = account_domain.split(".")[0]
+
+            # Construct Azure portal URL
+            return f"https://portal.azure.com/#blade/Microsoft_Azure_Storage/ContainerMenuBlade/overview/storageAccountId/{account_name}/containerName/{container_name}"
+        except Exception:
+            # If any parsing error occurs, return None
+            return None
+
+    def __init__(
+        self,
+        platform: str,
+        platform_name: str,
+        aws_region: Optional[str] = None,
+        azure_storage_account: Optional[str] = None,
+    ):
+        """
+        Initialize the adapter with platform-specific configurations.
+
+        Args:
+            platform: The platform identifier (e.g., "s3", "gcs", "abs")
+            platform_name: The human-readable platform name
+            aws_region: AWS region for S3 URLs, defaults to us-east-1 if not specified
+            azure_storage_account: Azure Storage account name
+        """
+        self.platform = platform
+        self.platform_name = platform_name
+        self.aws_region = aws_region
+        self.azure_storage_account = azure_storage_account
+        self.customizations: Dict[str, Callable[..., Any]] = {}
+
+        # Register default customizations based on platform
+        if platform == "gcs":
+            self.register_customization("is_s3_platform", lambda: True)
+            self.register_customization("create_s3_path", self.create_gcs_path)
+            self.register_customization(
+                "get_external_url",
+                lambda table_data: self.get_gcs_external_url(table_data),
+            )
+        elif platform == "s3":
+            self.register_customization("is_s3_platform", lambda: True)
+            self.register_customization("create_s3_path", self.create_s3_path)
+            self.register_customization(
+                "get_external_url",
+                lambda table_data: self.get_s3_external_url(
+                    table_data, self.aws_region
+                ),
+            )
+        elif platform == "abs":
+            self.register_customization("is_s3_platform", lambda: True)
+            # If we have an Azure storage account, create a specialized path creation function
+            if self.azure_storage_account:
+                storage_account = (
+                    self.azure_storage_account
+                )  # Create a local non-optional variable
+                self.register_customization(
+                    "create_s3_path",
+                    lambda bucket, key: self.create_abs_path(
+                        bucket, key, storage_account
+                    ),
+                )
+            else:
+                # Fall back to a simpler implementation if no account provided
+                self.register_customization(
+                    "create_s3_path", lambda bucket, key: f"abfss://{bucket}@{key}"
+                )
+            self.register_customization("get_external_url", self.get_abs_external_url)
+
+    def register_customization(
+        self, method_name: str, implementation: Callable[..., Any]
+    ) -> None:
+        """
+        Register a customization for a specific method.
+
+        Args:
+            method_name: The name of the method to customize
+            implementation: The implementation to use
+        """
+        self.customizations[method_name] = implementation
+
+    def apply_customizations(self, source: Any) -> Any:
+        """
+        Apply all registered customizations to the source instance.
+
+        Args:
+            source: The source instance to customize
+
+        Returns:
+            The customized source instance
+        """
+        # Set the platform
+        if hasattr(source, "source_config") and hasattr(
+            source.source_config, "platform"
+        ):
+            source.source_config.platform = self.platform
+
+        # Apply method customizations
+        for method_name, implementation in self.customizations.items():
+            # For instance methods that use self, we need to bind them to the source
+            if (
+                hasattr(implementation, "__self__")
+                and implementation.__self__ is not None
+            ):
+                # This is already a bound method, use __get__ to rebind it to the source
+                setattr(source, method_name, implementation.__get__(source))
+            else:
+                # This is a regular function or static/class method
+                setattr(source, method_name, implementation)
+
+        return source
+
+    # Add a direct method for tests that may call this directly
+    def get_external_url(self, table_data: Any) -> Optional[str]:
+        """
+        Get the external URL for a table based on the platform type.
+
+        This method routes to the appropriate implementation based on the platform.
+
+        Args:
+            table_data: Table data containing path information
+
+        Returns:
+            An external URL or None if not applicable
+        """
+        if self.platform == "s3":
+            return self.get_s3_external_url(table_data, self.aws_region)
+        elif self.platform == "gcs":
+            return self.get_gcs_external_url(table_data)
+        elif self.platform == "abs":
+            return self.get_abs_external_url(table_data)
+        return None
+
+
+# Factory function to create an adapter for a specific platform
+def create_object_store_adapter(
+    platform: str,
+    aws_region: Optional[str] = None,
+    azure_storage_account: Optional[str] = None,
+) -> ObjectStoreSourceAdapter:
+    """
+    Create an adapter for a specific object store platform.
+
+    Args:
+        platform: The platform identifier (e.g., "s3", "gcs", "abs")
+        aws_region: AWS region for S3 URLs, defaults to us-east-1 if not specified
+        azure_storage_account: Azure Storage account name
+
+    Returns:
+        An adapter configured for the specified platform
+    """
+    platform_names = {
+        "s3": "Amazon S3",
+        "gcs": "Google Cloud Storage",
+        "abs": "Azure Blob Storage",
+    }
+
+    return ObjectStoreSourceAdapter(
+        platform=platform,
+        platform_name=platform_names.get(platform, f"Unknown ({platform})"),
+        aws_region=aws_region,
+        azure_storage_account=azure_storage_account,
+    )
@@ -1,6 +1,5 @@
 import logging
 from typing import Dict, Iterable, List, Optional
-from urllib.parse import unquote
 
 from pydantic import Field, SecretStr, validator
 
@@ -19,6 +18,9 @@ from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
 from datahub.ingestion.source.data_lake_common.config import PathSpecsConfigMixin
 from datahub.ingestion.source.data_lake_common.data_lake_utils import PLATFORM_GCS
+from datahub.ingestion.source.data_lake_common.object_store import (
+    create_object_store_adapter,
+)
 from datahub.ingestion.source.data_lake_common.path_spec import PathSpec, is_gcs_uri
 from datahub.ingestion.source.s3.config import DataLakeSourceConfig
 from datahub.ingestion.source.s3.report import DataLakeSourceReport
@@ -136,16 +138,29 @@ class GCSSource(StatefulIngestionSourceBase):
 
     def create_equivalent_s3_source(self, ctx: PipelineContext) -> S3Source:
         config = self.create_equivalent_s3_config()
-        return self.s3_source_overrides(S3Source(config, PipelineContext(ctx.run_id)))
+        s3_source = S3Source(config, PipelineContext(ctx.run_id))
+        return self.s3_source_overrides(s3_source)
 
     def s3_source_overrides(self, source: S3Source) -> S3Source:
-        source.source_config.platform = PLATFORM_GCS
+        """
+        Override S3Source methods with GCS-specific implementations using the adapter pattern.
+
+        This method customizes the S3Source instance to behave like a GCS source by
+        applying the GCS-specific adapter that replaces the necessary functionality.
 
-        source.is_s3_platform = lambda: True  # type: ignore
-        source.create_s3_path = lambda bucket_name, key: unquote(  # type: ignore
-            f"s3://{bucket_name}/{key}"
+        Args:
+            source: The S3Source instance to customize
+
+        Returns:
+            The modified S3Source instance with GCS behavior
+        """
+        # Create a GCS-specific adapter
+        adapter = create_object_store_adapter(
+            "gcs",
         )
-        return source
+
+        # Apply all customizations to the source
+        return adapter.apply_customizations(source)
 
    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
        return [
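A minimal sketch of what this adapter-based override does at runtime (assuming only the module path imported above; `SimpleNamespace` is a stand-in for a real `S3Source` instance, which is not constructed here):

```python
# Hypothetical demonstration of ObjectStoreSourceAdapter.apply_customizations().
from types import SimpleNamespace

from datahub.ingestion.source.data_lake_common.object_store import (
    create_object_store_adapter,
)

# A fake "source" with just the attribute apply_customizations() inspects.
fake_source = SimpleNamespace(source_config=SimpleNamespace(platform="s3"))

adapter = create_object_store_adapter("gcs")
adapter.apply_customizations(fake_source)

print(fake_source.source_config.platform)             # -> "gcs"
print(fake_source.is_s3_platform())                   # -> True
print(fake_source.create_s3_path("bkt", "a/b.csv"))   # -> "gs://bkt/a/b.csv"
```

Because `apply_customizations` simply rebinds attributes on the source instance, the GCS source reuses all of S3Source's ingestion logic while emitting `gs://` paths and GCS console URLs.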