airbyte-source-azure-blob-storage 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-azure-blob-storage might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-azure-blob-storage
3
- Version: 0.4.0
3
+ Version: 0.4.2
4
4
  Summary: Source implementation for Azure Blob Storage.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -0,0 +1,10 @@
1
+ source_azure_blob_storage/__init__.py,sha256=Wx4PzvHg900-c2CpOOP1Wk0zcJpNVqJrkMnPtDcuaQM,319
2
+ source_azure_blob_storage/config_migrations.py,sha256=NPDwQTWaR2IDOBTEm3Np-S0Gz1phRWbxK-Du8hE2icg,4328
3
+ source_azure_blob_storage/run.py,sha256=muonwvccXCFHBX-MTKFJXgvUGmPWmcrDLSDXhY218YU,2009
4
+ source_azure_blob_storage/source.py,sha256=oExhGYkZh8irsFGBBcCnC2l_TDZ1k-qbRLe1Xd41kjs,2481
5
+ source_azure_blob_storage/spec.py,sha256=DqnKiB_y9PUIQRVzU8bozsdyPIkeJuHCcbMwf-MVtmA,3969
6
+ source_azure_blob_storage/stream_reader.py,sha256=0s9hEeus4Bs1rT4I2fetXEcBwAuTKpt0kWCT3qzz4O8,4629
7
+ airbyte_source_azure_blob_storage-0.4.2.dist-info/METADATA,sha256=-csV1t2RgVs966GYBAw7-zMc4rKf8xli5HV10v2BKKs,6246
8
+ airbyte_source_azure_blob_storage-0.4.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
+ airbyte_source_azure_blob_storage-0.4.2.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
10
+ airbyte_source_azure_blob_storage-0.4.2.dist-info/RECORD,,
@@ -3,8 +3,8 @@
3
3
  #
4
4
 
5
5
 
6
- from .config import Config
7
6
  from .source import SourceAzureBlobStorage
7
+ from .spec import SourceAzureBlobStorageSpec
8
8
  from .stream_reader import SourceAzureBlobStorageStreamReader
9
9
 
10
- __all__ = ["SourceAzureBlobStorage", "SourceAzureBlobStorageStreamReader", "Config"]
10
+ __all__ = ["SourceAzureBlobStorage", "SourceAzureBlobStorageStreamReader", "SourceAzureBlobStorageSpec"]
@@ -4,6 +4,7 @@
4
4
 
5
5
 
6
6
  import logging
7
+ from abc import ABC, abstractmethod
7
8
  from typing import Any, List, Mapping
8
9
 
9
10
  from airbyte_cdk.config_observation import create_connector_config_control_message
@@ -13,22 +14,16 @@ from airbyte_cdk.sources import Source
13
14
  logger = logging.getLogger("airbyte_logger")
14
15
 
15
16
 
16
- class MigrateCredentials:
17
- """
18
- This class stands for migrating the config azure_blob_storage_account_key inside object `credentials`
19
- """
20
-
17
+ class MigrateConfig(ABC):
21
18
  @classmethod
19
+ @abstractmethod
22
20
  def should_migrate(cls, config: Mapping[str, Any]) -> bool:
23
- return "credentials" not in config
21
+ ...
24
22
 
25
23
  @classmethod
26
- def set_azure_blob_storage_account_key(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
27
- config["credentials"] = {
28
- "auth_type": "storage_account_key",
29
- "azure_blob_storage_account_key": config.pop("azure_blob_storage_account_key"),
30
- }
31
- return config
24
+ @abstractmethod
25
+ def migrate_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
26
+ ...
32
27
 
33
28
  @classmethod
34
29
  def modify_and_save(cls, config_path: str, source: Source, config: Mapping[str, Any]) -> Mapping[str, Any]:
@@ -43,7 +38,7 @@ class MigrateCredentials:
43
38
  Returns:
44
39
  - Mapping[str, Any]: The updated configuration.
45
40
  """
46
- migrated_config = cls.set_azure_blob_storage_account_key(config)
41
+ migrated_config = cls.migrate_config(config)
47
42
  source.write_config(migrated_config, config_path)
48
43
  return migrated_config
49
44
 
@@ -75,3 +70,50 @@ class MigrateCredentials:
75
70
  config = source.read_config(config_path)
76
71
  if cls.should_migrate(config):
77
72
  cls.emit_control_message(cls.modify_and_save(config_path, source, config))
73
+
74
+
75
+ class MigrateLegacyConfig(MigrateConfig):
76
+ """
77
+ Class that takes in Azure Blob Storage source configs in the legacy format and transforms them into
78
+ configs that can be used by the new Azure Blob Storage source built with the file-based CDK.
79
+ """
80
+
81
+ @classmethod
82
+ def should_migrate(cls, config: Mapping[str, Any]) -> bool:
83
+ return "streams" not in config
84
+
85
+ @classmethod
86
+ def migrate_config(cls, legacy_config: Mapping[str, Any]) -> Mapping[str, Any]:
87
+ azure_blob_storage_blobs_prefix = legacy_config.get("azure_blob_storage_blobs_prefix", "")
88
+ return {
89
+ "azure_blob_storage_endpoint": legacy_config.get("azure_blob_storage_endpoint", None),
90
+ "azure_blob_storage_account_name": legacy_config["azure_blob_storage_account_name"],
91
+ "azure_blob_storage_account_key": legacy_config["azure_blob_storage_account_key"],
92
+ "azure_blob_storage_container_name": legacy_config["azure_blob_storage_container_name"],
93
+ "streams": [
94
+ {
95
+ "name": legacy_config["azure_blob_storage_container_name"],
96
+ "legacy_prefix": azure_blob_storage_blobs_prefix,
97
+ "validation_policy": "Emit Record",
98
+ "format": {"filetype": "jsonl"},
99
+ }
100
+ ],
101
+ }
102
+
103
+
104
+ class MigrateCredentials(MigrateConfig):
105
+ """
106
+ This class stands for migrating the config azure_blob_storage_account_key inside object `credentials`
107
+ """
108
+
109
+ @classmethod
110
+ def should_migrate(cls, config: Mapping[str, Any]) -> bool:
111
+ return "credentials" not in config
112
+
113
+ @classmethod
114
+ def migrate_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
115
+ config["credentials"] = {
116
+ "auth_type": "storage_account_key",
117
+ "azure_blob_storage_account_key": config.pop("azure_blob_storage_account_key"),
118
+ }
119
+ return config
@@ -9,8 +9,8 @@ from datetime import datetime
9
9
  from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
10
10
  from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteTraceMessage, TraceType, Type
11
11
  from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
12
- from source_azure_blob_storage import Config, SourceAzureBlobStorage, SourceAzureBlobStorageStreamReader
13
- from source_azure_blob_storage.config_migrations import MigrateCredentials
12
+ from source_azure_blob_storage import SourceAzureBlobStorage, SourceAzureBlobStorageSpec, SourceAzureBlobStorageStreamReader
13
+ from source_azure_blob_storage.config_migrations import MigrateCredentials, MigrateLegacyConfig
14
14
 
15
15
 
16
16
  def run():
@@ -21,12 +21,13 @@ def run():
21
21
  try:
22
22
  source = SourceAzureBlobStorage(
23
23
  SourceAzureBlobStorageStreamReader(),
24
- Config,
24
+ SourceAzureBlobStorageSpec,
25
25
  SourceAzureBlobStorage.read_catalog(catalog_path) if catalog_path else None,
26
26
  SourceAzureBlobStorage.read_config(config_path) if catalog_path else None,
27
27
  SourceAzureBlobStorage.read_state(state_path) if catalog_path else None,
28
28
  cursor_cls=DefaultFileBasedCursor,
29
29
  )
30
+ MigrateLegacyConfig.migrate(sys.argv[1:], source)
30
31
  MigrateCredentials.migrate(sys.argv[1:], source)
31
32
  except Exception:
32
33
  print(
@@ -2,35 +2,14 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from typing import Any, Mapping
5
+ from typing import Any
6
6
 
7
- from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
8
7
  from airbyte_cdk.sources.declarative.models import OAuthConfigSpecification
9
8
  from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
10
9
  from airbyte_protocol.models import AdvancedAuth, ConnectorSpecification
11
10
 
12
- from .legacy_config_transformer import LegacyConfigTransformer
13
-
14
11
 
15
12
  class SourceAzureBlobStorage(FileBasedSource):
16
- @classmethod
17
- def read_config(cls, config_path: str) -> Mapping[str, Any]:
18
- """
19
- Used to override the default read_config so that when the new file-based Azure Blob Storage connector processes a config
20
- in the legacy format, it can be transformed into the new config. This happens in entrypoint before we
21
- validate the config against the new spec.
22
- """
23
- config = FileBasedSource.read_config(config_path)
24
- if not cls._is_v1_config(config):
25
- converted_config = LegacyConfigTransformer.convert(config)
26
- emit_configuration_as_airbyte_control_message(converted_config)
27
- return converted_config
28
- return config
29
-
30
- @staticmethod
31
- def _is_v1_config(config: Mapping[str, Any]) -> bool:
32
- return "streams" in config
33
-
34
13
  def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
35
14
  """
36
15
  Returns the specification describing what fields can be configured by a user when setting up a file-based source.
@@ -49,7 +49,7 @@ class StorageAccountKey(BaseModel):
49
49
  )
50
50
 
51
51
 
52
- class Config(AbstractFileBasedSpec):
52
+ class SourceAzureBlobStorageSpec(AbstractFileBasedSpec):
53
53
  """
54
54
  NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes
55
55
  because it is responsible for converting legacy Azure Blob Storage v0 configs into v1 configs using the File-Based CDK.
@@ -59,17 +59,17 @@ class Config(AbstractFileBasedSpec):
59
59
  def documentation_url(cls) -> AnyUrl:
60
60
  return AnyUrl("https://docs.airbyte.com/integrations/sources/azure-blob-storage", scheme="https")
61
61
 
62
- azure_blob_storage_account_name: str = Field(
63
- title="Azure Blob Storage account name",
64
- description="The account's name of the Azure Blob Storage.",
65
- examples=["airbyte5storage"],
66
- order=2,
67
- )
68
62
  credentials: Union[Oauth2, StorageAccountKey] = Field(
69
63
  title="Authentication",
70
64
  description="Credentials for connecting to the Azure Blob Storage",
71
65
  discriminator="auth_type",
72
66
  type="object",
67
+ order=2,
68
+ )
69
+ azure_blob_storage_account_name: str = Field(
70
+ title="Azure Blob Storage account name",
71
+ description="The account's name of the Azure Blob Storage.",
72
+ examples=["airbyte5storage"],
73
73
  order=3,
74
74
  )
75
75
  azure_blob_storage_container_name: str = Field(
@@ -8,11 +8,14 @@ import pytz
8
8
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
9
9
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
10
10
  from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
11
+ from airbyte_cdk.utils import AirbyteTracedException
12
+ from airbyte_protocol.models import FailureType
11
13
  from azure.core.credentials import AccessToken
14
+ from azure.core.exceptions import ResourceNotFoundError
12
15
  from azure.storage.blob import BlobServiceClient, ContainerClient
13
16
  from smart_open import open
14
17
 
15
- from .config import Config
18
+ from .spec import SourceAzureBlobStorageSpec
16
19
 
17
20
 
18
21
  class AzureOauth2Authenticator(Oauth2Authenticator):
@@ -35,11 +38,11 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
35
38
  self._config = None
36
39
 
37
40
  @property
38
- def config(self) -> Config:
41
+ def config(self) -> SourceAzureBlobStorageSpec:
39
42
  return self._config
40
43
 
41
44
  @config.setter
42
- def config(self, value: Config) -> None:
45
+ def config(self, value: SourceAzureBlobStorageSpec) -> None:
43
46
  self._config = value
44
47
 
45
48
  @property
@@ -80,11 +83,13 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
80
83
  ) -> Iterable[RemoteFile]:
81
84
  prefixes = [prefix] if prefix else self.get_prefixes_from_globs(globs)
82
85
  prefixes = prefixes or [None]
83
- for prefix in prefixes:
84
- for blob in self.azure_container_client.list_blobs(name_starts_with=prefix):
85
- remote_file = RemoteFile(uri=blob.name, last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None))
86
- if not globs or self.file_matches_globs(remote_file, globs):
87
- yield remote_file
86
+ try:
87
+ for prefix in prefixes:
88
+ for blob in self.azure_container_client.list_blobs(name_starts_with=prefix):
89
+ remote_file = RemoteFile(uri=blob.name, last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None))
90
+ yield from self.filter_files_by_globs_and_start_date([remote_file], globs)
91
+ except ResourceNotFoundError as e:
92
+ raise AirbyteTracedException(failure_type=FailureType.config_error, internal_message=e.message, message=e.reason or e.message)
88
93
 
89
94
  def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
90
95
  try:
@@ -1,11 +0,0 @@
1
- source_azure_blob_storage/__init__.py,sha256=dUrGWNqeIfK3e4yI6dgpIzSi8d03kItsUWCHQcmxXlg,281
2
- source_azure_blob_storage/config.py,sha256=w7hEWeY8cnNJ_jtAr-ZzkhyZ7pAxI6sQLUi2BG75jGQ,3949
3
- source_azure_blob_storage/config_migrations.py,sha256=_Dq-N4T6xTL2HpDmYIr-5K5CKCudUmEjA3x9wUdcHMg,2712
4
- source_azure_blob_storage/legacy_config_transformer.py,sha256=yiS4GgjaVvJtedmweAkq-eEdPG0RWWAAr0T9UV4RmOQ,1306
5
- source_azure_blob_storage/run.py,sha256=hUxQJCBSS_pzGzSE92Fum5C9EY7cU4yP3u-KefiC9cI,1890
6
- source_azure_blob_storage/source.py,sha256=lHyvHaEghSgxs1zK67o_x6viSVZ3oZOJca2Y-owI9J8,3456
7
- source_azure_blob_storage/stream_reader.py,sha256=YpPYv5l2eLRtxTNTLGQ6LCOHtwDdmC9TRLBt9M7ZGTU,4227
8
- airbyte_source_azure_blob_storage-0.4.0.dist-info/METADATA,sha256=IMQQw4bC4D6p4FGYRUPV-DstwWu2_uic9YJoMPn9P7Y,6246
9
- airbyte_source_azure_blob_storage-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
10
- airbyte_source_azure_blob_storage-0.4.0.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
11
- airbyte_source_azure_blob_storage-0.4.0.dist-info/RECORD,,
@@ -1,31 +0,0 @@
1
- #
2
- # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
- #
4
-
5
- from typing import Any, Mapping, MutableMapping
6
-
7
-
8
- class LegacyConfigTransformer:
9
- """
10
- Class that takes in Azure Blob Storage source configs in the legacy format and transforms them into
11
- configs that can be used by the new Azure Blob Storage source built with the file-based CDK.
12
- """
13
-
14
- @classmethod
15
- def convert(cls, legacy_config: Mapping) -> MutableMapping[str, Any]:
16
- azure_blob_storage_blobs_prefix = legacy_config.get("azure_blob_storage_blobs_prefix", "")
17
-
18
- return {
19
- "azure_blob_storage_endpoint": legacy_config.get("azure_blob_storage_endpoint", None),
20
- "azure_blob_storage_account_name": legacy_config["azure_blob_storage_account_name"],
21
- "azure_blob_storage_account_key": legacy_config["azure_blob_storage_account_key"],
22
- "azure_blob_storage_container_name": legacy_config["azure_blob_storage_container_name"],
23
- "streams": [
24
- {
25
- "name": legacy_config["azure_blob_storage_container_name"],
26
- "legacy_prefix": azure_blob_storage_blobs_prefix,
27
- "validation_policy": "Emit Record",
28
- "format": {"filetype": "jsonl"},
29
- }
30
- ],
31
- }