airbyte-source-azure-blob-storage 0.4.0__tar.gz → 0.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-azure-blob-storage might be problematic. Click here for more details.
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/PKG-INFO +1 -1
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/pyproject.toml +2 -1
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/source_azure_blob_storage/__init__.py +2 -2
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/source_azure_blob_storage/config_migrations.py +55 -13
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/source_azure_blob_storage/run.py +4 -3
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/source_azure_blob_storage/source.py +1 -22
- airbyte_source_azure_blob_storage-0.4.0/source_azure_blob_storage/config.py → airbyte_source_azure_blob_storage-0.4.2/source_azure_blob_storage/spec.py +7 -7
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/source_azure_blob_storage/stream_reader.py +13 -8
- airbyte_source_azure_blob_storage-0.4.0/source_azure_blob_storage/legacy_config_transformer.py +0 -31
- {airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/README.md +0 -0
{airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/pyproject.toml
RENAMED
|
@@ -5,7 +5,7 @@ requires = [
|
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
|
6
6
|
|
|
7
7
|
[tool.poetry]
|
|
8
|
-
version = "0.4.0"
|
|
8
|
+
version = "0.4.2"
|
|
9
9
|
name = "airbyte-source-azure-blob-storage"
|
|
10
10
|
description = "Source implementation for Azure Blob Storage."
|
|
11
11
|
authors = [
|
|
@@ -41,6 +41,7 @@ source-azure-blob-storage = "source_azure_blob_storage.run:run"
|
|
|
41
41
|
|
|
42
42
|
[tool.poetry.group.dev.dependencies]
|
|
43
43
|
docker = "^7.0.0"
|
|
44
|
+
freezegun = "^1.4.0"
|
|
44
45
|
pytest-mock = "^3.6.1"
|
|
45
46
|
requests-mock = "^1.9.3"
|
|
46
47
|
pandas = "2.2.1"
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
#
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
from .config import Config
|
|
7
6
|
from .source import SourceAzureBlobStorage
|
|
7
|
+
from .spec import SourceAzureBlobStorageSpec
|
|
8
8
|
from .stream_reader import SourceAzureBlobStorageStreamReader
|
|
9
9
|
|
|
10
|
-
__all__ = ["SourceAzureBlobStorage", "SourceAzureBlobStorageStreamReader", "Config"]
|
|
10
|
+
__all__ = ["SourceAzureBlobStorage", "SourceAzureBlobStorageStreamReader", "SourceAzureBlobStorageSpec"]
|
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
7
8
|
from typing import Any, List, Mapping
|
|
8
9
|
|
|
9
10
|
from airbyte_cdk.config_observation import create_connector_config_control_message
|
|
@@ -13,22 +14,16 @@ from airbyte_cdk.sources import Source
|
|
|
13
14
|
logger = logging.getLogger("airbyte_logger")
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
class MigrateCredentials:
|
|
17
|
-
"""
|
|
18
|
-
This class stands for migrating the config azure_blob_storage_account_key inside object `credentials`
|
|
19
|
-
"""
|
|
20
|
-
|
|
17
|
+
class MigrateConfig(ABC):
|
|
21
18
|
@classmethod
|
|
19
|
+
@abstractmethod
|
|
22
20
|
def should_migrate(cls, config: Mapping[str, Any]) -> bool:
|
|
23
|
-
|
|
21
|
+
...
|
|
24
22
|
|
|
25
23
|
@classmethod
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
"azure_blob_storage_account_key": config.pop("azure_blob_storage_account_key"),
|
|
30
|
-
}
|
|
31
|
-
return config
|
|
24
|
+
@abstractmethod
|
|
25
|
+
def migrate_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
|
|
26
|
+
...
|
|
32
27
|
|
|
33
28
|
@classmethod
|
|
34
29
|
def modify_and_save(cls, config_path: str, source: Source, config: Mapping[str, Any]) -> Mapping[str, Any]:
|
|
@@ -43,7 +38,7 @@ class MigrateCredentials:
|
|
|
43
38
|
Returns:
|
|
44
39
|
- Mapping[str, Any]: The updated configuration.
|
|
45
40
|
"""
|
|
46
|
-
migrated_config = cls.
|
|
41
|
+
migrated_config = cls.migrate_config(config)
|
|
47
42
|
source.write_config(migrated_config, config_path)
|
|
48
43
|
return migrated_config
|
|
49
44
|
|
|
@@ -75,3 +70,50 @@ class MigrateCredentials:
|
|
|
75
70
|
config = source.read_config(config_path)
|
|
76
71
|
if cls.should_migrate(config):
|
|
77
72
|
cls.emit_control_message(cls.modify_and_save(config_path, source, config))
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class MigrateLegacyConfig(MigrateConfig):
|
|
76
|
+
"""
|
|
77
|
+
Class that takes in Azure Blob Storage source configs in the legacy format and transforms them into
|
|
78
|
+
configs that can be used by the new Azure Blob Storage source built with the file-based CDK.
|
|
79
|
+
"""
|
|
80
|
+
|
|
81
|
+
@classmethod
|
|
82
|
+
def should_migrate(cls, config: Mapping[str, Any]) -> bool:
|
|
83
|
+
return "streams" not in config
|
|
84
|
+
|
|
85
|
+
@classmethod
|
|
86
|
+
def migrate_config(cls, legacy_config: Mapping[str, Any]) -> Mapping[str, Any]:
|
|
87
|
+
azure_blob_storage_blobs_prefix = legacy_config.get("azure_blob_storage_blobs_prefix", "")
|
|
88
|
+
return {
|
|
89
|
+
"azure_blob_storage_endpoint": legacy_config.get("azure_blob_storage_endpoint", None),
|
|
90
|
+
"azure_blob_storage_account_name": legacy_config["azure_blob_storage_account_name"],
|
|
91
|
+
"azure_blob_storage_account_key": legacy_config["azure_blob_storage_account_key"],
|
|
92
|
+
"azure_blob_storage_container_name": legacy_config["azure_blob_storage_container_name"],
|
|
93
|
+
"streams": [
|
|
94
|
+
{
|
|
95
|
+
"name": legacy_config["azure_blob_storage_container_name"],
|
|
96
|
+
"legacy_prefix": azure_blob_storage_blobs_prefix,
|
|
97
|
+
"validation_policy": "Emit Record",
|
|
98
|
+
"format": {"filetype": "jsonl"},
|
|
99
|
+
}
|
|
100
|
+
],
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class MigrateCredentials(MigrateConfig):
|
|
105
|
+
"""
|
|
106
|
+
This class stands for migrating the config azure_blob_storage_account_key inside object `credentials`
|
|
107
|
+
"""
|
|
108
|
+
|
|
109
|
+
@classmethod
|
|
110
|
+
def should_migrate(cls, config: Mapping[str, Any]) -> bool:
|
|
111
|
+
return "credentials" not in config
|
|
112
|
+
|
|
113
|
+
@classmethod
|
|
114
|
+
def migrate_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
|
|
115
|
+
config["credentials"] = {
|
|
116
|
+
"auth_type": "storage_account_key",
|
|
117
|
+
"azure_blob_storage_account_key": config.pop("azure_blob_storage_account_key"),
|
|
118
|
+
}
|
|
119
|
+
return config
|
|
@@ -9,8 +9,8 @@ from datetime import datetime
|
|
|
9
9
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
|
|
10
10
|
from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteTraceMessage, TraceType, Type
|
|
11
11
|
from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
|
|
12
|
-
from source_azure_blob_storage import Config, SourceAzureBlobStorage, SourceAzureBlobStorageStreamReader
|
|
13
|
-
from source_azure_blob_storage.config_migrations import MigrateCredentials
|
|
12
|
+
from source_azure_blob_storage import SourceAzureBlobStorage, SourceAzureBlobStorageSpec, SourceAzureBlobStorageStreamReader
|
|
13
|
+
from source_azure_blob_storage.config_migrations import MigrateCredentials, MigrateLegacyConfig
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def run():
|
|
@@ -21,12 +21,13 @@ def run():
|
|
|
21
21
|
try:
|
|
22
22
|
source = SourceAzureBlobStorage(
|
|
23
23
|
SourceAzureBlobStorageStreamReader(),
|
|
24
|
-
|
|
24
|
+
SourceAzureBlobStorageSpec,
|
|
25
25
|
SourceAzureBlobStorage.read_catalog(catalog_path) if catalog_path else None,
|
|
26
26
|
SourceAzureBlobStorage.read_config(config_path) if catalog_path else None,
|
|
27
27
|
SourceAzureBlobStorage.read_state(state_path) if catalog_path else None,
|
|
28
28
|
cursor_cls=DefaultFileBasedCursor,
|
|
29
29
|
)
|
|
30
|
+
MigrateLegacyConfig.migrate(sys.argv[1:], source)
|
|
30
31
|
MigrateCredentials.migrate(sys.argv[1:], source)
|
|
31
32
|
except Exception:
|
|
32
33
|
print(
|
|
@@ -2,35 +2,14 @@
|
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
4
|
|
|
5
|
-
from typing import Any, Mapping
|
|
5
|
+
from typing import Any
|
|
6
6
|
|
|
7
|
-
from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
|
|
8
7
|
from airbyte_cdk.sources.declarative.models import OAuthConfigSpecification
|
|
9
8
|
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
|
10
9
|
from airbyte_protocol.models import AdvancedAuth, ConnectorSpecification
|
|
11
10
|
|
|
12
|
-
from .legacy_config_transformer import LegacyConfigTransformer
|
|
13
|
-
|
|
14
11
|
|
|
15
12
|
class SourceAzureBlobStorage(FileBasedSource):
|
|
16
|
-
@classmethod
|
|
17
|
-
def read_config(cls, config_path: str) -> Mapping[str, Any]:
|
|
18
|
-
"""
|
|
19
|
-
Used to override the default read_config so that when the new file-based Azure Blob Storage connector processes a config
|
|
20
|
-
in the legacy format, it can be transformed into the new config. This happens in entrypoint before we
|
|
21
|
-
validate the config against the new spec.
|
|
22
|
-
"""
|
|
23
|
-
config = FileBasedSource.read_config(config_path)
|
|
24
|
-
if not cls._is_v1_config(config):
|
|
25
|
-
converted_config = LegacyConfigTransformer.convert(config)
|
|
26
|
-
emit_configuration_as_airbyte_control_message(converted_config)
|
|
27
|
-
return converted_config
|
|
28
|
-
return config
|
|
29
|
-
|
|
30
|
-
@staticmethod
|
|
31
|
-
def _is_v1_config(config: Mapping[str, Any]) -> bool:
|
|
32
|
-
return "streams" in config
|
|
33
|
-
|
|
34
13
|
def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
|
|
35
14
|
"""
|
|
36
15
|
Returns the specification describing what fields can be configured by a user when setting up a file-based source.
|
|
@@ -49,7 +49,7 @@ class StorageAccountKey(BaseModel):
|
|
|
49
49
|
)
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
class Config(AbstractFileBasedSpec):
|
|
52
|
+
class SourceAzureBlobStorageSpec(AbstractFileBasedSpec):
|
|
53
53
|
"""
|
|
54
54
|
NOTE: When this Spec is changed, legacy_config_transformer.py must also be modified to uptake the changes
|
|
55
55
|
because it is responsible for converting legacy Azure Blob Storage v0 configs into v1 configs using the File-Based CDK.
|
|
@@ -59,17 +59,17 @@ class Config(AbstractFileBasedSpec):
|
|
|
59
59
|
def documentation_url(cls) -> AnyUrl:
|
|
60
60
|
return AnyUrl("https://docs.airbyte.com/integrations/sources/azure-blob-storage", scheme="https")
|
|
61
61
|
|
|
62
|
-
azure_blob_storage_account_name: str = Field(
|
|
63
|
-
title="Azure Blob Storage account name",
|
|
64
|
-
description="The account's name of the Azure Blob Storage.",
|
|
65
|
-
examples=["airbyte5storage"],
|
|
66
|
-
order=2,
|
|
67
|
-
)
|
|
68
62
|
credentials: Union[Oauth2, StorageAccountKey] = Field(
|
|
69
63
|
title="Authentication",
|
|
70
64
|
description="Credentials for connecting to the Azure Blob Storage",
|
|
71
65
|
discriminator="auth_type",
|
|
72
66
|
type="object",
|
|
67
|
+
order=2,
|
|
68
|
+
)
|
|
69
|
+
azure_blob_storage_account_name: str = Field(
|
|
70
|
+
title="Azure Blob Storage account name",
|
|
71
|
+
description="The account's name of the Azure Blob Storage.",
|
|
72
|
+
examples=["airbyte5storage"],
|
|
73
73
|
order=3,
|
|
74
74
|
)
|
|
75
75
|
azure_blob_storage_container_name: str = Field(
|
|
@@ -8,11 +8,14 @@ import pytz
|
|
|
8
8
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
9
9
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
10
10
|
from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
|
|
11
|
+
from airbyte_cdk.utils import AirbyteTracedException
|
|
12
|
+
from airbyte_protocol.models import FailureType
|
|
11
13
|
from azure.core.credentials import AccessToken
|
|
14
|
+
from azure.core.exceptions import ResourceNotFoundError
|
|
12
15
|
from azure.storage.blob import BlobServiceClient, ContainerClient
|
|
13
16
|
from smart_open import open
|
|
14
17
|
|
|
15
|
-
from .config import Config
|
|
18
|
+
from .spec import SourceAzureBlobStorageSpec
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
class AzureOauth2Authenticator(Oauth2Authenticator):
|
|
@@ -35,11 +38,11 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
|
35
38
|
self._config = None
|
|
36
39
|
|
|
37
40
|
@property
|
|
38
|
-
def config(self) -> Config:
|
|
41
|
+
def config(self) -> SourceAzureBlobStorageSpec:
|
|
39
42
|
return self._config
|
|
40
43
|
|
|
41
44
|
@config.setter
|
|
42
|
-
def config(self, value: Config) -> None:
|
|
45
|
+
def config(self, value: SourceAzureBlobStorageSpec) -> None:
|
|
43
46
|
self._config = value
|
|
44
47
|
|
|
45
48
|
@property
|
|
@@ -80,11 +83,13 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
|
80
83
|
) -> Iterable[RemoteFile]:
|
|
81
84
|
prefixes = [prefix] if prefix else self.get_prefixes_from_globs(globs)
|
|
82
85
|
prefixes = prefixes or [None]
|
|
83
|
-
|
|
84
|
-
for
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
yield remote_file
|
|
86
|
+
try:
|
|
87
|
+
for prefix in prefixes:
|
|
88
|
+
for blob in self.azure_container_client.list_blobs(name_starts_with=prefix):
|
|
89
|
+
remote_file = RemoteFile(uri=blob.name, last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None))
|
|
90
|
+
yield from self.filter_files_by_globs_and_start_date([remote_file], globs)
|
|
91
|
+
except ResourceNotFoundError as e:
|
|
92
|
+
raise AirbyteTracedException(failure_type=FailureType.config_error, internal_message=e.message, message=e.reason or e.message)
|
|
88
93
|
|
|
89
94
|
def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
|
|
90
95
|
try:
|
airbyte_source_azure_blob_storage-0.4.0/source_azure_blob_storage/legacy_config_transformer.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
from typing import Any, Mapping, MutableMapping
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class LegacyConfigTransformer:
|
|
9
|
-
"""
|
|
10
|
-
Class that takes in Azure Blob Storage source configs in the legacy format and transforms them into
|
|
11
|
-
configs that can be used by the new Azure Blob Storage source built with the file-based CDK.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
@classmethod
|
|
15
|
-
def convert(cls, legacy_config: Mapping) -> MutableMapping[str, Any]:
|
|
16
|
-
azure_blob_storage_blobs_prefix = legacy_config.get("azure_blob_storage_blobs_prefix", "")
|
|
17
|
-
|
|
18
|
-
return {
|
|
19
|
-
"azure_blob_storage_endpoint": legacy_config.get("azure_blob_storage_endpoint", None),
|
|
20
|
-
"azure_blob_storage_account_name": legacy_config["azure_blob_storage_account_name"],
|
|
21
|
-
"azure_blob_storage_account_key": legacy_config["azure_blob_storage_account_key"],
|
|
22
|
-
"azure_blob_storage_container_name": legacy_config["azure_blob_storage_container_name"],
|
|
23
|
-
"streams": [
|
|
24
|
-
{
|
|
25
|
-
"name": legacy_config["azure_blob_storage_container_name"],
|
|
26
|
-
"legacy_prefix": azure_blob_storage_blobs_prefix,
|
|
27
|
-
"validation_policy": "Emit Record",
|
|
28
|
-
"format": {"filetype": "jsonl"},
|
|
29
|
-
}
|
|
30
|
-
],
|
|
31
|
-
}
|
{airbyte_source_azure_blob_storage-0.4.0 → airbyte_source_azure_blob_storage-0.4.2}/README.md
RENAMED
|
File without changes
|