airbyte-source-azure-blob-storage 0.3.6__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-azure-blob-storage might be problematic. Click here for more details.
- {airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/PKG-INFO +21 -1
- {airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/README.md +20 -0
- {airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/pyproject.toml +2 -1
- {airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/source_azure_blob_storage/__init__.py +2 -2
- airbyte_source_azure_blob_storage-0.4.1/source_azure_blob_storage/config_migrations.py +119 -0
- {airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/source_azure_blob_storage/run.py +5 -2
- airbyte_source_azure_blob_storage-0.4.1/source_azure_blob_storage/source.py +52 -0
- airbyte_source_azure_blob_storage-0.3.6/source_azure_blob_storage/config.py → airbyte_source_azure_blob_storage-0.4.1/source_azure_blob_storage/spec.py +48 -8
- {airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/source_azure_blob_storage/stream_reader.py +37 -10
- airbyte_source_azure_blob_storage-0.3.6/source_azure_blob_storage/legacy_config_transformer.py +0 -31
- airbyte_source_azure_blob_storage-0.3.6/source_azure_blob_storage/source.py +0 -30
{airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: airbyte-source-azure-blob-storage
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.1
|
|
4
4
|
Summary: Source implementation for Azure Blob Storage.
|
|
5
5
|
Home-page: https://airbyte.com
|
|
6
6
|
License: MIT
|
|
@@ -32,6 +32,26 @@ For information about how to use this connector within Airbyte, see [the documen
|
|
|
32
32
|
* Poetry (~=1.7) - installation instructions [here](https://python-poetry.org/docs/#installation)
|
|
33
33
|
|
|
34
34
|
|
|
35
|
+
### Generate a new OAuth token
|
|
36
|
+
|
|
37
|
+
The tenant ID must be provided by the user. Reason:
|
|
38
|
+
https://learn.microsoft.com/en-us/answers/questions/1531138/which-tenant-id-do-i-have-to-use-to-get-tokens-and
|
|
39
|
+
|
|
40
|
+
1. GET https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/authorize
|
|
41
|
+
?response_type=code
|
|
42
|
+
&client_id=<client_id>
|
|
43
|
+
&scope=offline_access https://storage.azure.com/.default
|
|
44
|
+
&redirect_uri=http://localhost:8000/auth_flow
|
|
45
|
+
&response_mode=query
|
|
46
|
+
&state=1234
|
|
47
|
+
|
|
48
|
+
2. POST https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/token
|
|
49
|
+
client_id:<client_id>
|
|
50
|
+
code:<code obtained from previous request>
|
|
51
|
+
redirect_uri:http://localhost:8000/auth_flow
|
|
52
|
+
grant_type:authorization_code
|
|
53
|
+
client_secret:<client_secret>
|
|
54
|
+
|
|
35
55
|
### Installing the connector
|
|
36
56
|
From this connector directory, run:
|
|
37
57
|
```bash
|
{airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/README.md
RENAMED
|
@@ -11,6 +11,26 @@ For information about how to use this connector within Airbyte, see [the documen
|
|
|
11
11
|
* Poetry (~=1.7) - installation instructions [here](https://python-poetry.org/docs/#installation)
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
### Generate a new OAuth token
|
|
15
|
+
|
|
16
|
+
The tenant ID must be provided by the user. Reason:
|
|
17
|
+
https://learn.microsoft.com/en-us/answers/questions/1531138/which-tenant-id-do-i-have-to-use-to-get-tokens-and
|
|
18
|
+
|
|
19
|
+
1. GET https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/authorize
|
|
20
|
+
?response_type=code
|
|
21
|
+
&client_id=<client_id>
|
|
22
|
+
&scope=offline_access https://storage.azure.com/.default
|
|
23
|
+
&redirect_uri=http://localhost:8000/auth_flow
|
|
24
|
+
&response_mode=query
|
|
25
|
+
&state=1234
|
|
26
|
+
|
|
27
|
+
2. POST https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/token
|
|
28
|
+
client_id:<client_id>
|
|
29
|
+
code:<code obtained from previous request>
|
|
30
|
+
redirect_uri:http://localhost:8000/auth_flow
|
|
31
|
+
grant_type:authorization_code
|
|
32
|
+
client_secret:<client_secret>
|
|
33
|
+
|
|
14
34
|
### Installing the connector
|
|
15
35
|
From this connector directory, run:
|
|
16
36
|
```bash
|
{airbyte_source_azure_blob_storage-0.3.6 → airbyte_source_azure_blob_storage-0.4.1}/pyproject.toml
RENAMED
|
@@ -5,7 +5,7 @@ requires = [
|
|
|
5
5
|
build-backend = "poetry.core.masonry.api"
|
|
6
6
|
|
|
7
7
|
[tool.poetry]
|
|
8
|
-
version = "0.
|
|
8
|
+
version = "0.4.1"
|
|
9
9
|
name = "airbyte-source-azure-blob-storage"
|
|
10
10
|
description = "Source implementation for Azure Blob Storage."
|
|
11
11
|
authors = [
|
|
@@ -41,6 +41,7 @@ source-azure-blob-storage = "source_azure_blob_storage.run:run"
|
|
|
41
41
|
|
|
42
42
|
[tool.poetry.group.dev.dependencies]
|
|
43
43
|
docker = "^7.0.0"
|
|
44
|
+
freezegun = "^1.4.0"
|
|
44
45
|
pytest-mock = "^3.6.1"
|
|
45
46
|
requests-mock = "^1.9.3"
|
|
46
47
|
pandas = "2.2.1"
|
|
@@ -3,8 +3,8 @@
|
|
|
3
3
|
#
|
|
4
4
|
|
|
5
5
|
|
|
6
|
-
from .config import Config
|
|
7
6
|
from .source import SourceAzureBlobStorage
|
|
7
|
+
from .spec import SourceAzureBlobStorageSpec
|
|
8
8
|
from .stream_reader import SourceAzureBlobStorageStreamReader
|
|
9
9
|
|
|
10
|
-
__all__ = ["SourceAzureBlobStorage", "SourceAzureBlobStorageStreamReader", "
|
|
10
|
+
__all__ = ["SourceAzureBlobStorage", "SourceAzureBlobStorageStreamReader", "SourceAzureBlobStorageSpec"]
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from abc import ABC, abstractmethod
|
|
8
|
+
from typing import Any, List, Mapping
|
|
9
|
+
|
|
10
|
+
from airbyte_cdk.config_observation import create_connector_config_control_message
|
|
11
|
+
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
|
12
|
+
from airbyte_cdk.sources import Source
|
|
13
|
+
|
|
14
|
+
logger = logging.getLogger("airbyte_logger")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class MigrateConfig(ABC):
    """
    Base class for a single connector-config migration.

    Subclasses supply `should_migrate` (is the migration needed?) and `migrate_config`
    (what does the migrated config look like?). `migrate` combines them with saving the
    result and emitting a control message.
    """

    @classmethod
    @abstractmethod
    def should_migrate(cls, config: Mapping[str, Any]) -> bool:
        """Return True when `config` still needs this migration."""
        ...

    @classmethod
    @abstractmethod
    def migrate_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
        """Return the migrated version of `config`."""
        ...

    @classmethod
    def modify_and_save(cls, config_path: str, source: Source, config: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Run `migrate_config` on `config` and write the result via `source.write_config`.

        Args:
        - config_path (str): Path the migrated configuration is written to.
        - source (Source): Source whose `write_config` performs the write.
        - config (Mapping[str, Any]): Configuration to migrate.

        Returns:
        - Mapping[str, Any]: The migrated configuration.
        """
        updated = cls.migrate_config(config)
        source.write_config(updated, config_path)
        return updated

    @classmethod
    def emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None:
        """
        Print the connector-config control message built from `migrated_config`.

        Args:
        - migrated_config (Mapping[str, Any]): The migrated configuration.
        """
        control_message = create_connector_config_control_message(migrated_config)
        print(control_message.json(exclude_unset=True))

    @classmethod
    def migrate(cls, args: List[str], source: Source) -> None:
        """
        Entry point: migrate the config referenced by the command-line arguments, if needed.

        Nothing happens when no config path can be extracted from `args`, or when
        `should_migrate` returns False for the config read from that path.

        Args:
        - args (List[str]): Command-line arguments.
        - source (Source): Source used to read and write the configuration.
        """
        config_path = AirbyteEntrypoint(source).extract_config(args)
        if not config_path:
            return

        current = source.read_config(config_path)
        if not cls.should_migrate(current):
            return

        migrated = cls.modify_and_save(config_path, source, current)
        cls.emit_control_message(migrated)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class MigrateLegacyConfig(MigrateConfig):
    """
    Converts a legacy Azure Blob Storage source config into the shape used by the
    file-based CDK source: the connection fields plus a single-entry `streams` list.
    """

    @classmethod
    def should_migrate(cls, config: Mapping[str, Any]) -> bool:
        """A config is treated as legacy when it has no `streams` key."""
        is_file_based = "streams" in config
        return not is_file_based

    @classmethod
    def migrate_config(cls, legacy_config: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Build the file-based config from `legacy_config`.

        The optional prefix defaults to "" and the optional endpoint to None. The account
        name, account key and container name are required, so a missing one raises KeyError.
        """
        # Lookups are done in the order the keys appear in the result so the same
        # KeyError surfaces first when several required keys are missing.
        prefix = legacy_config.get("azure_blob_storage_blobs_prefix", "")
        endpoint = legacy_config.get("azure_blob_storage_endpoint")
        account_name = legacy_config["azure_blob_storage_account_name"]
        account_key = legacy_config["azure_blob_storage_account_key"]
        container_name = legacy_config["azure_blob_storage_container_name"]

        # The one generated stream is named after the container and reads JSONL.
        stream = {
            "name": container_name,
            "legacy_prefix": prefix,
            "validation_policy": "Emit Record",
            "format": {"filetype": "jsonl"},
        }
        return {
            "azure_blob_storage_endpoint": endpoint,
            "azure_blob_storage_account_name": account_name,
            "azure_blob_storage_account_key": account_key,
            "azure_blob_storage_container_name": container_name,
            "streams": [stream],
        }
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
class MigrateCredentials(MigrateConfig):
    """
    Moves the top-level `azure_blob_storage_account_key` into a `credentials` object
    tagged with `auth_type` "storage_account_key".
    """

    @classmethod
    def should_migrate(cls, config: Mapping[str, Any]) -> bool:
        """A config needs this migration when it has no `credentials` key."""
        return "credentials" not in config

    @classmethod
    def migrate_config(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
        """
        Return a new config with the account key nested under `credentials`.

        The input mapping is left untouched: it is typed as a read-only `Mapping`, so it is
        copied rather than modified with `pop` and item assignment.

        Raises:
            KeyError: if `azure_blob_storage_account_key` is absent from `config`.
        """
        legacy_key = "azure_blob_storage_account_key"
        account_key = config[legacy_key]

        # Keep every other entry in its original order; `credentials` goes last,
        # which matches the key order the in-place version produced.
        migrated = {name: value for name, value in config.items() if name != legacy_key}
        migrated["credentials"] = {
            "auth_type": "storage_account_key",
            "azure_blob_storage_account_key": account_key,
        }
        return migrated
|
|
@@ -9,7 +9,8 @@ from datetime import datetime
|
|
|
9
9
|
from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
|
|
10
10
|
from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteTraceMessage, TraceType, Type
|
|
11
11
|
from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
|
|
12
|
-
from source_azure_blob_storage import
|
|
12
|
+
from source_azure_blob_storage import SourceAzureBlobStorage, SourceAzureBlobStorageSpec, SourceAzureBlobStorageStreamReader
|
|
13
|
+
from source_azure_blob_storage.config_migrations import MigrateCredentials, MigrateLegacyConfig
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
def run():
|
|
@@ -20,12 +21,14 @@ def run():
|
|
|
20
21
|
try:
|
|
21
22
|
source = SourceAzureBlobStorage(
|
|
22
23
|
SourceAzureBlobStorageStreamReader(),
|
|
23
|
-
|
|
24
|
+
SourceAzureBlobStorageSpec,
|
|
24
25
|
SourceAzureBlobStorage.read_catalog(catalog_path) if catalog_path else None,
|
|
25
26
|
SourceAzureBlobStorage.read_config(config_path) if catalog_path else None,
|
|
26
27
|
SourceAzureBlobStorage.read_state(state_path) if catalog_path else None,
|
|
27
28
|
cursor_cls=DefaultFileBasedCursor,
|
|
28
29
|
)
|
|
30
|
+
MigrateLegacyConfig.migrate(sys.argv[1:], source)
|
|
31
|
+
MigrateCredentials.migrate(sys.argv[1:], source)
|
|
29
32
|
except Exception:
|
|
30
33
|
print(
|
|
31
34
|
AirbyteMessage(
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
+
#
|
|
4
|
+
|
|
5
|
+
from typing import Any, Mapping
|
|
6
|
+
|
|
7
|
+
from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
|
|
8
|
+
from airbyte_cdk.sources.declarative.models import OAuthConfigSpecification
|
|
9
|
+
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
|
10
|
+
from airbyte_protocol.models import AdvancedAuth, ConnectorSpecification
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class SourceAzureBlobStorage(FileBasedSource):
    def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
        """
        Build the connector specification.

        It combines the spec class's documentation URL and schema with an OAuth 2.0
        advanced-auth section that applies when `credentials.auth_type` equals "oauth2".
        """
        # refresh_token is mapped to credentials.refresh_token in the connector config.
        oauth_output = {
            "type": "object",
            "additionalProperties": False,
            "properties": {"refresh_token": {"type": "string", "path_in_connector_config": ["credentials", "refresh_token"]}},
        }
        # Server-side inputs: the application's client id and secret.
        server_input = {
            "type": "object",
            "additionalProperties": False,
            "properties": {"client_id": {"type": "string"}, "client_secret": {"type": "string"}},
        }
        # The same two values, mapped to their place under `credentials`.
        server_output = {
            "type": "object",
            "additionalProperties": False,
            "properties": {
                "client_id": {"type": "string", "path_in_connector_config": ["credentials", "client_id"]},
                "client_secret": {"type": "string", "path_in_connector_config": ["credentials", "client_secret"]},
            },
        }
        # tenant_id is taken from the connector config (credentials.tenant_id).
        user_input = {
            "type": "object",
            "additionalProperties": False,
            "properties": {"tenant_id": {"type": "string", "path_in_connector_config": ["credentials", "tenant_id"]}},
        }

        return ConnectorSpecification(
            documentationUrl=self.spec_class.documentation_url(),
            connectionSpecification=self.spec_class.schema(),
            advanced_auth=AdvancedAuth(
                auth_flow_type="oauth2.0",
                predicate_key=["credentials", "auth_type"],
                predicate_value="oauth2",
                oauth_config_specification=OAuthConfigSpecification(
                    complete_oauth_output_specification=oauth_output,
                    complete_oauth_server_input_specification=server_input,
                    complete_oauth_server_output_specification=server_output,
                    oauth_user_input_from_connector_config_specification=user_input,
                ),
            ),
        )
|
|
@@ -2,14 +2,54 @@
|
|
|
2
2
|
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
3
|
#
|
|
4
4
|
|
|
5
|
-
from typing import Any, Dict, Optional
|
|
5
|
+
from typing import Any, Dict, Literal, Optional, Union
|
|
6
6
|
|
|
7
7
|
import dpath.util
|
|
8
8
|
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
|
9
|
-
from
|
|
9
|
+
from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
|
|
10
|
+
from pydantic import AnyUrl, BaseModel, Field
|
|
10
11
|
|
|
11
12
|
|
|
12
|
-
class
|
|
13
|
+
# OAuth2 credentials option for the `credentials` field; selected when auth_type == "oauth2".
# Documented with comments only: a class docstring would end up in the generated schema.
class Oauth2(BaseModel):
    class Config(OneOfOptionConfig):
        title = "Authenticate via Oauth2"
        discriminator = "auth_type"

    # Constant discriminator value for this option.
    auth_type: Literal["oauth2"] = Field("oauth2", const=True)
    # Used to build the token endpoint URL for the tenant.
    tenant_id: str = Field(
        title="Tenant ID",
        description="Tenant ID of the Microsoft Azure Application user",
        airbyte_secret=True,
    )
    client_id: str = Field(
        title="Client ID",
        description="Client ID of your Microsoft developer application",
        airbyte_secret=True,
    )
    client_secret: str = Field(
        title="Client Secret",
        description="Client Secret of your Microsoft developer application",
        airbyte_secret=True,
    )
    refresh_token: str = Field(
        title="Refresh Token",
        description="Refresh Token of your Microsoft developer application",
        airbyte_secret=True,
    )
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Storage-account-key credentials option for the `credentials` field; selected when
# auth_type == "storage_account_key".
# Documented with comments only: a class docstring would end up in the generated schema.
class StorageAccountKey(BaseModel):
    class Config(OneOfOptionConfig):
        title = "Authenticate via Storage Account Key"
        discriminator = "auth_type"

    # Constant discriminator value for this option.
    auth_type: Literal["storage_account_key"] = Field("storage_account_key", const=True)
    # The account key, flagged as a secret.
    azure_blob_storage_account_key: str = Field(
        title="Azure Blob Storage account key",
        description="The Azure blob storage account key.",
        airbyte_secret=True,
        examples=["Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd=="],
        order=3,
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class SourceAzureBlobStorageSpec(AbstractFileBasedSpec):
|
|
13
53
|
"""
|
|
14
54
|
NOTE: When this Spec is changed, config_migrations.py (MigrateLegacyConfig) must also be modified to uptake the changes
|
|
15
55
|
because it is responsible for converting legacy Azure Blob Storage v0 configs into v1 configs using the File-Based CDK.
|
|
@@ -25,11 +65,11 @@ class Config(AbstractFileBasedSpec):
|
|
|
25
65
|
examples=["airbyte5storage"],
|
|
26
66
|
order=2,
|
|
27
67
|
)
|
|
28
|
-
|
|
29
|
-
title="
|
|
30
|
-
description="
|
|
31
|
-
|
|
32
|
-
|
|
68
|
+
credentials: Union[Oauth2, StorageAccountKey] = Field(
|
|
69
|
+
title="Authentication",
|
|
70
|
+
description="Credentials for connecting to the Azure Blob Storage",
|
|
71
|
+
discriminator="auth_type",
|
|
72
|
+
type="object",
|
|
33
73
|
order=3,
|
|
34
74
|
)
|
|
35
75
|
azure_blob_storage_container_name: str = Field(
|
|
@@ -2,28 +2,44 @@
|
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
from io import IOBase
|
|
5
|
-
from typing import Iterable, List, Optional
|
|
5
|
+
from typing import Iterable, List, Optional, Union
|
|
6
6
|
|
|
7
7
|
import pytz
|
|
8
8
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
9
9
|
from airbyte_cdk.sources.file_based.remote_file import RemoteFile
|
|
10
|
+
from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
|
|
11
|
+
from azure.core.credentials import AccessToken
|
|
10
12
|
from azure.storage.blob import BlobServiceClient, ContainerClient
|
|
11
13
|
from smart_open import open
|
|
12
14
|
|
|
13
|
-
from .
|
|
15
|
+
from .spec import SourceAzureBlobStorageSpec
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class AzureOauth2Authenticator(Oauth2Authenticator):
    """
    Oauth2Authenticator that also exposes `get_token`, so it can be handed to the Azure
    Blob Storage SDK as an azure.core.credentials.TokenCredential-style credential.
    """

    def get_token(self, *args, **kwargs) -> AccessToken:
        """Return the current access token wrapped in an `AccessToken`.

        The token comes from the inherited `get_access_token()`, which takes care of the
        refresh-token logic. `expires_on` is not a real expiry: it is a fixed epoch value
        in the year 2222, present only because the protocol requires the field.
        Positional and keyword arguments are accepted and ignored.
        """
        far_future_epoch = 7952342400
        current_token = self.get_access_token()
        return AccessToken(token=current_token, expires_on=far_future_epoch)
|
|
14
28
|
|
|
15
29
|
|
|
16
30
|
class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
31
|
+
_credentials = None
|
|
32
|
+
|
|
17
33
|
def __init__(self, *args, **kwargs):
    """Forward all arguments to the parent reader and start without a config."""
    super().__init__(*args, **kwargs)
    # Filled in later through the `config` setter.
    self._config = None
|
|
20
36
|
|
|
21
37
|
@property
def config(self) -> SourceAzureBlobStorageSpec:
    """The parsed connector spec currently attached to this reader (None until set)."""
    return self._config

@config.setter
def config(self, value: SourceAzureBlobStorageSpec) -> None:
    """Attach the parsed connector spec to this reader."""
    self._config = value
|
|
28
44
|
|
|
29
45
|
@property
|
|
@@ -35,14 +51,26 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
|
35
51
|
@property
def azure_container_client(self):
    """Create a ContainerClient for the configured container.

    A new client is built on every access; the credential comes from the lazily
    resolved `azure_credentials` property.
    """
    # Evaluated in the same order as the call arguments: URL, container name, credential.
    url = self.account_url
    container = self.config.azure_blob_storage_container_name
    credential = self.azure_credentials
    return ContainerClient(url, container_name=container, credential=credential)
|
|
42
56
|
|
|
43
57
|
@property
def azure_blob_service_client(self):
    """Create a BlobServiceClient for the account URL.

    The credential is read through the `azure_credentials` property rather than the raw
    `_credentials` cache. The cache is None until `azure_credentials` has been accessed,
    so using it directly would pass no credential whenever this property is the first to
    be used. This matches what `azure_container_client` does.
    """
    return BlobServiceClient(self.account_url, credential=self.azure_credentials)
|
|
60
|
+
|
|
61
|
+
@property
def azure_credentials(self) -> Union[str, AzureOauth2Authenticator]:
    """Resolve the credential for the Azure clients, building it on first use.

    Returns the storage account key string when `credentials.auth_type` is
    "storage_account_key"; otherwise an AzureOauth2Authenticator built from the tenant id,
    client id, client secret and refresh token. The value is cached in `_credentials`.
    """
    if self._credentials:
        return self._credentials

    creds = self.config.credentials
    if creds.auth_type == "storage_account_key":
        self._credentials = creds.azure_blob_storage_account_key
    else:
        # The token endpoint is specific to the tenant.
        refresh_endpoint = f"https://login.microsoftonline.com/{creds.tenant_id}/oauth2/v2.0/token"
        self._credentials = AzureOauth2Authenticator(
            token_refresh_endpoint=refresh_endpoint,
            client_id=creds.client_id,
            client_secret=creds.client_secret,
            refresh_token=creds.refresh_token,
        )
    return self._credentials
|
|
46
74
|
|
|
47
75
|
def get_matching_files(
|
|
48
76
|
self,
|
|
@@ -55,8 +83,7 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
|
55
83
|
for prefix in prefixes:
|
|
56
84
|
for blob in self.azure_container_client.list_blobs(name_starts_with=prefix):
|
|
57
85
|
remote_file = RemoteFile(uri=blob.name, last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None))
|
|
58
|
-
|
|
59
|
-
yield remote_file
|
|
86
|
+
yield from self.filter_files_by_globs_and_start_date([remote_file], globs)
|
|
60
87
|
|
|
61
88
|
def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
|
|
62
89
|
try:
|
airbyte_source_azure_blob_storage-0.3.6/source_azure_blob_storage/legacy_config_transformer.py
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
from typing import Any, Mapping, MutableMapping
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
class LegacyConfigTransformer:
|
|
9
|
-
"""
|
|
10
|
-
Class that takes in Azure Blob Storage source configs in the legacy format and transforms them into
|
|
11
|
-
configs that can be used by the new Azure Blob Storage source built with the file-based CDK.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
@classmethod
|
|
15
|
-
def convert(cls, legacy_config: Mapping) -> MutableMapping[str, Any]:
|
|
16
|
-
azure_blob_storage_blobs_prefix = legacy_config.get("azure_blob_storage_blobs_prefix", "")
|
|
17
|
-
|
|
18
|
-
return {
|
|
19
|
-
"azure_blob_storage_endpoint": legacy_config.get("azure_blob_storage_endpoint", None),
|
|
20
|
-
"azure_blob_storage_account_name": legacy_config["azure_blob_storage_account_name"],
|
|
21
|
-
"azure_blob_storage_account_key": legacy_config["azure_blob_storage_account_key"],
|
|
22
|
-
"azure_blob_storage_container_name": legacy_config["azure_blob_storage_container_name"],
|
|
23
|
-
"streams": [
|
|
24
|
-
{
|
|
25
|
-
"name": legacy_config["azure_blob_storage_container_name"],
|
|
26
|
-
"legacy_prefix": azure_blob_storage_blobs_prefix,
|
|
27
|
-
"validation_policy": "Emit Record",
|
|
28
|
-
"format": {"filetype": "jsonl"},
|
|
29
|
-
}
|
|
30
|
-
],
|
|
31
|
-
}
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
|
3
|
-
#
|
|
4
|
-
|
|
5
|
-
from typing import Any, Mapping
|
|
6
|
-
|
|
7
|
-
from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
|
|
8
|
-
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
|
9
|
-
|
|
10
|
-
from .legacy_config_transformer import LegacyConfigTransformer
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class SourceAzureBlobStorage(FileBasedSource):
|
|
14
|
-
@classmethod
|
|
15
|
-
def read_config(cls, config_path: str) -> Mapping[str, Any]:
|
|
16
|
-
"""
|
|
17
|
-
Used to override the default read_config so that when the new file-based Azure Blob Storage connector processes a config
|
|
18
|
-
in the legacy format, it can be transformed into the new config. This happens in entrypoint before we
|
|
19
|
-
validate the config against the new spec.
|
|
20
|
-
"""
|
|
21
|
-
config = FileBasedSource.read_config(config_path)
|
|
22
|
-
if not cls._is_v1_config(config):
|
|
23
|
-
converted_config = LegacyConfigTransformer.convert(config)
|
|
24
|
-
emit_configuration_as_airbyte_control_message(converted_config)
|
|
25
|
-
return converted_config
|
|
26
|
-
return config
|
|
27
|
-
|
|
28
|
-
@staticmethod
|
|
29
|
-
def _is_v1_config(config: Mapping[str, Any]) -> bool:
|
|
30
|
-
return "streams" in config
|