airbyte-source-azure-blob-storage 0.3.5__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff compares the contents of two publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Potentially problematic release.


This version of airbyte-source-azure-blob-storage might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-azure-blob-storage
3
- Version: 0.3.5
3
+ Version: 0.4.0
4
4
  Summary: Source implementation for Azure Blob Storage.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -12,7 +12,7 @@ Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.9
13
13
  Classifier: Programming Language :: Python :: 3.10
14
14
  Classifier: Programming Language :: Python :: 3.11
15
- Requires-Dist: airbyte-cdk[file-based] (==0.75)
15
+ Requires-Dist: airbyte-cdk[file-based] (>=0,<1)
16
16
  Requires-Dist: pytz (==2024.1)
17
17
  Requires-Dist: smart-open[azure] (==6.4.0)
18
18
  Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/azure-blob-storage
@@ -32,6 +32,26 @@ For information about how to use this connector within Airbyte, see [the documen
32
32
  * Poetry (~=1.7) - installation instructions [here](https://python-poetry.org/docs/#installation)
33
33
 
34
34
 
35
+ ### Generate new oauth token
36
+
37
+ Tenant id should be provided by user, reason:
38
+ https://learn.microsoft.com/en-us/answers/questions/1531138/which-tenant-id-do-i-have-to-use-to-get-tokens-and
39
+
40
+ 1. GET https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/authorize
41
+ ?response_type=code
42
+ &client_id=<client_id>
43
+ &scope=offline_access https://storage.azure.com/.default
44
+ &redirect_uri=http://localhost:8000/auth_flow
45
+ &response_mode=query
46
+ &state=1234
47
+
48
+ 2. POST https://login.microsoftonline.com/<tenant_id>/oauth2/v2.0/token
49
+ client_id:<client_id>
50
+ code:<code obtained from previous request>
51
+ redirect_uri:http://localhost:8000/auth_flow
52
+ grant_type:authorization_code
53
+ client_secret:<client_secret>
54
+
35
55
  ### Installing the connector
36
56
  From this connector directory, run:
37
57
  ```bash
@@ -0,0 +1,11 @@
1
+ source_azure_blob_storage/__init__.py,sha256=dUrGWNqeIfK3e4yI6dgpIzSi8d03kItsUWCHQcmxXlg,281
2
+ source_azure_blob_storage/config.py,sha256=w7hEWeY8cnNJ_jtAr-ZzkhyZ7pAxI6sQLUi2BG75jGQ,3949
3
+ source_azure_blob_storage/config_migrations.py,sha256=_Dq-N4T6xTL2HpDmYIr-5K5CKCudUmEjA3x9wUdcHMg,2712
4
+ source_azure_blob_storage/legacy_config_transformer.py,sha256=yiS4GgjaVvJtedmweAkq-eEdPG0RWWAAr0T9UV4RmOQ,1306
5
+ source_azure_blob_storage/run.py,sha256=hUxQJCBSS_pzGzSE92Fum5C9EY7cU4yP3u-KefiC9cI,1890
6
+ source_azure_blob_storage/source.py,sha256=lHyvHaEghSgxs1zK67o_x6viSVZ3oZOJca2Y-owI9J8,3456
7
+ source_azure_blob_storage/stream_reader.py,sha256=YpPYv5l2eLRtxTNTLGQ6LCOHtwDdmC9TRLBt9M7ZGTU,4227
8
+ airbyte_source_azure_blob_storage-0.4.0.dist-info/METADATA,sha256=IMQQw4bC4D6p4FGYRUPV-DstwWu2_uic9YJoMPn9P7Y,6246
9
+ airbyte_source_azure_blob_storage-0.4.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
10
+ airbyte_source_azure_blob_storage-0.4.0.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
11
+ airbyte_source_azure_blob_storage-0.4.0.dist-info/RECORD,,
@@ -2,11 +2,51 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from typing import Any, Dict, Optional
5
+ from typing import Any, Dict, Literal, Optional, Union
6
6
 
7
7
  import dpath.util
8
8
  from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
9
- from pydantic import AnyUrl, Field
9
+ from airbyte_cdk.utils.oneof_option_config import OneOfOptionConfig
10
+ from pydantic import AnyUrl, BaseModel, Field
11
+
12
+
13
+ class Oauth2(BaseModel):
14
+ class Config(OneOfOptionConfig):
15
+ title = "Authenticate via Oauth2"
16
+ discriminator = "auth_type"
17
+
18
+ auth_type: Literal["oauth2"] = Field("oauth2", const=True)
19
+ tenant_id: str = Field(title="Tenant ID", description="Tenant ID of the Microsoft Azure Application user", airbyte_secret=True)
20
+ client_id: str = Field(
21
+ title="Client ID",
22
+ description="Client ID of your Microsoft developer application",
23
+ airbyte_secret=True,
24
+ )
25
+ client_secret: str = Field(
26
+ title="Client Secret",
27
+ description="Client Secret of your Microsoft developer application",
28
+ airbyte_secret=True,
29
+ )
30
+ refresh_token: str = Field(
31
+ title="Refresh Token",
32
+ description="Refresh Token of your Microsoft developer application",
33
+ airbyte_secret=True,
34
+ )
35
+
36
+
37
+ class StorageAccountKey(BaseModel):
38
+ class Config(OneOfOptionConfig):
39
+ title = "Authenticate via Storage Account Key"
40
+ discriminator = "auth_type"
41
+
42
+ auth_type: Literal["storage_account_key"] = Field("storage_account_key", const=True)
43
+ azure_blob_storage_account_key: str = Field(
44
+ title="Azure Blob Storage account key",
45
+ description="The Azure blob storage account key.",
46
+ airbyte_secret=True,
47
+ examples=["Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd=="],
48
+ order=3,
49
+ )
10
50
 
11
51
 
12
52
  class Config(AbstractFileBasedSpec):
@@ -25,11 +65,11 @@ class Config(AbstractFileBasedSpec):
25
65
  examples=["airbyte5storage"],
26
66
  order=2,
27
67
  )
28
- azure_blob_storage_account_key: str = Field(
29
- title="Azure Blob Storage account key",
30
- description="The Azure blob storage account key.",
31
- airbyte_secret=True,
32
- examples=["Z8ZkZpteggFx394vm+PJHnGTvdRncaYS+JhLKdj789YNmD+iyGTnG+PV+POiuYNhBg/ACS+LKjd%4FG3FHGN12Nd=="],
68
+ credentials: Union[Oauth2, StorageAccountKey] = Field(
69
+ title="Authentication",
70
+ description="Credentials for connecting to the Azure Blob Storage",
71
+ discriminator="auth_type",
72
+ type="object",
33
73
  order=3,
34
74
  )
35
75
  azure_blob_storage_container_name: str = Field(
@@ -0,0 +1,77 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ import logging
7
+ from typing import Any, List, Mapping
8
+
9
+ from airbyte_cdk.config_observation import create_connector_config_control_message
10
+ from airbyte_cdk.entrypoint import AirbyteEntrypoint
11
+ from airbyte_cdk.sources import Source
12
+
13
+ logger = logging.getLogger("airbyte_logger")
14
+
15
+
16
+ class MigrateCredentials:
17
+ """
18
+ This class stands for migrating the config azure_blob_storage_account_key inside object `credentials`
19
+ """
20
+
21
+ @classmethod
22
+ def should_migrate(cls, config: Mapping[str, Any]) -> bool:
23
+ return "credentials" not in config
24
+
25
+ @classmethod
26
+ def set_azure_blob_storage_account_key(cls, config: Mapping[str, Any]) -> Mapping[str, Any]:
27
+ config["credentials"] = {
28
+ "auth_type": "storage_account_key",
29
+ "azure_blob_storage_account_key": config.pop("azure_blob_storage_account_key"),
30
+ }
31
+ return config
32
+
33
+ @classmethod
34
+ def modify_and_save(cls, config_path: str, source: Source, config: Mapping[str, Any]) -> Mapping[str, Any]:
35
+ """
36
+ Modifies the configuration and then saves it back to the source.
37
+
38
+ Args:
39
+ - config_path (str): The path where the configuration is stored.
40
+ - source (Source): The data source.
41
+ - config (Mapping[str, Any]): The current configuration.
42
+
43
+ Returns:
44
+ - Mapping[str, Any]: The updated configuration.
45
+ """
46
+ migrated_config = cls.set_azure_blob_storage_account_key(config)
47
+ source.write_config(migrated_config, config_path)
48
+ return migrated_config
49
+
50
+ @classmethod
51
+ def emit_control_message(cls, migrated_config: Mapping[str, Any]) -> None:
52
+ """
53
+ Emits the control messages related to configuration migration.
54
+
55
+ Args:
56
+ - migrated_config (Mapping[str, Any]): The migrated configuration.
57
+ """
58
+ print(create_connector_config_control_message(migrated_config).json(exclude_unset=True))
59
+
60
+ @classmethod
61
+ def migrate(cls, args: List[str], source: Source) -> None:
62
+ """
63
+ Orchestrates the configuration migration process.
64
+
65
+ It first checks if the `--config` argument is provided, and if so,
66
+ determines whether migration is needed, and then performs the migration
67
+ if required.
68
+
69
+ Args:
70
+ - args (List[str]): List of command-line arguments.
71
+ - source (Source): The data source.
72
+ """
73
+ config_path = AirbyteEntrypoint(source).extract_config(args)
74
+ if config_path:
75
+ config = source.read_config(config_path)
76
+ if cls.should_migrate(config):
77
+ cls.emit_control_message(cls.modify_and_save(config_path, source, config))
@@ -10,6 +10,7 @@ from airbyte_cdk.entrypoint import AirbyteEntrypoint, launch
10
10
  from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessage, AirbyteTraceMessage, TraceType, Type
11
11
  from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
12
12
  from source_azure_blob_storage import Config, SourceAzureBlobStorage, SourceAzureBlobStorageStreamReader
13
+ from source_azure_blob_storage.config_migrations import MigrateCredentials
13
14
 
14
15
 
15
16
  def run():
@@ -26,6 +27,7 @@ def run():
26
27
  SourceAzureBlobStorage.read_state(state_path) if catalog_path else None,
27
28
  cursor_cls=DefaultFileBasedCursor,
28
29
  )
30
+ MigrateCredentials.migrate(sys.argv[1:], source)
29
31
  except Exception:
30
32
  print(
31
33
  AirbyteMessage(
@@ -5,7 +5,9 @@
5
5
  from typing import Any, Mapping
6
6
 
7
7
  from airbyte_cdk.config_observation import emit_configuration_as_airbyte_control_message
8
+ from airbyte_cdk.sources.declarative.models import OAuthConfigSpecification
8
9
  from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
10
+ from airbyte_protocol.models import AdvancedAuth, ConnectorSpecification
9
11
 
10
12
  from .legacy_config_transformer import LegacyConfigTransformer
11
13
 
@@ -28,3 +30,43 @@ class SourceAzureBlobStorage(FileBasedSource):
28
30
  @staticmethod
29
31
  def _is_v1_config(config: Mapping[str, Any]) -> bool:
30
32
  return "streams" in config
33
+
34
+ def spec(self, *args: Any, **kwargs: Any) -> ConnectorSpecification:
35
+ """
36
+ Returns the specification describing what fields can be configured by a user when setting up a file-based source.
37
+ """
38
+
39
+ return ConnectorSpecification(
40
+ documentationUrl=self.spec_class.documentation_url(),
41
+ connectionSpecification=self.spec_class.schema(),
42
+ advanced_auth=AdvancedAuth(
43
+ auth_flow_type="oauth2.0",
44
+ predicate_key=["credentials", "auth_type"],
45
+ predicate_value="oauth2",
46
+ oauth_config_specification=OAuthConfigSpecification(
47
+ complete_oauth_output_specification={
48
+ "type": "object",
49
+ "additionalProperties": False,
50
+ "properties": {"refresh_token": {"type": "string", "path_in_connector_config": ["credentials", "refresh_token"]}},
51
+ },
52
+ complete_oauth_server_input_specification={
53
+ "type": "object",
54
+ "additionalProperties": False,
55
+ "properties": {"client_id": {"type": "string"}, "client_secret": {"type": "string"}},
56
+ },
57
+ complete_oauth_server_output_specification={
58
+ "type": "object",
59
+ "additionalProperties": False,
60
+ "properties": {
61
+ "client_id": {"type": "string", "path_in_connector_config": ["credentials", "client_id"]},
62
+ "client_secret": {"type": "string", "path_in_connector_config": ["credentials", "client_secret"]},
63
+ },
64
+ },
65
+ oauth_user_input_from_connector_config_specification={
66
+ "type": "object",
67
+ "additionalProperties": False,
68
+ "properties": {"tenant_id": {"type": "string", "path_in_connector_config": ["credentials", "tenant_id"]}},
69
+ },
70
+ ),
71
+ ),
72
+ )
@@ -1,20 +1,35 @@
1
1
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
2
2
 
3
3
  import logging
4
- from contextlib import contextmanager
5
4
  from io import IOBase
6
- from typing import Iterable, List, Optional
5
+ from typing import Iterable, List, Optional, Union
7
6
 
8
7
  import pytz
9
8
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
10
9
  from airbyte_cdk.sources.file_based.remote_file import RemoteFile
10
+ from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
11
+ from azure.core.credentials import AccessToken
11
12
  from azure.storage.blob import BlobServiceClient, ContainerClient
12
13
  from smart_open import open
13
14
 
14
15
  from .config import Config
15
16
 
16
17
 
18
+ class AzureOauth2Authenticator(Oauth2Authenticator):
19
+ """
20
+ Authenticator for Azure Blob Storage SDK to align with azure.core.credentials.TokenCredential protocol
21
+ """
22
+
23
+ def get_token(self, *args, **kwargs) -> AccessToken:
24
+ """Parent class handles Oauth Refresh token logic.
25
+ `expires_on` is ignored and set to year 2222 to align with protocol.
26
+ """
27
+ return AccessToken(token=self.get_access_token(), expires_on=7952342400)
28
+
29
+
17
30
  class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
31
+ _credentials = None
32
+
18
33
  def __init__(self, *args, **kwargs):
19
34
  super().__init__(*args, **kwargs)
20
35
  self._config = None
@@ -36,14 +51,26 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
36
51
  @property
37
52
  def azure_container_client(self):
38
53
  return ContainerClient(
39
- self.account_url,
40
- container_name=self.config.azure_blob_storage_container_name,
41
- credential=self.config.azure_blob_storage_account_key,
54
+ self.account_url, container_name=self.config.azure_blob_storage_container_name, credential=self.azure_credentials
42
55
  )
43
56
 
44
57
  @property
45
58
  def azure_blob_service_client(self):
46
- return BlobServiceClient(self.account_url, credential=self.config.azure_blob_storage_account_key)
59
+ return BlobServiceClient(self.account_url, credential=self._credentials)
60
+
61
+ @property
62
+ def azure_credentials(self) -> Union[str, AzureOauth2Authenticator]:
63
+ if not self._credentials:
64
+ if self.config.credentials.auth_type == "storage_account_key":
65
+ self._credentials = self.config.credentials.azure_blob_storage_account_key
66
+ else:
67
+ self._credentials = AzureOauth2Authenticator(
68
+ token_refresh_endpoint=f"https://login.microsoftonline.com/{self.config.credentials.tenant_id}/oauth2/v2.0/token",
69
+ client_id=self.config.credentials.client_id,
70
+ client_secret=self.config.credentials.client_secret,
71
+ refresh_token=self.config.credentials.refresh_token,
72
+ )
73
+ return self._credentials
47
74
 
48
75
  def get_matching_files(
49
76
  self,
@@ -1,10 +0,0 @@
1
- source_azure_blob_storage/__init__.py,sha256=dUrGWNqeIfK3e4yI6dgpIzSi8d03kItsUWCHQcmxXlg,281
2
- source_azure_blob_storage/config.py,sha256=fg1fitDbsX-LXtPGBmG3bfu-jb0XoFYqHxGWsYY6Uy8,2499
3
- source_azure_blob_storage/legacy_config_transformer.py,sha256=yiS4GgjaVvJtedmweAkq-eEdPG0RWWAAr0T9UV4RmOQ,1306
4
- source_azure_blob_storage/run.py,sha256=xam3N4LxPECFb0HTpJKIrmr8R_bVJmKiqEN4sluK4yk,1758
5
- source_azure_blob_storage/source.py,sha256=dt6BmjpDsxjeSlm6BccfFD3NJblEmDk4gUiJnvjcU40,1186
6
- source_azure_blob_storage/stream_reader.py,sha256=HA9jNcsMKK-5TVlVelpV2JmjxSf0OfqxC0yBEUtdEe0,2940
7
- airbyte_source_azure_blob_storage-0.3.5.dist-info/METADATA,sha256=aobYZFqV7OVJs68wTuObNEYRPEd7zJ_NDElXom1-aY0,5536
8
- airbyte_source_azure_blob_storage-0.3.5.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
9
- airbyte_source_azure_blob_storage-0.3.5.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
10
- airbyte_source_azure_blob_storage-0.3.5.dist-info/RECORD,,