airbyte-source-azure-blob-storage 0.6.15__tar.gz → 0.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of airbyte-source-azure-blob-storage might be problematic. Click here for more details.
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/PKG-INFO +5 -5
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/pyproject.toml +5 -5
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/source_azure_blob_storage/config_migrations.py +4 -1
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/source_azure_blob_storage/run.py +19 -13
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/source_azure_blob_storage/source.py +3 -1
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/source_azure_blob_storage/spec.py +12 -1
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/source_azure_blob_storage/stream_reader.py +35 -6
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/README.md +0 -0
- {airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/source_azure_blob_storage/__init__.py +0 -0
{airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/PKG-INFO
RENAMED
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: airbyte-source-azure-blob-storage
|
|
3
|
-
Version: 0.6.15
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: Source implementation for Azure Blob Storage.
|
|
5
5
|
Home-page: https://airbyte.com
|
|
6
|
-
License:
|
|
6
|
+
License: ELv2
|
|
7
7
|
Author: Airbyte
|
|
8
8
|
Author-email: contact@airbyte.io
|
|
9
9
|
Requires-Python: >=3.11,<3.12
|
|
10
|
-
Classifier: License ::
|
|
10
|
+
Classifier: License :: Other/Proprietary License
|
|
11
11
|
Classifier: Programming Language :: Python :: 3
|
|
12
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
-
Requires-Dist: airbyte-cdk[file-based] (>=
|
|
13
|
+
Requires-Dist: airbyte-cdk[file-based] (>=7,<8)
|
|
14
14
|
Requires-Dist: pytz (>=2024.1,<2025.0)
|
|
15
|
-
Requires-Dist: smart-open[azure] (==0.
|
|
15
|
+
Requires-Dist: smart-open[azure] (==0.7.0)
|
|
16
16
|
Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/azure-blob-storage
|
|
17
17
|
Project-URL: Repository, https://github.com/airbytehq/airbyte
|
|
18
18
|
Description-Content-Type: text/markdown
|
{airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/pyproject.toml
RENAMED
|
@@ -3,11 +3,11 @@ requires = [ "poetry-core>=1.0.0",]
|
|
|
3
3
|
build-backend = "poetry.core.masonry.api"
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
|
-
version = "0.6.15"
|
|
6
|
+
version = "0.7.0"
|
|
7
7
|
name = "airbyte-source-azure-blob-storage"
|
|
8
8
|
description = "Source implementation for Azure Blob Storage."
|
|
9
9
|
authors = [ "Airbyte <contact@airbyte.io>",]
|
|
10
|
-
license = "
|
|
10
|
+
license = "ELv2"
|
|
11
11
|
readme = "README.md"
|
|
12
12
|
documentation = "https://docs.airbyte.com/integrations/sources/azure-blob-storage"
|
|
13
13
|
homepage = "https://airbyte.com"
|
|
@@ -18,11 +18,11 @@ include = "source_azure_blob_storage"
|
|
|
18
18
|
[tool.poetry.dependencies]
|
|
19
19
|
python = "^3.11,<3.12"
|
|
20
20
|
pytz = "^2024.1"
|
|
21
|
-
airbyte-cdk = {extras = ["file-based"], version = "^
|
|
21
|
+
airbyte-cdk = {extras = ["file-based"], version = "^7"}
|
|
22
22
|
|
|
23
23
|
[tool.poetry.dependencies.smart-open]
|
|
24
24
|
extras = [ "azure",]
|
|
25
|
-
version = "0.
|
|
25
|
+
version = "0.7.0"
|
|
26
26
|
|
|
27
27
|
[tool.poetry.scripts]
|
|
28
28
|
source-azure-blob-storage = "source_azure_blob_storage.run:run"
|
|
@@ -32,7 +32,7 @@ docker = "^7.0.0"
|
|
|
32
32
|
freezegun = "^1.4.0"
|
|
33
33
|
pytest-mock = "^3.6.1"
|
|
34
34
|
requests-mock = "^1.9.3"
|
|
35
|
-
pandas = "2.2.
|
|
35
|
+
pandas = "2.2.3"
|
|
36
36
|
pytest = "^8.0.0"
|
|
37
37
|
|
|
38
38
|
|
|
@@ -7,7 +7,10 @@ import logging
|
|
|
7
7
|
from abc import ABC, abstractmethod
|
|
8
8
|
from typing import Any, List, Mapping
|
|
9
9
|
|
|
10
|
+
import orjson
|
|
11
|
+
|
|
10
12
|
from airbyte_cdk import AirbyteEntrypoint, Source, create_connector_config_control_message
|
|
13
|
+
from airbyte_cdk.models import AirbyteMessageSerializer
|
|
11
14
|
|
|
12
15
|
|
|
13
16
|
logger = logging.getLogger("airbyte_logger")
|
|
@@ -47,7 +50,7 @@ class MigrateConfig(ABC):
|
|
|
47
50
|
Args:
|
|
48
51
|
- migrated_config (Mapping[str, Any]): The migrated configuration.
|
|
49
52
|
"""
|
|
50
|
-
print(create_connector_config_control_message(migrated_config).
|
|
53
|
+
print((orjson.dumps(AirbyteMessageSerializer.dump(create_connector_config_control_message(migrated_config))).decode()))
|
|
51
54
|
|
|
52
55
|
@classmethod
|
|
53
56
|
def migrate(cls, args: List[str], source: Source) -> None:
|
|
@@ -4,11 +4,13 @@
|
|
|
4
4
|
|
|
5
5
|
|
|
6
6
|
import sys
|
|
7
|
+
import time
|
|
7
8
|
import traceback
|
|
8
|
-
|
|
9
|
+
|
|
10
|
+
import orjson
|
|
9
11
|
|
|
10
12
|
from airbyte_cdk import AirbyteEntrypoint, AirbyteMessage, Type, launch
|
|
11
|
-
from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteTraceMessage, TraceType
|
|
13
|
+
from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessageSerializer, AirbyteTraceMessage, TraceType
|
|
12
14
|
from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
|
|
13
15
|
from source_azure_blob_storage import SourceAzureBlobStorage, SourceAzureBlobStorageSpec, SourceAzureBlobStorageStreamReader
|
|
14
16
|
from source_azure_blob_storage.config_migrations import MigrateCredentials, MigrateLegacyConfig
|
|
@@ -32,17 +34,21 @@ def run():
|
|
|
32
34
|
MigrateCredentials.migrate(sys.argv[1:], source)
|
|
33
35
|
except Exception:
|
|
34
36
|
print(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
37
|
+
orjson.dumps(
|
|
38
|
+
AirbyteMessageSerializer.dump(
|
|
39
|
+
AirbyteMessage(
|
|
40
|
+
type=Type.TRACE,
|
|
41
|
+
trace=AirbyteTraceMessage(
|
|
42
|
+
type=TraceType.ERROR,
|
|
43
|
+
emitted_at=time.time_ns() // 1_000_000,
|
|
44
|
+
error=AirbyteErrorTraceMessage(
|
|
45
|
+
message="Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance.",
|
|
46
|
+
stack_trace=traceback.format_exc(),
|
|
47
|
+
),
|
|
48
|
+
),
|
|
49
|
+
)
|
|
50
|
+
)
|
|
51
|
+
).decode()
|
|
46
52
|
)
|
|
47
53
|
else:
|
|
48
54
|
launch(source, args)
|
|
@@ -5,6 +5,8 @@
|
|
|
5
5
|
|
|
6
6
|
from typing import Any
|
|
7
7
|
|
|
8
|
+
from airbyte_protocol_dataclasses.models import AuthFlowType
|
|
9
|
+
|
|
8
10
|
from airbyte_cdk import AdvancedAuth, ConnectorSpecification, OAuthConfigSpecification
|
|
9
11
|
from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
|
|
10
12
|
|
|
@@ -19,7 +21,7 @@ class SourceAzureBlobStorage(FileBasedSource):
|
|
|
19
21
|
documentationUrl=self.spec_class.documentation_url(),
|
|
20
22
|
connectionSpecification=self.spec_class.schema(),
|
|
21
23
|
advanced_auth=AdvancedAuth(
|
|
22
|
-
auth_flow_type=
|
|
24
|
+
auth_flow_type=AuthFlowType.oauth2_0,
|
|
23
25
|
predicate_key=["credentials", "auth_type"],
|
|
24
26
|
predicate_value="oauth2",
|
|
25
27
|
oauth_config_specification=OAuthConfigSpecification(
|
|
@@ -9,7 +9,7 @@ import dpath.util
|
|
|
9
9
|
from pydantic.v1 import AnyUrl, BaseModel, Field
|
|
10
10
|
|
|
11
11
|
from airbyte_cdk import OneOfOptionConfig
|
|
12
|
-
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
|
|
12
|
+
from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec, DeliverRawFiles, DeliverRecords
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class Oauth2(BaseModel):
|
|
@@ -107,6 +107,17 @@ class SourceAzureBlobStorageSpec(AbstractFileBasedSpec):
|
|
|
107
107
|
order=11,
|
|
108
108
|
)
|
|
109
109
|
|
|
110
|
+
delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
|
|
111
|
+
title="Delivery Method",
|
|
112
|
+
discriminator="delivery_type",
|
|
113
|
+
type="object",
|
|
114
|
+
order=7,
|
|
115
|
+
display_type="radio",
|
|
116
|
+
group="advanced",
|
|
117
|
+
default="use_records_transfer",
|
|
118
|
+
airbyte_hidden=True,
|
|
119
|
+
)
|
|
120
|
+
|
|
110
121
|
@classmethod
|
|
111
122
|
def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]:
|
|
112
123
|
"""
|
|
@@ -9,11 +9,11 @@ import pytz
|
|
|
9
9
|
from azure.core.credentials import AccessToken, TokenCredential
|
|
10
10
|
from azure.core.exceptions import ResourceNotFoundError
|
|
11
11
|
from azure.storage.blob import BlobServiceClient, ContainerClient
|
|
12
|
-
from smart_open import open
|
|
12
|
+
from smart_open import open as so_open
|
|
13
13
|
|
|
14
14
|
from airbyte_cdk import AirbyteTracedException, FailureType
|
|
15
15
|
from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
|
|
16
|
-
from airbyte_cdk.sources.file_based.remote_file import
|
|
16
|
+
from airbyte_cdk.sources.file_based.remote_file import UploadableRemoteFile
|
|
17
17
|
from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
|
|
18
18
|
|
|
19
19
|
from .spec import SourceAzureBlobStorageSpec
|
|
@@ -70,6 +70,26 @@ class AzureOauth2Authenticator(Oauth2Authenticator, TokenCredential):
|
|
|
70
70
|
return AccessToken(token=self.get_access_token(), expires_on=7952342400)
|
|
71
71
|
|
|
72
72
|
|
|
73
|
+
class AzureBlobStorageUploadableRemoteFile(UploadableRemoteFile):
|
|
74
|
+
blob_client: Any
|
|
75
|
+
blob_properties: Any
|
|
76
|
+
|
|
77
|
+
def __init__(self, blob_client: Any, blob_properties: Any, **kwargs):
|
|
78
|
+
super().__init__(**kwargs)
|
|
79
|
+
self.blob_client = blob_client
|
|
80
|
+
self.blob_properties = blob_properties
|
|
81
|
+
|
|
82
|
+
@property
|
|
83
|
+
def size(self) -> int:
|
|
84
|
+
return self.blob_properties.size
|
|
85
|
+
|
|
86
|
+
def download_to_local_directory(self, local_file_path: str) -> None:
|
|
87
|
+
blob_client = self.blob_client.get_blob_client(container=self.blob_properties.container, blob=self.uri)
|
|
88
|
+
with open(file=local_file_path, mode="wb") as f:
|
|
89
|
+
download_stream = blob_client.download_blob()
|
|
90
|
+
f.write(download_stream.readall())
|
|
91
|
+
|
|
92
|
+
|
|
73
93
|
class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
74
94
|
_credentials = None
|
|
75
95
|
|
|
@@ -127,20 +147,29 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
|
|
|
127
147
|
globs: List[str],
|
|
128
148
|
prefix: Optional[str],
|
|
129
149
|
logger: logging.Logger,
|
|
130
|
-
) -> Iterable[
|
|
150
|
+
) -> Iterable[AzureBlobStorageUploadableRemoteFile]:
|
|
131
151
|
prefixes = [prefix] if prefix else self.get_prefixes_from_globs(globs)
|
|
132
152
|
prefixes = prefixes or [None]
|
|
133
153
|
try:
|
|
134
154
|
for prefix in prefixes:
|
|
135
155
|
for blob in self.azure_container_client.list_blobs(name_starts_with=prefix):
|
|
136
|
-
remote_file =
|
|
156
|
+
remote_file = AzureBlobStorageUploadableRemoteFile(
|
|
157
|
+
uri=blob.name,
|
|
158
|
+
last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None),
|
|
159
|
+
blob_client=self.azure_blob_service_client,
|
|
160
|
+
blob_properties=blob,
|
|
161
|
+
created_at=blob.creation_time.astimezone(pytz.utc).replace(tzinfo=None).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
162
|
+
updated_at=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
|
|
163
|
+
)
|
|
137
164
|
yield from self.filter_files_by_globs_and_start_date([remote_file], globs)
|
|
138
165
|
except ResourceNotFoundError as e:
|
|
139
166
|
raise AirbyteTracedException(failure_type=FailureType.config_error, internal_message=e.message, message=e.reason or e.message)
|
|
140
167
|
|
|
141
|
-
def open_file(
|
|
168
|
+
def open_file(
|
|
169
|
+
self, file: AzureBlobStorageUploadableRemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger
|
|
170
|
+
) -> IOBase:
|
|
142
171
|
try:
|
|
143
|
-
result =
|
|
172
|
+
result = so_open(
|
|
144
173
|
f"azure://{self.config.azure_blob_storage_container_name}/{file.uri}",
|
|
145
174
|
transport_params={"client": self.azure_blob_service_client},
|
|
146
175
|
mode=mode.value,
|
{airbyte_source_azure_blob_storage-0.6.15 → airbyte_source_azure_blob_storage-0.7.0}/README.md
RENAMED
|
File without changes
|