airbyte-source-azure-blob-storage 0.6.9__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,21 @@
1
- Metadata-Version: 2.3
1
+ Metadata-Version: 2.1
2
2
  Name: airbyte-source-azure-blob-storage
3
- Version: 0.6.9
3
+ Version: 0.8.2
4
4
  Summary: Source implementation for Azure Blob Storage.
5
- License: MIT
5
+ Home-page: https://airbyte.com
6
+ License: ELv2
6
7
  Author: Airbyte
7
8
  Author-email: contact@airbyte.io
8
- Requires-Python: >=3.11,<3.12
9
- Classifier: License :: OSI Approved :: MIT License
9
+ Requires-Python: >=3.11,<3.14
10
+ Classifier: License :: Other/Proprietary License
10
11
  Classifier: Programming Language :: Python :: 3
11
12
  Classifier: Programming Language :: Python :: 3.11
12
- Requires-Dist: airbyte-cdk[file-based] (>=4,<5)
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Programming Language :: Python :: 3.13
15
+ Requires-Dist: airbyte-cdk[file-based] (>=7.0.0,<8.0.0)
13
16
  Requires-Dist: pytz (>=2024.1,<2025.0)
14
- Requires-Dist: smart-open[azure] (==6.4.0)
17
+ Requires-Dist: smart-open[azure] (==0.8.2)
15
18
  Project-URL: Documentation, https://docs.airbyte.com/integrations/sources/azure-blob-storage
16
- Project-URL: Homepage, https://airbyte.com
17
19
  Project-URL: Repository, https://github.com/airbytehq/airbyte
18
20
  Description-Content-Type: text/markdown
19
21
 
@@ -0,0 +1,10 @@
1
+ source_azure_blob_storage/__init__.py,sha256=Wx4PzvHg900-c2CpOOP1Wk0zcJpNVqJrkMnPtDcuaQM,319
2
+ source_azure_blob_storage/config_migrations.py,sha256=b0_UyUj1F4q_wPCxBpu4W8MxfMiHE2OvesXxks1bL2M,4331
3
+ source_azure_blob_storage/run.py,sha256=4FImO8txeARJAE-c-glw5xxnDuiRT-XSLYKFVBn70tI,2206
4
+ source_azure_blob_storage/source.py,sha256=ZubuoAmy_QxRtJIJVSugmBoMxw3dMjLTSAIXbZf3jjE,2493
5
+ source_azure_blob_storage/spec.py,sha256=tRiF9KacJFK97HaleJucQadR4e2hjnmStJnbLEFV4vc,5043
6
+ source_azure_blob_storage/stream_reader.py,sha256=YWzmk1mPqSy8H8gWRBYZc7F7-vBDqlNH3WrhVFCmAmk,7978
7
+ airbyte_source_azure_blob_storage-0.8.2.dist-info/METADATA,sha256=SZFa65iIEWD4KAqg_sv4VI9ptsDz4-Xc7x9jlyle4zs,6282
8
+ airbyte_source_azure_blob_storage-0.8.2.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
9
+ airbyte_source_azure_blob_storage-0.8.2.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
10
+ airbyte_source_azure_blob_storage-0.8.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.1.3
2
+ Generator: poetry-core 1.9.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -7,7 +7,10 @@ import logging
7
7
  from abc import ABC, abstractmethod
8
8
  from typing import Any, List, Mapping
9
9
 
10
+ import orjson
11
+
10
12
  from airbyte_cdk import AirbyteEntrypoint, Source, create_connector_config_control_message
13
+ from airbyte_cdk.models import AirbyteMessageSerializer
11
14
 
12
15
 
13
16
  logger = logging.getLogger("airbyte_logger")
@@ -47,7 +50,7 @@ class MigrateConfig(ABC):
47
50
  Args:
48
51
  - migrated_config (Mapping[str, Any]): The migrated configuration.
49
52
  """
50
- print(create_connector_config_control_message(migrated_config).json(exclude_unset=True))
53
+ print((orjson.dumps(AirbyteMessageSerializer.dump(create_connector_config_control_message(migrated_config))).decode()))
51
54
 
52
55
  @classmethod
53
56
  def migrate(cls, args: List[str], source: Source) -> None:
@@ -4,11 +4,13 @@
4
4
 
5
5
 
6
6
  import sys
7
+ import time
7
8
  import traceback
8
- from datetime import datetime
9
+
10
+ import orjson
9
11
 
10
12
  from airbyte_cdk import AirbyteEntrypoint, AirbyteMessage, Type, launch
11
- from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteTraceMessage, TraceType
13
+ from airbyte_cdk.models import AirbyteErrorTraceMessage, AirbyteMessageSerializer, AirbyteTraceMessage, TraceType
12
14
  from airbyte_cdk.sources.file_based.stream.cursor import DefaultFileBasedCursor
13
15
  from source_azure_blob_storage import SourceAzureBlobStorage, SourceAzureBlobStorageSpec, SourceAzureBlobStorageStreamReader
14
16
  from source_azure_blob_storage.config_migrations import MigrateCredentials, MigrateLegacyConfig
@@ -32,17 +34,21 @@ def run():
32
34
  MigrateCredentials.migrate(sys.argv[1:], source)
33
35
  except Exception:
34
36
  print(
35
- AirbyteMessage(
36
- type=Type.TRACE,
37
- trace=AirbyteTraceMessage(
38
- type=TraceType.ERROR,
39
- emitted_at=int(datetime.now().timestamp() * 1000),
40
- error=AirbyteErrorTraceMessage(
41
- message="Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance.",
42
- stack_trace=traceback.format_exc(),
43
- ),
44
- ),
45
- ).json()
37
+ orjson.dumps(
38
+ AirbyteMessageSerializer.dump(
39
+ AirbyteMessage(
40
+ type=Type.TRACE,
41
+ trace=AirbyteTraceMessage(
42
+ type=TraceType.ERROR,
43
+ emitted_at=time.time_ns() // 1_000_000,
44
+ error=AirbyteErrorTraceMessage(
45
+ message="Error starting the sync. This could be due to an invalid configuration or catalog. Please contact Support for assistance.",
46
+ stack_trace=traceback.format_exc(),
47
+ ),
48
+ ),
49
+ )
50
+ )
51
+ ).decode()
46
52
  )
47
53
  else:
48
54
  launch(source, args)
@@ -5,6 +5,8 @@
5
5
 
6
6
  from typing import Any
7
7
 
8
+ from airbyte_protocol_dataclasses.models import AuthFlowType
9
+
8
10
  from airbyte_cdk import AdvancedAuth, ConnectorSpecification, OAuthConfigSpecification
9
11
  from airbyte_cdk.sources.file_based.file_based_source import FileBasedSource
10
12
 
@@ -19,7 +21,7 @@ class SourceAzureBlobStorage(FileBasedSource):
19
21
  documentationUrl=self.spec_class.documentation_url(),
20
22
  connectionSpecification=self.spec_class.schema(),
21
23
  advanced_auth=AdvancedAuth(
22
- auth_flow_type="oauth2.0",
24
+ auth_flow_type=AuthFlowType.oauth2_0,
23
25
  predicate_key=["credentials", "auth_type"],
24
26
  predicate_value="oauth2",
25
27
  oauth_config_specification=OAuthConfigSpecification(
@@ -9,7 +9,7 @@ import dpath.util
9
9
  from pydantic.v1 import AnyUrl, BaseModel, Field
10
10
 
11
11
  from airbyte_cdk import OneOfOptionConfig
12
- from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec
12
+ from airbyte_cdk.sources.file_based.config.abstract_file_based_spec import AbstractFileBasedSpec, DeliverRawFiles, DeliverRecords
13
13
 
14
14
 
15
15
  class Oauth2(BaseModel):
@@ -107,6 +107,17 @@ class SourceAzureBlobStorageSpec(AbstractFileBasedSpec):
107
107
  order=11,
108
108
  )
109
109
 
110
+ delivery_method: Union[DeliverRecords, DeliverRawFiles] = Field(
111
+ title="Delivery Method",
112
+ discriminator="delivery_type",
113
+ type="object",
114
+ order=7,
115
+ display_type="radio",
116
+ group="advanced",
117
+ default="use_records_transfer",
118
+ airbyte_hidden=True,
119
+ )
120
+
110
121
  @classmethod
111
122
  def schema(cls, *args: Any, **kwargs: Any) -> Dict[str, Any]:
112
123
  """
@@ -9,11 +9,11 @@ import pytz
9
9
  from azure.core.credentials import AccessToken, TokenCredential
10
10
  from azure.core.exceptions import ResourceNotFoundError
11
11
  from azure.storage.blob import BlobServiceClient, ContainerClient
12
- from smart_open import open
12
+ from smart_open import open as so_open
13
13
 
14
14
  from airbyte_cdk import AirbyteTracedException, FailureType
15
15
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader, FileReadMode
16
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
16
+ from airbyte_cdk.sources.file_based.remote_file import UploadableRemoteFile
17
17
  from airbyte_cdk.sources.streams.http.requests_native_auth import Oauth2Authenticator
18
18
 
19
19
  from .spec import SourceAzureBlobStorageSpec
@@ -70,6 +70,26 @@ class AzureOauth2Authenticator(Oauth2Authenticator, TokenCredential):
70
70
  return AccessToken(token=self.get_access_token(), expires_on=7952342400)
71
71
 
72
72
 
73
+ class AzureBlobStorageUploadableRemoteFile(UploadableRemoteFile):
74
+ blob_client: Any
75
+ blob_properties: Any
76
+
77
+ def __init__(self, blob_client: Any, blob_properties: Any, **kwargs):
78
+ super().__init__(**kwargs)
79
+ self.blob_client = blob_client
80
+ self.blob_properties = blob_properties
81
+
82
+ @property
83
+ def size(self) -> int:
84
+ return self.blob_properties.size
85
+
86
+ def download_to_local_directory(self, local_file_path: str) -> None:
87
+ blob_client = self.blob_client.get_blob_client(container=self.blob_properties.container, blob=self.uri)
88
+ with open(file=local_file_path, mode="wb") as f:
89
+ download_stream = blob_client.download_blob()
90
+ f.write(download_stream.readall())
91
+
92
+
73
93
  class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
74
94
  _credentials = None
75
95
 
@@ -127,20 +147,29 @@ class SourceAzureBlobStorageStreamReader(AbstractFileBasedStreamReader):
127
147
  globs: List[str],
128
148
  prefix: Optional[str],
129
149
  logger: logging.Logger,
130
- ) -> Iterable[RemoteFile]:
150
+ ) -> Iterable[AzureBlobStorageUploadableRemoteFile]:
131
151
  prefixes = [prefix] if prefix else self.get_prefixes_from_globs(globs)
132
152
  prefixes = prefixes or [None]
133
153
  try:
134
154
  for prefix in prefixes:
135
155
  for blob in self.azure_container_client.list_blobs(name_starts_with=prefix):
136
- remote_file = RemoteFile(uri=blob.name, last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None))
156
+ remote_file = AzureBlobStorageUploadableRemoteFile(
157
+ uri=blob.name,
158
+ last_modified=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None),
159
+ blob_client=self.azure_blob_service_client,
160
+ blob_properties=blob,
161
+ created_at=blob.creation_time.astimezone(pytz.utc).replace(tzinfo=None).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
162
+ updated_at=blob.last_modified.astimezone(pytz.utc).replace(tzinfo=None).strftime("%Y-%m-%dT%H:%M:%S.%fZ"),
163
+ )
137
164
  yield from self.filter_files_by_globs_and_start_date([remote_file], globs)
138
165
  except ResourceNotFoundError as e:
139
166
  raise AirbyteTracedException(failure_type=FailureType.config_error, internal_message=e.message, message=e.reason or e.message)
140
167
 
141
- def open_file(self, file: RemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger) -> IOBase:
168
+ def open_file(
169
+ self, file: AzureBlobStorageUploadableRemoteFile, mode: FileReadMode, encoding: Optional[str], logger: logging.Logger
170
+ ) -> IOBase:
142
171
  try:
143
- result = open(
172
+ result = so_open(
144
173
  f"azure://{self.config.azure_blob_storage_container_name}/{file.uri}",
145
174
  transport_params={"client": self.azure_blob_service_client},
146
175
  mode=mode.value,
@@ -1,10 +0,0 @@
1
- source_azure_blob_storage/__init__.py,sha256=Wx4PzvHg900-c2CpOOP1Wk0zcJpNVqJrkMnPtDcuaQM,319
2
- source_azure_blob_storage/config_migrations.py,sha256=WKo5ETt4y09_cjfny3EH4filBsC48ltLMlQ_wLcxYVM,4229
3
- source_azure_blob_storage/run.py,sha256=3rwgY4Qs-KdOKZanpiLmGsQzNW5gUEE9zmrizEpCs20,1999
4
- source_azure_blob_storage/source.py,sha256=j5ibzjzX_gAi2kASpajoQ8ZMbnJ8LIwuSKChwk5i3vY,2420
5
- source_azure_blob_storage/spec.py,sha256=ysnDed3VwUmQnBhOYHHkWivUH3Q7EqDbjvT1Qqc7fPo,4697
6
- source_azure_blob_storage/stream_reader.py,sha256=ZSMgiwy6dOsFAGPyDIwCS4SklNIRo7dKdwesWIrsCrQ,6674
7
- airbyte_source_azure_blob_storage-0.6.9.dist-info/METADATA,sha256=z6Ag3OgwXS9oZ4vRrR5DmRocBY0wNV-KdFYW7SAhmeE,6185
8
- airbyte_source_azure_blob_storage-0.6.9.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
9
- airbyte_source_azure_blob_storage-0.6.9.dist-info/entry_points.txt,sha256=75v_DA_Xu0qr0eqtEXyh8sPCqcL9eXKWY8UwdST3ANE,79
10
- airbyte_source_azure_blob_storage-0.6.9.dist-info/RECORD,,