airbyte-cdk 6.21.1.dev0__py3-none-any.whl → 6.22.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1514,6 +1514,7 @@ definitions:
1514
1514
  anyOf:
1515
1515
  - "$ref": "#/definitions/JsonDecoder"
1516
1516
  - "$ref": "#/definitions/XmlDecoder"
1517
+ - "$ref": "#/definitions/CompositeRawDecoder"
1517
1518
  $parameters:
1518
1519
  type: object
1519
1520
  additionalProperties: true
@@ -2100,6 +2101,26 @@ definitions:
2100
2101
  $parameters:
2101
2102
  type: object
2102
2103
  additionalProperties: true
2104
+ ZipfileDecoder:
2105
+ title: Zipfile Decoder
2106
+ description: Decoder for response data that is returned as zipfile(s).
2107
+ type: object
2108
+ additionalProperties: true
2109
+ required:
2110
+ - type
2111
+ - parser
2112
+ properties:
2113
+ type:
2114
+ type: string
2115
+ enum: [ZipfileDecoder]
2116
+ parser:
2117
+ title: Parser
2118
+ description: Parser to parse the decompressed data from the zipfile(s).
2119
+ anyOf:
2120
+ - "$ref": "#/definitions/GzipParser"
2121
+ - "$ref": "#/definitions/JsonParser"
2122
+ - "$ref": "#/definitions/JsonLineParser"
2123
+ - "$ref": "#/definitions/CsvParser"
2103
2124
  ListPartitionRouter:
2104
2125
  title: List Partition Router
2105
2126
  description: A Partition router that specifies a list of attributes where each attribute describes a portion of the complete data set for a stream. During a sync, each value is iterated over and can be used as input to outbound API requests.
@@ -2928,6 +2949,7 @@ definitions:
2928
2949
  - "$ref": "#/definitions/XmlDecoder"
2929
2950
  - "$ref": "#/definitions/GzipJsonDecoder"
2930
2951
  - "$ref": "#/definitions/CompositeRawDecoder"
2952
+ - "$ref": "#/definitions/ZipfileDecoder"
2931
2953
  $parameters:
2932
2954
  type: object
2933
2955
  additionalProperties: true
@@ -3126,6 +3148,8 @@ definitions:
3126
3148
  - "$ref": "#/definitions/IterableDecoder"
3127
3149
  - "$ref": "#/definitions/XmlDecoder"
3128
3150
  - "$ref": "#/definitions/GzipJsonDecoder"
3151
+ - "$ref": "#/definitions/CompositeRawDecoder"
3152
+ - "$ref": "#/definitions/ZipfileDecoder"
3129
3153
  download_decoder:
3130
3154
  title: Download Decoder
3131
3155
  description: Component decoding the download response so records can be extracted.
@@ -3136,6 +3160,8 @@ definitions:
3136
3160
  - "$ref": "#/definitions/IterableDecoder"
3137
3161
  - "$ref": "#/definitions/XmlDecoder"
3138
3162
  - "$ref": "#/definitions/GzipJsonDecoder"
3163
+ - "$ref": "#/definitions/CompositeRawDecoder"
3164
+ - "$ref": "#/definitions/ZipfileDecoder"
3139
3165
  $parameters:
3140
3166
  type: object
3141
3167
  additionalProperties: true
@@ -2,7 +2,12 @@
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import CompositeRawDecoder
5
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
6
+ CompositeRawDecoder,
7
+ GzipParser,
8
+ JsonParser,
9
+ Parser,
10
+ )
6
11
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
7
12
  from airbyte_cdk.sources.declarative.decoders.json_decoder import (
8
13
  GzipJsonDecoder,
@@ -15,15 +20,18 @@ from airbyte_cdk.sources.declarative.decoders.pagination_decoder_decorator impor
15
20
  PaginationDecoderDecorator,
16
21
  )
17
22
  from airbyte_cdk.sources.declarative.decoders.xml_decoder import XmlDecoder
23
+ from airbyte_cdk.sources.declarative.decoders.zipfile_decoder import ZipfileDecoder
18
24
 
19
25
  __all__ = [
20
26
  "Decoder",
21
27
  "CompositeRawDecoder",
22
28
  "JsonDecoder",
29
+ "JsonParser",
23
30
  "JsonlDecoder",
24
31
  "IterableDecoder",
25
32
  "GzipJsonDecoder",
26
33
  "NoopDecoder",
27
34
  "PaginationDecoderDecorator",
28
35
  "XmlDecoder",
36
+ "ZipfileDecoder",
29
37
  ]
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ import zipfile
7
+ from dataclasses import dataclass
8
+ from io import BytesIO
9
+ from typing import Any, Generator, MutableMapping
10
+
11
+ import orjson
12
+ import requests
13
+
14
+ from airbyte_cdk.models import FailureType
15
+ from airbyte_cdk.sources.declarative.decoders import Decoder
16
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
17
+ Parser,
18
+ )
19
+ from airbyte_cdk.utils import AirbyteTracedException
20
+
21
+ logger = logging.getLogger("airbyte")
22
+
23
+
24
+ @dataclass
25
+ class ZipfileDecoder(Decoder):
26
+ parser: Parser
27
+
28
+ def is_stream_response(self) -> bool:
29
+ return False
30
+
31
+ def decode(
32
+ self, response: requests.Response
33
+ ) -> Generator[MutableMapping[str, Any], None, None]:
34
+ try:
35
+ with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
36
+ for file_name in zip_file.namelist():
37
+ unzipped_content = zip_file.read(file_name)
38
+ buffered_content = BytesIO(unzipped_content)
39
+ try:
40
+ yield from self.parser.parse(buffered_content)
41
+ except Exception as e:
42
+ logger.error(
43
+ f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
44
+ )
45
+ raise AirbyteTracedException(
46
+ message=f"Failed to parse file: {file_name} from zip file.",
47
+ internal_message=f"Failed to parse file: {file_name} from zip file: {response.request.url}.",
48
+ failure_type=FailureType.system_error,
49
+ ) from e
50
+ except zipfile.BadZipFile as e:
51
+ logger.error(
52
+ f"Received an invalid zip file in response to URL: {response.request.url}. "
53
+ f"The size of the response body is: {len(response.content)}"
54
+ )
55
+ raise AirbyteTracedException(
56
+ message="Received an invalid zip file in response.",
57
+ internal_message=f"Received an invalid zip file in response to URL: {response.request.url}.",
58
+ failure_type=FailureType.system_error,
59
+ ) from e
@@ -1242,9 +1242,6 @@ class LegacySessionTokenAuthenticator(BaseModel):
1242
1242
 
1243
1243
 
1244
1244
  class JsonParser(BaseModel):
1245
- class Config:
1246
- extra = Extra.allow
1247
-
1248
1245
  type: Literal["JsonParser"]
1249
1246
  encoding: Optional[str] = "utf-8"
1250
1247
 
@@ -1680,6 +1677,18 @@ class CompositeErrorHandler(BaseModel):
1680
1677
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1681
1678
 
1682
1679
 
1680
class ZipfileDecoder(BaseModel):
    # Declarative model for the ZipfileDecoder component.
    class Config:
        # Keys that are not declared as fields below are allowed.
        extra = Extra.allow

    # Must be the literal string "ZipfileDecoder".
    type: Literal["ZipfileDecoder"]
    # Required: parser applied to the data decompressed from the zipfile(s).
    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
        ...,
        title="Parser",
        description="Parser to parse the decompressed data from the zipfile(s).",
    )
1690
+
1691
+
1683
1692
  class CompositeRawDecoder(BaseModel):
1684
1693
  type: Literal["CompositeRawDecoder"]
1685
1694
  parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
@@ -1886,7 +1895,7 @@ class SessionTokenAuthenticator(BaseModel):
1886
1895
  description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
1887
1896
  title="Data Request Authentication",
1888
1897
  )
1889
- decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
1898
+ decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
1890
1899
  None, description="Component used to decode the response.", title="Decoder"
1891
1900
  )
1892
1901
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -2092,6 +2101,7 @@ class SimpleRetriever(BaseModel):
2092
2101
  XmlDecoder,
2093
2102
  GzipJsonDecoder,
2094
2103
  CompositeRawDecoder,
2104
+ ZipfileDecoder,
2095
2105
  ]
2096
2106
  ] = Field(
2097
2107
  None,
@@ -2168,6 +2178,8 @@ class AsyncRetriever(BaseModel):
2168
2178
  IterableDecoder,
2169
2179
  XmlDecoder,
2170
2180
  GzipJsonDecoder,
2181
+ CompositeRawDecoder,
2182
+ ZipfileDecoder,
2171
2183
  ]
2172
2184
  ] = Field(
2173
2185
  None,
@@ -2182,6 +2194,8 @@ class AsyncRetriever(BaseModel):
2182
2194
  IterableDecoder,
2183
2195
  XmlDecoder,
2184
2196
  GzipJsonDecoder,
2197
+ CompositeRawDecoder,
2198
+ ZipfileDecoder,
2185
2199
  ]
2186
2200
  ] = Field(
2187
2201
  None,
@@ -66,6 +66,7 @@ from airbyte_cdk.sources.declarative.decoders import (
66
66
  JsonlDecoder,
67
67
  PaginationDecoderDecorator,
68
68
  XmlDecoder,
69
+ ZipfileDecoder,
69
70
  )
70
71
  from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
71
72
  CompositeRawDecoder,
@@ -359,6 +360,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
359
360
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
360
361
  XmlDecoder as XmlDecoderModel,
361
362
  )
363
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
364
+ ZipfileDecoder as ZipfileDecoderModel,
365
+ )
362
366
  from airbyte_cdk.sources.declarative.partition_routers import (
363
367
  CartesianProductStreamSlicer,
364
368
  ListPartitionRouter,
@@ -578,6 +582,7 @@ class ModelToComponentFactory:
578
582
  ConfigComponentsResolverModel: self.create_config_components_resolver,
579
583
  StreamConfigModel: self.create_stream_config,
580
584
  ComponentMappingDefinitionModel: self.create_components_mapping_definition,
585
+ ZipfileDecoderModel: self.create_zipfile_decoder,
581
586
  }
582
587
 
583
588
  # Needed for the case where we need to perform a second parse on the fields of a custom component
@@ -1820,6 +1825,12 @@ class ModelToComponentFactory:
1820
1825
  ) -> GzipJsonDecoder:
1821
1826
  return GzipJsonDecoder(parameters={}, encoding=model.encoding)
1822
1827
 
1828
def create_zipfile_decoder(
    self, model: ZipfileDecoderModel, config: Config, **kwargs: Any
) -> ZipfileDecoder:
    """Build a ``ZipfileDecoder`` from its declarative model.

    The nested ``model.parser`` is converted into a component through
    ``_create_component_from_model`` and handed to the decoder. Extra keyword
    arguments are accepted but not used.
    """
    return ZipfileDecoder(
        parser=self._create_component_from_model(model=model.parser, config=config)
    )
1833
+
1823
1834
  def create_gzip_parser(
1824
1835
  self, model: GzipParserModel, config: Config, **kwargs: Any
1825
1836
  ) -> GzipParser:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.21.1.dev0
3
+ Version: 6.22.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -66,7 +66,7 @@ Requires-Dist: tiktoken (==0.8.0) ; extra == "vector-db-based"
66
66
  Requires-Dist: unstructured.pytesseract (>=0.3.12) ; extra == "file-based"
67
67
  Requires-Dist: unstructured[docx,pptx] (==0.10.27) ; extra == "file-based"
68
68
  Requires-Dist: wcmatch (==10.0)
69
- Requires-Dist: xmltodict (>=0.13.0,<0.14.0)
69
+ Requires-Dist: xmltodict (>=0.13,<0.15)
70
70
  Project-URL: Documentation, https://docs.airbyte.io/
71
71
  Project-URL: Homepage, https://airbyte.com
72
72
  Project-URL: Repository, https://github.com/airbytehq/airbyte-python-cdk
@@ -67,16 +67,17 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyM
67
67
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
68
68
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
69
69
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
70
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=6I_DmzMwyAmVir2402ps6QPv1gf_7prGxFUIa1YmWbY,136990
70
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=1I-EkU0fZCntXLw7auNj10M934KZE_NZVx_VwvMJN0g,137923
71
71
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
72
72
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
73
- airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
73
+ airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=KSpQetKGqPCv-38QgcVJ5kzM5nzbFldTSsYDCS3Xf0Y,1035
74
74
  airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=kQfUVMVhChKe5OngwIQrs0F9KGnRUN-CKVFakCU23DQ,4354
75
75
  airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=sl-Gt8lXi7yD2Q-sD8je5QS2PbgrgsYjxRLWsay7DMc,826
76
76
  airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=qdbjeR6RffKaah_iWvMsOcDolYuxJY5DaI3b9AMTZXg,3327
77
77
  airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
78
78
  airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py,sha256=ZVBZhAOl0I0MymXN5CKTC-kIXG4GuUQAEyn0XpUDuSE,1081
79
79
  airbyte_cdk/sources/declarative/decoders/xml_decoder.py,sha256=EU-7t-5vIGRHZ14h-f0GUE4V5-eTM9Flux-A8xgI1Rc,3117
80
+ airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py,sha256=OTGeNh-Zkab9JwCTgiHtLH1IS6PiVO9jnr82c0vrHbw,2269
80
81
  airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
81
82
  airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrqC9AV1gO8-90t8UTDVfJGshN9E,754
82
83
  airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
@@ -107,12 +108,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
107
108
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
108
109
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
109
110
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
110
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=5OC6heHHLNss132yuDLtKlfzNvWLT0y55PK5ZZHlgug,96464
111
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=Fv6D9D5hyYhjCWfeIPpyeFWQakMsIsoBbqosSHLHmEs,96909
111
112
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
112
113
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
113
114
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
114
115
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
115
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=-CGUEiNli79cXUK8kxH-VqF8vPKkkPB-ZrdGAzfqRg8,113154
116
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=xldBYPEQ45BKM5F1Lpid2l6kqtYcLzGK0yw4R6gPAJQ,113624
116
117
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
117
118
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
118
119
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -345,8 +346,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
345
346
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
346
347
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
347
348
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
348
- airbyte_cdk-6.21.1.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
349
- airbyte_cdk-6.21.1.dev0.dist-info/METADATA,sha256=rLLcg1ZkBdUIIOMKVg4fimX2xMBsH_zK4CxXaLdCBrU,6005
350
- airbyte_cdk-6.21.1.dev0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
351
- airbyte_cdk-6.21.1.dev0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
352
- airbyte_cdk-6.21.1.dev0.dist-info/RECORD,,
349
+ airbyte_cdk-6.22.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
350
+ airbyte_cdk-6.22.0.dist-info/METADATA,sha256=FEDVsyYS-_65JwB58QODjqBHEVo1GONNrY6Ea9aHECs,5996
351
+ airbyte_cdk-6.22.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
352
+ airbyte_cdk-6.22.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
353
+ airbyte_cdk-6.22.0.dist-info/RECORD,,