airbyte-cdk 6.38.1__py3-none-any.whl → 6.38.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +102 -44
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -3
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +6 -9
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +0 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +49 -9
- airbyte_cdk/sources/declarative/requesters/http_requester.py +33 -7
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +11 -6
- airbyte_cdk/sources/streams/concurrent/adapters.py +9 -1
- {airbyte_cdk-6.38.1.dist-info → airbyte_cdk-6.38.3.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.38.1.dist-info → airbyte_cdk-6.38.3.dist-info}/RECORD +15 -14
- {airbyte_cdk-6.38.1.dist-info → airbyte_cdk-6.38.3.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.38.1.dist-info → airbyte_cdk-6.38.3.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.38.1.dist-info → airbyte_cdk-6.38.3.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.38.1.dist-info → airbyte_cdk-6.38.3.dist-info}/entry_points.txt +0 -0
@@ -1,47 +1,47 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
1
5
|
import csv
|
2
6
|
import gzip
|
3
7
|
import io
|
4
8
|
import json
|
5
9
|
import logging
|
6
|
-
from abc import ABC, abstractmethod
|
7
10
|
from dataclasses import dataclass
|
8
11
|
from io import BufferedIOBase, TextIOWrapper
|
9
|
-
from typing import Any,
|
12
|
+
from typing import Any, Optional
|
10
13
|
|
11
14
|
import orjson
|
12
15
|
import requests
|
13
16
|
|
14
17
|
from airbyte_cdk.models import FailureType
|
15
|
-
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
18
|
+
from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE, Decoder
|
19
|
+
from airbyte_cdk.sources.declarative.decoders.decoder_parser import (
|
20
|
+
PARSER_OUTPUT_TYPE,
|
21
|
+
PARSERS_BY_HEADER_TYPE,
|
22
|
+
PARSERS_TYPE,
|
23
|
+
Parser,
|
24
|
+
)
|
16
25
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
26
|
|
18
27
|
logger = logging.getLogger("airbyte")
|
19
28
|
|
20
29
|
|
21
|
-
@dataclass
|
22
|
-
class Parser(ABC):
|
23
|
-
@abstractmethod
|
24
|
-
def parse(
|
25
|
-
self,
|
26
|
-
data: BufferedIOBase,
|
27
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
28
|
-
"""
|
29
|
-
Parse data and yield dictionaries.
|
30
|
-
"""
|
31
|
-
pass
|
32
|
-
|
33
|
-
|
34
30
|
@dataclass
|
35
31
|
class GzipParser(Parser):
|
36
32
|
inner_parser: Parser
|
37
33
|
|
38
|
-
def parse(
|
39
|
-
self,
|
40
|
-
data: BufferedIOBase,
|
41
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
34
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
42
35
|
"""
|
43
36
|
Decompress gzipped bytes and pass decompressed data to the inner parser.
|
37
|
+
|
38
|
+
IMPORTANT:
|
39
|
+
- If the data is not gzipped, reset the pointer and pass the data to the inner parser as is.
|
40
|
+
|
41
|
+
Note:
|
42
|
+
- The data is not decoded by default.
|
44
43
|
"""
|
44
|
+
|
45
45
|
with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
|
46
46
|
yield from self.inner_parser.parse(gzipobj)
|
47
47
|
|
@@ -50,7 +50,7 @@ class GzipParser(Parser):
|
|
50
50
|
class JsonParser(Parser):
|
51
51
|
encoding: str = "utf-8"
|
52
52
|
|
53
|
-
def parse(self, data: BufferedIOBase) ->
|
53
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
54
54
|
"""
|
55
55
|
Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.
|
56
56
|
"""
|
@@ -90,10 +90,7 @@ class JsonParser(Parser):
|
|
90
90
|
class JsonLineParser(Parser):
|
91
91
|
encoding: Optional[str] = "utf-8"
|
92
92
|
|
93
|
-
def parse(
|
94
|
-
self,
|
95
|
-
data: BufferedIOBase,
|
96
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
93
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
97
94
|
for line in data:
|
98
95
|
try:
|
99
96
|
yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
|
@@ -117,10 +114,7 @@ class CsvParser(Parser):
|
|
117
114
|
|
118
115
|
return self.delimiter
|
119
116
|
|
120
|
-
def parse(
|
121
|
-
self,
|
122
|
-
data: BufferedIOBase,
|
123
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
117
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
124
118
|
"""
|
125
119
|
Parse CSV data from decompressed bytes.
|
126
120
|
"""
|
@@ -130,31 +124,95 @@ class CsvParser(Parser):
|
|
130
124
|
yield row
|
131
125
|
|
132
126
|
|
133
|
-
@dataclass
|
134
127
|
class CompositeRawDecoder(Decoder):
|
135
128
|
"""
|
136
|
-
Decoder strategy to transform a requests.Response into a
|
129
|
+
Decoder strategy to transform a requests.Response into a PARSER_OUTPUT_TYPE
|
137
130
|
passed response.raw to parser(s).
|
138
|
-
|
139
|
-
|
131
|
+
|
132
|
+
Note: response.raw is not decoded/decompressed by default. Parsers should be instantiated recursively.
|
133
|
+
|
140
134
|
Example:
|
141
|
-
|
135
|
+
composite_raw_decoder = CompositeRawDecoder(
|
136
|
+
parser=GzipParser(
|
137
|
+
inner_parser=JsonLineParser(encoding="iso-8859-1")
|
138
|
+
)
|
139
|
+
)
|
142
140
|
"""
|
143
141
|
|
144
|
-
|
145
|
-
|
142
|
+
def __init__(
|
143
|
+
self,
|
144
|
+
parser: Parser,
|
145
|
+
stream_response: bool = True,
|
146
|
+
parsers_by_header: PARSERS_BY_HEADER_TYPE = None,
|
147
|
+
) -> None:
|
148
|
+
# since we moved from using `dataclass` to `__init__` method,
|
149
|
+
# we need to keep using the `parser` to be able to resolve the dependencies
|
150
|
+
# between the parsers correctly.
|
151
|
+
self.parser = parser
|
152
|
+
|
153
|
+
self._parsers_by_header = parsers_by_header if parsers_by_header else {}
|
154
|
+
self._stream_response = stream_response
|
155
|
+
|
156
|
+
@classmethod
|
157
|
+
def by_headers(
|
158
|
+
cls,
|
159
|
+
parsers: PARSERS_TYPE,
|
160
|
+
stream_response: bool,
|
161
|
+
fallback_parser: Parser,
|
162
|
+
) -> "CompositeRawDecoder":
|
163
|
+
"""
|
164
|
+
Create a CompositeRawDecoder instance based on header values.
|
165
|
+
|
166
|
+
Args:
|
167
|
+
parsers (PARSERS_TYPE): A list of tuples where each tuple contains headers, header values, and a parser.
|
168
|
+
stream_response (bool): A flag indicating whether the response should be streamed.
|
169
|
+
fallback_parser (Parser): A parser to use if no matching header is found.
|
170
|
+
|
171
|
+
Returns:
|
172
|
+
CompositeRawDecoder: An instance of CompositeRawDecoder configured with the provided parsers.
|
173
|
+
"""
|
174
|
+
parsers_by_header = {}
|
175
|
+
for headers, header_values, parser in parsers:
|
176
|
+
for header in headers:
|
177
|
+
parsers_by_header[header] = {header_value: parser for header_value in header_values}
|
178
|
+
return cls(fallback_parser, stream_response, parsers_by_header)
|
146
179
|
|
147
180
|
def is_stream_response(self) -> bool:
|
148
|
-
return self.
|
181
|
+
return self._stream_response
|
149
182
|
|
150
|
-
def decode(
|
151
|
-
|
152
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
183
|
+
def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
|
184
|
+
parser = self._select_parser(response)
|
153
185
|
if self.is_stream_response():
|
154
|
-
# urllib mentions that some interfaces don't play nice with auto_close
|
155
|
-
#
|
186
|
+
# urllib mentions that some interfaces don't play nice with auto_close
|
187
|
+
# More info here: https://urllib3.readthedocs.io/en/stable/user-guide.html#using-io-wrappers-with-response-content
|
188
|
+
# We have indeed observed some issues with CSV parsing.
|
189
|
+
# Hence, we will manage the closing of the file ourselves until we find a better solution.
|
156
190
|
response.raw.auto_close = False
|
157
|
-
yield from
|
191
|
+
yield from parser.parse(
|
192
|
+
data=response.raw, # type: ignore[arg-type]
|
193
|
+
)
|
158
194
|
response.raw.close()
|
159
195
|
else:
|
160
|
-
yield from
|
196
|
+
yield from parser.parse(data=io.BytesIO(response.content))
|
197
|
+
|
198
|
+
def _select_parser(self, response: requests.Response) -> Parser:
|
199
|
+
"""
|
200
|
+
Selects the appropriate parser based on the response headers.
|
201
|
+
|
202
|
+
This method iterates through the `_parsers_by_header` dictionary to find a matching parser
|
203
|
+
based on the headers in the response. If a matching header and header value are found,
|
204
|
+
the corresponding parser is returned. If no match is found, the default parser is returned.
|
205
|
+
|
206
|
+
Args:
|
207
|
+
response (requests.Response): The HTTP response object containing headers to check.
|
208
|
+
|
209
|
+
Returns:
|
210
|
+
Parser: The parser corresponding to the matched header value, or the default parser if no match is found.
|
211
|
+
"""
|
212
|
+
for header, parser_by_header_value in self._parsers_by_header.items():
|
213
|
+
if (
|
214
|
+
header in response.headers
|
215
|
+
and response.headers[header] in parser_by_header_value.keys()
|
216
|
+
):
|
217
|
+
return parser_by_header_value[response.headers[header]]
|
218
|
+
return self.parser
|
@@ -8,6 +8,8 @@ from typing import Any, Generator, MutableMapping
|
|
8
8
|
|
9
9
|
import requests
|
10
10
|
|
11
|
+
DECODER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
|
12
|
+
|
11
13
|
|
12
14
|
@dataclass
|
13
15
|
class Decoder:
|
@@ -22,9 +24,7 @@ class Decoder:
|
|
22
24
|
"""
|
23
25
|
|
24
26
|
@abstractmethod
|
25
|
-
def decode(
|
26
|
-
self, response: requests.Response
|
27
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
27
|
+
def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
|
28
28
|
"""
|
29
29
|
Decodes a requests.Response into a Mapping[str, Any] or an array
|
30
30
|
:param response: the response to decode
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
|
6
|
+
import logging
|
7
|
+
from abc import ABC, abstractmethod
|
8
|
+
from dataclasses import dataclass
|
9
|
+
from io import BufferedIOBase
|
10
|
+
from typing import Any, Dict, Generator, List, MutableMapping, Optional, Set, Tuple
|
11
|
+
|
12
|
+
logger = logging.getLogger("airbyte")
|
13
|
+
|
14
|
+
|
15
|
+
PARSER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
|
19
|
+
class Parser(ABC):
|
20
|
+
@abstractmethod
|
21
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
22
|
+
"""
|
23
|
+
Parse data and yield dictionaries.
|
24
|
+
"""
|
25
|
+
pass
|
26
|
+
|
27
|
+
|
28
|
+
# reusable parser types
|
29
|
+
PARSERS_TYPE = List[Tuple[Set[str], Set[str], Parser]]
|
30
|
+
PARSERS_BY_HEADER_TYPE = Optional[Dict[str, Dict[str, Parser]]]
|
@@ -6,16 +6,13 @@ import logging
|
|
6
6
|
import zipfile
|
7
7
|
from dataclasses import dataclass
|
8
8
|
from io import BytesIO
|
9
|
-
from typing import Any, Generator, MutableMapping
|
10
9
|
|
11
|
-
import orjson
|
12
10
|
import requests
|
13
11
|
|
14
12
|
from airbyte_cdk.models import FailureType
|
15
13
|
from airbyte_cdk.sources.declarative.decoders import Decoder
|
16
|
-
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import
|
17
|
-
|
18
|
-
)
|
14
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import Parser
|
15
|
+
from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE
|
19
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
20
17
|
|
21
18
|
logger = logging.getLogger("airbyte")
|
@@ -28,16 +25,16 @@ class ZipfileDecoder(Decoder):
|
|
28
25
|
def is_stream_response(self) -> bool:
|
29
26
|
return False
|
30
27
|
|
31
|
-
def decode(
|
32
|
-
self, response: requests.Response
|
33
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
28
|
+
def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
|
34
29
|
try:
|
35
30
|
with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
|
36
31
|
for file_name in zip_file.namelist():
|
37
32
|
unzipped_content = zip_file.read(file_name)
|
38
33
|
buffered_content = BytesIO(unzipped_content)
|
39
34
|
try:
|
40
|
-
yield from self.parser.parse(
|
35
|
+
yield from self.parser.parse(
|
36
|
+
buffered_content,
|
37
|
+
)
|
41
38
|
except Exception as e:
|
42
39
|
logger.error(
|
43
40
|
f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
|
@@ -15,7 +15,6 @@ from numpy import nan
|
|
15
15
|
|
16
16
|
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
17
17
|
|
18
|
-
EMPTY_STR: str = ""
|
19
18
|
DEFAULT_ENCODING: str = "utf-8"
|
20
19
|
DOWNLOAD_CHUNK_SIZE: int = 1024 * 10
|
21
20
|
|
@@ -136,7 +135,6 @@ class ResponseToFileExtractor(RecordExtractor):
|
|
136
135
|
"""
|
137
136
|
|
138
137
|
try:
|
139
|
-
# TODO: Add support for other file types, like `json`, with `pd.read_json()`
|
140
138
|
with open(path, "r", encoding=file_encoding) as data:
|
141
139
|
chunks = pd.read_csv(
|
142
140
|
data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
|
@@ -1476,7 +1476,19 @@ class ModelToComponentFactory:
|
|
1476
1476
|
try:
|
1477
1477
|
module_ref = importlib.import_module(module_name_full)
|
1478
1478
|
except ModuleNotFoundError as e:
|
1479
|
-
|
1479
|
+
if split[0] == "source_declarative_manifest":
|
1480
|
+
# During testing, the modules containing the custom components are not moved to source_declarative_manifest. In order to run the test, add the source folder to your PYTHONPATH or add it at runtime using sys.path.append
|
1481
|
+
try:
|
1482
|
+
import os
|
1483
|
+
|
1484
|
+
module_name_with_source_declarative_manifest = ".".join(split[1:-1])
|
1485
|
+
module_ref = importlib.import_module(
|
1486
|
+
module_name_with_source_declarative_manifest
|
1487
|
+
)
|
1488
|
+
except ModuleNotFoundError:
|
1489
|
+
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
1490
|
+
else:
|
1491
|
+
raise ValueError(f"Could not load module `{module_name_full}`.") from e
|
1480
1492
|
|
1481
1493
|
try:
|
1482
1494
|
return getattr(module_ref, class_name)
|
@@ -2193,18 +2205,40 @@ class ModelToComponentFactory:
|
|
2193
2205
|
stream_response=False if self._emit_connector_builder_messages else True,
|
2194
2206
|
)
|
2195
2207
|
|
2196
|
-
|
2197
|
-
|
2208
|
+
def create_jsonl_decoder(
|
2209
|
+
self, model: JsonlDecoderModel, config: Config, **kwargs: Any
|
2210
|
+
) -> Decoder:
|
2198
2211
|
return CompositeRawDecoder(
|
2199
|
-
parser=ModelToComponentFactory._get_parser(model, config),
|
2212
|
+
parser=ModelToComponentFactory._get_parser(model, config),
|
2213
|
+
stream_response=False if self._emit_connector_builder_messages else True,
|
2200
2214
|
)
|
2201
2215
|
|
2202
2216
|
def create_gzip_decoder(
|
2203
2217
|
self, model: GzipDecoderModel, config: Config, **kwargs: Any
|
2204
2218
|
) -> Decoder:
|
2205
|
-
|
2206
|
-
|
2207
|
-
|
2219
|
+
_compressed_response_types = {
|
2220
|
+
"gzip",
|
2221
|
+
"x-gzip",
|
2222
|
+
"gzip, deflate",
|
2223
|
+
"x-gzip, deflate",
|
2224
|
+
"application/zip",
|
2225
|
+
"application/gzip",
|
2226
|
+
"application/x-gzip",
|
2227
|
+
"application/x-zip-compressed",
|
2228
|
+
}
|
2229
|
+
|
2230
|
+
gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser
|
2231
|
+
|
2232
|
+
if self._emit_connector_builder_messages:
|
2233
|
+
# This is very surprising but if the response is not streamed,
|
2234
|
+
# CompositeRawDecoder calls response.content and the requests library actually uncompresses the data, as opposed to response.raw,
|
2235
|
+
# which uses urllib3 directly and does not uncompress the data.
|
2236
|
+
return CompositeRawDecoder(gzip_parser.inner_parser, False)
|
2237
|
+
|
2238
|
+
return CompositeRawDecoder.by_headers(
|
2239
|
+
[({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
|
2240
|
+
stream_response=True,
|
2241
|
+
fallback_parser=gzip_parser.inner_parser,
|
2208
2242
|
)
|
2209
2243
|
|
2210
2244
|
@staticmethod
|
@@ -2753,7 +2787,10 @@ class ModelToComponentFactory:
|
|
2753
2787
|
)
|
2754
2788
|
paginator = (
|
2755
2789
|
self._create_component_from_model(
|
2756
|
-
model=model.download_paginator,
|
2790
|
+
model=model.download_paginator,
|
2791
|
+
decoder=decoder,
|
2792
|
+
config=config,
|
2793
|
+
url_base="",
|
2757
2794
|
)
|
2758
2795
|
if model.download_paginator
|
2759
2796
|
else NoPagination(parameters={})
|
@@ -2870,7 +2907,10 @@ class ModelToComponentFactory:
|
|
2870
2907
|
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2871
2908
|
)
|
2872
2909
|
download_target_extractor = self._create_component_from_model(
|
2873
|
-
model=model.download_target_extractor,
|
2910
|
+
model=model.download_target_extractor,
|
2911
|
+
decoder=decoder,
|
2912
|
+
config=config,
|
2913
|
+
name=name,
|
2874
2914
|
)
|
2875
2915
|
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
2876
2916
|
creation_requester=creation_requester,
|
@@ -16,7 +16,9 @@ from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
|
|
16
16
|
)
|
17
17
|
from airbyte_cdk.sources.declarative.decoders import Decoder
|
18
18
|
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
|
19
|
-
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import
|
19
|
+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
|
20
|
+
InterpolatedString,
|
21
|
+
)
|
20
22
|
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import (
|
21
23
|
InterpolatedRequestOptionsProvider,
|
22
24
|
)
|
@@ -26,7 +28,10 @@ from airbyte_cdk.sources.streams.call_rate import APIBudget
|
|
26
28
|
from airbyte_cdk.sources.streams.http import HttpClient
|
27
29
|
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
|
28
30
|
from airbyte_cdk.sources.types import Config, EmptyString, StreamSlice, StreamState
|
29
|
-
from airbyte_cdk.utils.mapping_helpers import
|
31
|
+
from airbyte_cdk.utils.mapping_helpers import (
|
32
|
+
combine_mappings,
|
33
|
+
get_interpolation_context,
|
34
|
+
)
|
30
35
|
|
31
36
|
|
32
37
|
@dataclass
|
@@ -155,7 +160,9 @@ class HttpRequester(Requester):
|
|
155
160
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
156
161
|
) -> MutableMapping[str, Any]:
|
157
162
|
return self._request_options_provider.get_request_params(
|
158
|
-
stream_state=stream_state,
|
163
|
+
stream_state=stream_state,
|
164
|
+
stream_slice=stream_slice,
|
165
|
+
next_page_token=next_page_token,
|
159
166
|
)
|
160
167
|
|
161
168
|
def get_request_headers(
|
@@ -166,7 +173,9 @@ class HttpRequester(Requester):
|
|
166
173
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
167
174
|
) -> Mapping[str, Any]:
|
168
175
|
return self._request_options_provider.get_request_headers(
|
169
|
-
stream_state=stream_state,
|
176
|
+
stream_state=stream_state,
|
177
|
+
stream_slice=stream_slice,
|
178
|
+
next_page_token=next_page_token,
|
170
179
|
)
|
171
180
|
|
172
181
|
# fixing request options provider types has a lot of dependencies
|
@@ -195,7 +204,9 @@ class HttpRequester(Requester):
|
|
195
204
|
next_page_token: Optional[Mapping[str, Any]] = None,
|
196
205
|
) -> Optional[Mapping[str, Any]]:
|
197
206
|
return self._request_options_provider.get_request_body_json(
|
198
|
-
stream_state=stream_state,
|
207
|
+
stream_state=stream_state,
|
208
|
+
stream_slice=stream_slice,
|
209
|
+
next_page_token=next_page_token,
|
199
210
|
)
|
200
211
|
|
201
212
|
@property
|
@@ -350,9 +361,24 @@ class HttpRequester(Requester):
|
|
350
361
|
path (str): The path to join with the base URL.
|
351
362
|
|
352
363
|
Returns:
|
353
|
-
str: The
|
364
|
+
str: The resulting joined URL.
|
365
|
+
|
366
|
+
Note:
|
367
|
+
Related issue: https://github.com/airbytehq/airbyte-internal-issues/issues/11869
|
368
|
+
- If the path is an empty string or None, the method returns the base URL with any trailing slash removed.
|
369
|
+
|
370
|
+
Example:
|
371
|
+
1) _join_url("https://example.com/api/", "endpoint") >> 'https://example.com/api/endpoint'
|
372
|
+
2) _join_url("https://example.com/api", "/endpoint") >> 'https://example.com/api/endpoint'
|
373
|
+
3) _join_url("https://example.com/api/", "") >> 'https://example.com/api'
|
374
|
+
4) _join_url("https://example.com/api", None) >> 'https://example.com/api'
|
354
375
|
"""
|
355
|
-
|
376
|
+
|
377
|
+
# return a full-url if provided directly from interpolation context
|
378
|
+
if path == EmptyString or path is None:
|
379
|
+
return url_base.rstrip("/")
|
380
|
+
|
381
|
+
return urljoin(url_base, path)
|
356
382
|
|
357
383
|
def send_request(
|
358
384
|
self,
|
@@ -39,13 +39,18 @@ class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage
|
|
39
39
|
)
|
40
40
|
|
41
41
|
def _read_and_parse_yaml_file(self, path_to_yaml_file: str) -> ConnectionDefinition:
|
42
|
-
|
42
|
+
try:
|
43
|
+
# For testing purposes, we want to allow passing just a file path
|
44
|
+
with open(path_to_yaml_file, "r") as f:
|
45
|
+
return yaml.safe_load(f) # type: ignore # we assume the yaml represents a ConnectionDefinition
|
46
|
+
except FileNotFoundError:
|
47
|
+
# Running inside the container, the working directory during an operation is not structured the same as the static files
|
48
|
+
package = self.__class__.__module__.split(".")[0]
|
43
49
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
else:
|
50
|
+
yaml_config = pkgutil.get_data(package, path_to_yaml_file)
|
51
|
+
if yaml_config:
|
52
|
+
decoded_yaml = yaml_config.decode()
|
53
|
+
return self._parse(decoded_yaml)
|
49
54
|
return {}
|
50
55
|
|
51
56
|
def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
|
@@ -276,7 +276,7 @@ class StreamPartition(Partition):
|
|
276
276
|
def read(self) -> Iterable[Record]:
|
277
277
|
"""
|
278
278
|
Read messages from the stream.
|
279
|
-
If the StreamData is a Mapping, it will be converted to a Record.
|
279
|
+
If the StreamData is a Mapping or an AirbyteMessage of type RECORD, it will be converted to a Record.
|
280
280
|
Otherwise, the message will be emitted on the message repository.
|
281
281
|
"""
|
282
282
|
try:
|
@@ -292,6 +292,8 @@ class StreamPartition(Partition):
|
|
292
292
|
stream_slice=copy.deepcopy(self._slice),
|
293
293
|
stream_state=self._state,
|
294
294
|
):
|
295
|
+
# Note that we'll also need to support FileTransferRecordMessage if we want to support file-based connectors in this facade
|
296
|
+
# For now, file-based connectors have their own stream facade
|
295
297
|
if isinstance(record_data, Mapping):
|
296
298
|
data_to_return = dict(record_data)
|
297
299
|
self._stream.transformer.transform(
|
@@ -302,6 +304,12 @@ class StreamPartition(Partition):
|
|
302
304
|
stream_name=self.stream_name(),
|
303
305
|
associated_slice=self._slice, # type: ignore [arg-type]
|
304
306
|
)
|
307
|
+
elif isinstance(record_data, AirbyteMessage) and record_data.record is not None:
|
308
|
+
yield Record(
|
309
|
+
data=record_data.record.data or {},
|
310
|
+
stream_name=self.stream_name(),
|
311
|
+
associated_slice=self._slice, # type: ignore [arg-type]
|
312
|
+
)
|
305
313
|
else:
|
306
314
|
self._message_repository.emit_message(record_data)
|
307
315
|
except Exception as e:
|
@@ -75,13 +75,14 @@ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=n8hJVqu
|
|
75
75
|
airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
|
76
76
|
airbyte_cdk/sources/declarative/declarative_stream.py,sha256=venZjfpvtqr3oFSuvMBWtn4h9ayLhD4L65ACuXCDZ64,10445
|
77
77
|
airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=JHb_0d3SE6kNY10mxA5YBEKPeSbsWYjByq1gUQxepoE,953
|
78
|
-
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=
|
79
|
-
airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=
|
78
|
+
airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=Jd7URkDQBoHSDQHQuYUqzeex1HYfLRtGcY_-dVW33pA,7884
|
79
|
+
airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=1PeKwuMK8x9dsA2zqUjSVinEWVSEgYcUS6npiW3aC2c,855
|
80
|
+
airbyte_cdk/sources/declarative/decoders/decoder_parser.py,sha256=e0be6kfzvbnhmcou-AuloFTSoLxiV9sG9YaglWo5mto,714
|
80
81
|
airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=BdWpXXPhEGf_zknggJmhojLosmxuw51RBVTS0jvdCPc,2080
|
81
82
|
airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
|
82
83
|
airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py,sha256=ZVBZhAOl0I0MymXN5CKTC-kIXG4GuUQAEyn0XpUDuSE,1081
|
83
84
|
airbyte_cdk/sources/declarative/decoders/xml_decoder.py,sha256=EU-7t-5vIGRHZ14h-f0GUE4V5-eTM9Flux-A8xgI1Rc,3117
|
84
|
-
airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py,sha256=
|
85
|
+
airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py,sha256=Din18m61aly2oG6TaXGpLcbfUHVOzjGzuMYkyxfHXT4,2290
|
85
86
|
airbyte_cdk/sources/declarative/exceptions.py,sha256=kTPUA4I2NV4J6HDz-mKPGMrfuc592akJnOyYx38l_QM,176
|
86
87
|
airbyte_cdk/sources/declarative/extractors/__init__.py,sha256=RmV-IkO1YLj0PSOrrqC9AV1gO8-90t8UTDVfJGshN9E,754
|
87
88
|
airbyte_cdk/sources/declarative/extractors/dpath_extractor.py,sha256=wR4Ol4MG2lt5UlqXF5EU_k7qa5cN4_-luu3PJ1PlO3A,3131
|
@@ -89,7 +90,7 @@ airbyte_cdk/sources/declarative/extractors/http_selector.py,sha256=2zWZ4ewTqQC8V
|
|
89
90
|
airbyte_cdk/sources/declarative/extractors/record_extractor.py,sha256=XJELMjahAsaomlvQgN2zrNO0DJX0G0fr9r682gUz7Pg,691
|
90
91
|
airbyte_cdk/sources/declarative/extractors/record_filter.py,sha256=yTdEkyDUSW2KbFkEwJJMlS963C955LgCCOVfTmmScpQ,3367
|
91
92
|
airbyte_cdk/sources/declarative/extractors/record_selector.py,sha256=HCqx7IyENM_aRF4it2zJN26_vDu6WeP8XgCxQWHUvcY,6934
|
92
|
-
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=
|
93
|
+
airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py,sha256=WJyA2OYIEgFpVP5Y3o0tIj69AV6IKkn9B16MeXaEItI,6513
|
93
94
|
airbyte_cdk/sources/declarative/extractors/type_transformer.py,sha256=d6Y2Rfg8pMVEEnHllfVksWZdNVOU55yk34O03dP9muY,1626
|
94
95
|
airbyte_cdk/sources/declarative/incremental/__init__.py,sha256=U1oZKtBaEC6IACmvziY9Wzg7Z8EgF4ZuR7NwvjlB_Sk,1255
|
95
96
|
airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py,sha256=MT5JbdEbnPzk3VWZGGvThe4opoX5dHhSXFrnTRYC6dg,22210
|
@@ -119,7 +120,7 @@ airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py,sha256=jDw_TttD3
|
|
119
120
|
airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
|
120
121
|
airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
|
121
122
|
airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
|
122
|
-
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=
|
123
|
+
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=4FEIBgkAn85qmOEjmi8rRPBERdktNpOFjXGbw0Fsau8,142441
|
123
124
|
airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
|
124
125
|
airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
|
125
126
|
airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
|
@@ -143,7 +144,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_
|
|
143
144
|
airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
|
144
145
|
airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py,sha256=E-fQbt4ShfxZVoqfnmOx69C6FUPWZz8BIqI3DN9Kcjs,7935
|
145
146
|
airbyte_cdk/sources/declarative/requesters/http_job_repository.py,sha256=4wpP0ZNTMLugi-Rc1OFdFaxWfRZSl45nzhHqMFCE8SQ,11924
|
146
|
-
airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=
|
147
|
+
airbyte_cdk/sources/declarative/requesters/http_requester.py,sha256=5i16IqHp4gARSI619babc60_uWUSebNSbkqdci5itSs,17194
|
147
148
|
airbyte_cdk/sources/declarative/requesters/paginators/__init__.py,sha256=uArbKs9JKNCt7t9tZoeWwjDpyI1HoPp29FNW0JzvaEM,644
|
148
149
|
airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py,sha256=SB-Af3CRb4mJwhm4EKNxzl_PK2w5QS4tqrSNNMO2IV4,12760
|
149
150
|
airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py,sha256=b1-zKxYOUMHn7ahdWpzKEzfG4A7s_WQWy-vzRqZWzME,2152
|
@@ -193,7 +194,7 @@ airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py,
|
|
193
194
|
airbyte_cdk/sources/declarative/transformations/remove_fields.py,sha256=EwUP0SZ2p4GRJ6Q8CUzlz9dcUeEidEFDlI2IBye2tlc,2745
|
194
195
|
airbyte_cdk/sources/declarative/transformations/transformation.py,sha256=4sXtx9cNY2EHUPq-xHvDs8GQEBUy3Eo6TkRLKHPXx68,1161
|
195
196
|
airbyte_cdk/sources/declarative/types.py,sha256=yqx0xlZv_76tkC7fqJKefmvl4GJJ8mXbeddwVV8XRJU,778
|
196
|
-
airbyte_cdk/sources/declarative/yaml_declarative_source.py,sha256=
|
197
|
+
airbyte_cdk/sources/declarative/yaml_declarative_source.py,sha256=nJCZkzLGP-dwvfwKsl4VqQFZQdhx6fiGCRez1gma0wE,2714
|
197
198
|
airbyte_cdk/sources/file_based/README.md,sha256=iMqww4VZ882jfNQIdljjDgqreKs-mkdtSrRKA94iX6A,11085
|
198
199
|
airbyte_cdk/sources/file_based/__init__.py,sha256=EaxHv_9ot-eRlUCR47ZMZ0IOtB-n0HH24om7Bfn-uuQ,868
|
199
200
|
airbyte_cdk/sources/file_based/availability_strategy/__init__.py,sha256=ddKQfUmk-Ls7LJaG8gtrqDybG3d8S7KXOAEjLeYLrTg,399
|
@@ -263,7 +264,7 @@ airbyte_cdk/sources/streams/concurrent/README.md,sha256=0nvgnlCBfZJiPDAofT8yFmUh
|
|
263
264
|
airbyte_cdk/sources/streams/concurrent/__init__.py,sha256=4Hw-PX1-VgESLF16cDdvuYCzGJtHntThLF4qIiULWeo,61
|
264
265
|
airbyte_cdk/sources/streams/concurrent/abstract_stream.py,sha256=3OB5VsvOkJmCxIMABKgdJAwvCdZtkxeaAVrUNIW3jMQ,3902
|
265
266
|
airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCBUwJDw1QSCEvz23s7zIEx_7QMxkPq9j-oPIQ,1358
|
266
|
-
airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=
|
267
|
+
airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=aZtJ_75gVPmoCS-URtfQQX8mYId5xk5Q5mLQYeTM0N4,15814
|
267
268
|
airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=4La5v2UffSjGnhmF4kwNIKt_g3RXk2ux1mSHA1ejgYM,2898
|
268
269
|
airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
|
269
270
|
airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=LFXbKBEMtNSVz_kZs9qydS9fPvzTU5wdgXRagRRJeHo,21388
|
@@ -356,9 +357,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
|
|
356
357
|
airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
|
357
358
|
airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
|
358
359
|
airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
|
359
|
-
airbyte_cdk-6.38.
|
360
|
-
airbyte_cdk-6.38.
|
361
|
-
airbyte_cdk-6.38.
|
362
|
-
airbyte_cdk-6.38.
|
363
|
-
airbyte_cdk-6.38.
|
364
|
-
airbyte_cdk-6.38.
|
360
|
+
airbyte_cdk-6.38.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
|
361
|
+
airbyte_cdk-6.38.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
|
362
|
+
airbyte_cdk-6.38.3.dist-info/METADATA,sha256=8jMYkKC_ie6Xm0bs5_gB5FcSirgKCNaCrUL4Rz01YIM,6013
|
363
|
+
airbyte_cdk-6.38.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
364
|
+
airbyte_cdk-6.38.3.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
|
365
|
+
airbyte_cdk-6.38.3.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|