airbyte-cdk 6.38.0.dev0__py3-none-any.whl → 6.38.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/entrypoint.py +6 -6
- airbyte_cdk/logger.py +1 -4
- airbyte_cdk/sources/declarative/datetime/__init__.py +0 -4
- airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +10 -2
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +104 -42
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -3
- airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
- airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +6 -9
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +0 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +3 -5
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +7 -5
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +37 -9
- airbyte_cdk/sources/declarative/requesters/http_requester.py +77 -25
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
- airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
- airbyte_cdk/sources/types.py +1 -0
- airbyte_cdk/utils/mapping_helpers.py +18 -1
- {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/METADATA +3 -3
- {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/RECORD +29 -33
- airbyte_cdk/sources/embedded/__init__.py +0 -3
- airbyte_cdk/sources/embedded/base_integration.py +0 -61
- airbyte_cdk/sources/embedded/catalog.py +0 -57
- airbyte_cdk/sources/embedded/runner.py +0 -57
- airbyte_cdk/sources/embedded/tools.py +0 -27
- {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py
CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
|
|
22
22
|
|
23
23
|
from airbyte_cdk.connector import TConfig
|
24
24
|
from airbyte_cdk.exception_handler import init_uncaught_exception_handler
|
25
|
-
from airbyte_cdk.logger import
|
25
|
+
from airbyte_cdk.logger import init_logger
|
26
26
|
from airbyte_cdk.models import (
|
27
27
|
AirbyteConnectionStatus,
|
28
28
|
AirbyteMessage,
|
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
|
|
337
337
|
parsed_args = source_entrypoint.parse_args(args)
|
338
338
|
# temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
|
339
339
|
# Refer to: https://github.com/airbytehq/oncall/issues/6235
|
340
|
-
with
|
341
|
-
|
342
|
-
|
343
|
-
|
344
|
-
|
340
|
+
# with PrintBuffer():
|
341
|
+
for message in source_entrypoint.run(parsed_args):
|
342
|
+
# simply printing is creating issues for concurrent CDK as Python uses two different instructions to print: one for the message and
|
343
|
+
# the other for the line break. Adding `\n` to the message ensures that both are printed at the same time
|
344
|
+
print(f"{message}\n", end="", flush=True)
|
345
345
|
|
346
346
|
|
347
347
|
def _init_internal_request_filter() -> None:
|
airbyte_cdk/logger.py
CHANGED
@@ -16,11 +16,8 @@ from airbyte_cdk.models import (
|
|
16
16
|
Level,
|
17
17
|
Type,
|
18
18
|
)
|
19
|
-
from airbyte_cdk.utils import PrintBuffer
|
20
19
|
from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
|
21
20
|
|
22
|
-
PRINT_BUFFER = PrintBuffer(flush_interval=0.1)
|
23
|
-
|
24
21
|
LOGGING_CONFIG = {
|
25
22
|
"version": 1,
|
26
23
|
"disable_existing_loggers": False,
|
@@ -30,7 +27,7 @@ LOGGING_CONFIG = {
|
|
30
27
|
"handlers": {
|
31
28
|
"console": {
|
32
29
|
"class": "logging.StreamHandler",
|
33
|
-
"stream":
|
30
|
+
"stream": "ext://sys.stdout",
|
34
31
|
"formatter": "airbyte",
|
35
32
|
},
|
36
33
|
},
|
@@ -29,6 +29,8 @@ class DatetimeParser:
|
|
29
29
|
return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
|
30
30
|
elif format == "%s_as_float":
|
31
31
|
return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
|
32
|
+
elif format == "%epoch_microseconds":
|
33
|
+
return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
|
32
34
|
elif format == "%ms":
|
33
35
|
return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
|
34
36
|
elif "%_ms" in format:
|
@@ -46,6 +48,8 @@ class DatetimeParser:
|
|
46
48
|
return str(int(dt.timestamp()))
|
47
49
|
if format == "%s_as_float":
|
48
50
|
return str(float(dt.timestamp()))
|
51
|
+
if format == "%epoch_microseconds":
|
52
|
+
return str(int(dt.timestamp() * 1_000_000))
|
49
53
|
if format == "%ms":
|
50
54
|
# timestamp() returns a float representing the number of seconds since the unix epoch
|
51
55
|
return str(int(dt.timestamp() * 1000))
|
@@ -1794,7 +1794,6 @@ definitions:
|
|
1794
1794
|
type: object
|
1795
1795
|
required:
|
1796
1796
|
- type
|
1797
|
-
- path
|
1798
1797
|
- url_base
|
1799
1798
|
properties:
|
1800
1799
|
type:
|
@@ -1806,9 +1805,18 @@ definitions:
|
|
1806
1805
|
type: string
|
1807
1806
|
interpolation_context:
|
1808
1807
|
- config
|
1808
|
+
- next_page_token
|
1809
|
+
- stream_interval
|
1810
|
+
- stream_partition
|
1811
|
+
- stream_slice
|
1812
|
+
- creation_response
|
1813
|
+
- polling_response
|
1814
|
+
- download_target
|
1809
1815
|
examples:
|
1810
1816
|
- "https://connect.squareup.com/v2"
|
1811
|
-
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api
|
1817
|
+
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
|
1818
|
+
- "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
|
1819
|
+
- "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
|
1812
1820
|
path:
|
1813
1821
|
title: URL Path
|
1814
1822
|
description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
|
@@ -1,47 +1,47 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
1
5
|
import csv
|
2
6
|
import gzip
|
3
7
|
import io
|
4
8
|
import json
|
5
9
|
import logging
|
6
|
-
from abc import ABC, abstractmethod
|
7
10
|
from dataclasses import dataclass
|
8
11
|
from io import BufferedIOBase, TextIOWrapper
|
9
|
-
from typing import Any,
|
12
|
+
from typing import Any, Optional
|
10
13
|
|
11
14
|
import orjson
|
12
15
|
import requests
|
13
16
|
|
14
17
|
from airbyte_cdk.models import FailureType
|
15
|
-
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
|
18
|
+
from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE, Decoder
|
19
|
+
from airbyte_cdk.sources.declarative.decoders.decoder_parser import (
|
20
|
+
PARSER_OUTPUT_TYPE,
|
21
|
+
PARSERS_BY_HEADER_TYPE,
|
22
|
+
PARSERS_TYPE,
|
23
|
+
Parser,
|
24
|
+
)
|
16
25
|
from airbyte_cdk.utils import AirbyteTracedException
|
17
26
|
|
18
27
|
logger = logging.getLogger("airbyte")
|
19
28
|
|
20
29
|
|
21
|
-
@dataclass
|
22
|
-
class Parser(ABC):
|
23
|
-
@abstractmethod
|
24
|
-
def parse(
|
25
|
-
self,
|
26
|
-
data: BufferedIOBase,
|
27
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
28
|
-
"""
|
29
|
-
Parse data and yield dictionaries.
|
30
|
-
"""
|
31
|
-
pass
|
32
|
-
|
33
|
-
|
34
30
|
@dataclass
|
35
31
|
class GzipParser(Parser):
|
36
32
|
inner_parser: Parser
|
37
33
|
|
38
|
-
def parse(
|
39
|
-
self,
|
40
|
-
data: BufferedIOBase,
|
41
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
34
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
42
35
|
"""
|
43
36
|
Decompress gzipped bytes and pass decompressed data to the inner parser.
|
37
|
+
|
38
|
+
IMPORTANT:
|
39
|
+
- If the data is not gzipped, reset the pointer and pass the data to the inner parser as is.
|
40
|
+
|
41
|
+
Note:
|
42
|
+
- The data is not decoded by default.
|
44
43
|
"""
|
44
|
+
|
45
45
|
with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
|
46
46
|
yield from self.inner_parser.parse(gzipobj)
|
47
47
|
|
@@ -50,7 +50,7 @@ class GzipParser(Parser):
|
|
50
50
|
class JsonParser(Parser):
|
51
51
|
encoding: str = "utf-8"
|
52
52
|
|
53
|
-
def parse(self, data: BufferedIOBase) ->
|
53
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
54
54
|
"""
|
55
55
|
Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.
|
56
56
|
"""
|
@@ -90,10 +90,7 @@ class JsonParser(Parser):
|
|
90
90
|
class JsonLineParser(Parser):
|
91
91
|
encoding: Optional[str] = "utf-8"
|
92
92
|
|
93
|
-
def parse(
|
94
|
-
self,
|
95
|
-
data: BufferedIOBase,
|
96
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
93
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
97
94
|
for line in data:
|
98
95
|
try:
|
99
96
|
yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
|
@@ -117,10 +114,7 @@ class CsvParser(Parser):
|
|
117
114
|
|
118
115
|
return self.delimiter
|
119
116
|
|
120
|
-
def parse(
|
121
|
-
self,
|
122
|
-
data: BufferedIOBase,
|
123
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
117
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
124
118
|
"""
|
125
119
|
Parse CSV data from decompressed bytes.
|
126
120
|
"""
|
@@ -130,27 +124,95 @@ class CsvParser(Parser):
|
|
130
124
|
yield row
|
131
125
|
|
132
126
|
|
133
|
-
@dataclass
|
134
127
|
class CompositeRawDecoder(Decoder):
|
135
128
|
"""
|
136
|
-
Decoder strategy to transform a requests.Response into a
|
129
|
+
Decoder strategy to transform a requests.Response into a PARSER_OUTPUT_TYPE
|
137
130
|
passed response.raw to parser(s).
|
138
|
-
|
139
|
-
|
131
|
+
|
132
|
+
Note: response.raw is not decoded/decompressed by default. Parsers should be instantiated recursively.
|
133
|
+
|
140
134
|
Example:
|
141
|
-
|
135
|
+
composite_raw_decoder = CompositeRawDecoder(
|
136
|
+
parser=GzipParser(
|
137
|
+
inner_parser=JsonLineParser(encoding="iso-8859-1")
|
138
|
+
)
|
139
|
+
)
|
142
140
|
"""
|
143
141
|
|
144
|
-
|
145
|
-
|
142
|
+
def __init__(
|
143
|
+
self,
|
144
|
+
parser: Parser,
|
145
|
+
stream_response: bool = True,
|
146
|
+
parsers_by_header: PARSERS_BY_HEADER_TYPE = None,
|
147
|
+
) -> None:
|
148
|
+
# since we moved from using `dataclass` to `__init__` method,
|
149
|
+
# we need to keep using the `parser` to be able to resolve the dependencies
|
150
|
+
# between the parsers correctly.
|
151
|
+
self.parser = parser
|
152
|
+
|
153
|
+
self._parsers_by_header = parsers_by_header if parsers_by_header else {}
|
154
|
+
self._stream_response = stream_response
|
155
|
+
|
156
|
+
@classmethod
|
157
|
+
def by_headers(
|
158
|
+
cls,
|
159
|
+
parsers: PARSERS_TYPE,
|
160
|
+
stream_response: bool,
|
161
|
+
fallback_parser: Parser,
|
162
|
+
) -> "CompositeRawDecoder":
|
163
|
+
"""
|
164
|
+
Create a CompositeRawDecoder instance based on header values.
|
165
|
+
|
166
|
+
Args:
|
167
|
+
parsers (PARSERS_TYPE): A list of tuples where each tuple contains headers, header values, and a parser.
|
168
|
+
stream_response (bool): A flag indicating whether the response should be streamed.
|
169
|
+
fallback_parser (Parser): A parser to use if no matching header is found.
|
170
|
+
|
171
|
+
Returns:
|
172
|
+
CompositeRawDecoder: An instance of CompositeRawDecoder configured with the provided parsers.
|
173
|
+
"""
|
174
|
+
parsers_by_header = {}
|
175
|
+
for headers, header_values, parser in parsers:
|
176
|
+
for header in headers:
|
177
|
+
parsers_by_header[header] = {header_value: parser for header_value in header_values}
|
178
|
+
return cls(fallback_parser, stream_response, parsers_by_header)
|
146
179
|
|
147
180
|
def is_stream_response(self) -> bool:
|
148
|
-
return self.
|
181
|
+
return self._stream_response
|
149
182
|
|
150
|
-
def decode(
|
151
|
-
|
152
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
183
|
+
def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
|
184
|
+
parser = self._select_parser(response)
|
153
185
|
if self.is_stream_response():
|
154
|
-
|
186
|
+
# urllib3 mentions that some interfaces don't play nice with auto_close
|
187
|
+
# More info here: https://urllib3.readthedocs.io/en/stable/user-guide.html#using-io-wrappers-with-response-content
|
188
|
+
# We have indeed observed some issues with CSV parsing.
|
189
|
+
# Hence, we will manage the closing of the file ourselves until we find a better solution.
|
190
|
+
response.raw.auto_close = False
|
191
|
+
yield from parser.parse(
|
192
|
+
data=response.raw, # type: ignore[arg-type]
|
193
|
+
)
|
194
|
+
response.raw.close()
|
155
195
|
else:
|
156
|
-
yield from
|
196
|
+
yield from parser.parse(data=io.BytesIO(response.content))
|
197
|
+
|
198
|
+
def _select_parser(self, response: requests.Response) -> Parser:
|
199
|
+
"""
|
200
|
+
Selects the appropriate parser based on the response headers.
|
201
|
+
|
202
|
+
This method iterates through the `_parsers_by_header` dictionary to find a matching parser
|
203
|
+
based on the headers in the response. If a matching header and header value are found,
|
204
|
+
the corresponding parser is returned. If no match is found, the default parser is returned.
|
205
|
+
|
206
|
+
Args:
|
207
|
+
response (requests.Response): The HTTP response object containing headers to check.
|
208
|
+
|
209
|
+
Returns:
|
210
|
+
Parser: The parser corresponding to the matched header value, or the default parser if no match is found.
|
211
|
+
"""
|
212
|
+
for header, parser_by_header_value in self._parsers_by_header.items():
|
213
|
+
if (
|
214
|
+
header in response.headers
|
215
|
+
and response.headers[header] in parser_by_header_value.keys()
|
216
|
+
):
|
217
|
+
return parser_by_header_value[response.headers[header]]
|
218
|
+
return self.parser
|
@@ -8,6 +8,8 @@ from typing import Any, Generator, MutableMapping
|
|
8
8
|
|
9
9
|
import requests
|
10
10
|
|
11
|
+
DECODER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
|
12
|
+
|
11
13
|
|
12
14
|
@dataclass
|
13
15
|
class Decoder:
|
@@ -22,9 +24,7 @@ class Decoder:
|
|
22
24
|
"""
|
23
25
|
|
24
26
|
@abstractmethod
|
25
|
-
def decode(
|
26
|
-
self, response: requests.Response
|
27
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
27
|
+
def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
|
28
28
|
"""
|
29
29
|
Decodes a requests.Response into a Mapping[str, Any] or an array
|
30
30
|
:param response: the response to decode
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
+
#
|
4
|
+
|
5
|
+
|
6
|
+
import logging
|
7
|
+
from abc import ABC, abstractmethod
|
8
|
+
from dataclasses import dataclass
|
9
|
+
from io import BufferedIOBase
|
10
|
+
from typing import Any, Dict, Generator, List, MutableMapping, Optional, Set, Tuple
|
11
|
+
|
12
|
+
logger = logging.getLogger("airbyte")
|
13
|
+
|
14
|
+
|
15
|
+
PARSER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
|
16
|
+
|
17
|
+
|
18
|
+
@dataclass
|
19
|
+
class Parser(ABC):
|
20
|
+
@abstractmethod
|
21
|
+
def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
|
22
|
+
"""
|
23
|
+
Parse data and yield dictionaries.
|
24
|
+
"""
|
25
|
+
pass
|
26
|
+
|
27
|
+
|
28
|
+
# reusable parser types
|
29
|
+
PARSERS_TYPE = List[Tuple[Set[str], Set[str], Parser]]
|
30
|
+
PARSERS_BY_HEADER_TYPE = Optional[Dict[str, Dict[str, Parser]]]
|
@@ -6,16 +6,13 @@ import logging
|
|
6
6
|
import zipfile
|
7
7
|
from dataclasses import dataclass
|
8
8
|
from io import BytesIO
|
9
|
-
from typing import Any, Generator, MutableMapping
|
10
9
|
|
11
|
-
import orjson
|
12
10
|
import requests
|
13
11
|
|
14
12
|
from airbyte_cdk.models import FailureType
|
15
13
|
from airbyte_cdk.sources.declarative.decoders import Decoder
|
16
|
-
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import
|
17
|
-
|
18
|
-
)
|
14
|
+
from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import Parser
|
15
|
+
from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE
|
19
16
|
from airbyte_cdk.utils import AirbyteTracedException
|
20
17
|
|
21
18
|
logger = logging.getLogger("airbyte")
|
@@ -28,16 +25,16 @@ class ZipfileDecoder(Decoder):
|
|
28
25
|
def is_stream_response(self) -> bool:
|
29
26
|
return False
|
30
27
|
|
31
|
-
def decode(
|
32
|
-
self, response: requests.Response
|
33
|
-
) -> Generator[MutableMapping[str, Any], None, None]:
|
28
|
+
def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
|
34
29
|
try:
|
35
30
|
with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
|
36
31
|
for file_name in zip_file.namelist():
|
37
32
|
unzipped_content = zip_file.read(file_name)
|
38
33
|
buffered_content = BytesIO(unzipped_content)
|
39
34
|
try:
|
40
|
-
yield from self.parser.parse(
|
35
|
+
yield from self.parser.parse(
|
36
|
+
buffered_content,
|
37
|
+
)
|
41
38
|
except Exception as e:
|
42
39
|
logger.error(
|
43
40
|
f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
|
@@ -15,7 +15,6 @@ from numpy import nan
|
|
15
15
|
|
16
16
|
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
17
17
|
|
18
|
-
EMPTY_STR: str = ""
|
19
18
|
DEFAULT_ENCODING: str = "utf-8"
|
20
19
|
DOWNLOAD_CHUNK_SIZE: int = 1024 * 10
|
21
20
|
|
@@ -136,7 +135,6 @@ class ResponseToFileExtractor(RecordExtractor):
|
|
136
135
|
"""
|
137
136
|
|
138
137
|
try:
|
139
|
-
# TODO: Add support for other file types, like `json`, with `pd.read_json()`
|
140
138
|
with open(path, "r", encoding=file_encoding) as data:
|
141
139
|
chunks = pd.read_csv(
|
142
140
|
data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
|
@@ -12,6 +12,8 @@ import pytz
|
|
12
12
|
from dateutil import parser
|
13
13
|
from isodate import parse_duration
|
14
14
|
|
15
|
+
from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
|
16
|
+
|
15
17
|
"""
|
16
18
|
This file contains macros that can be evaluated by a `JinjaInterpolation` object
|
17
19
|
"""
|
@@ -171,11 +173,7 @@ def format_datetime(
|
|
171
173
|
dt_datetime = (
|
172
174
|
datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
|
173
175
|
)
|
174
|
-
|
175
|
-
return str(int(dt_datetime.timestamp()))
|
176
|
-
elif format == "%ms":
|
177
|
-
return str(int(dt_datetime.timestamp() * 1_000_000))
|
178
|
-
return dt_datetime.strftime(format)
|
176
|
+
return DatetimeParser().format(dt=dt_datetime, format=format)
|
179
177
|
|
180
178
|
|
181
179
|
_macros_list = [
|
@@ -939,7 +939,7 @@ class MinMaxDatetime(BaseModel):
|
|
939
939
|
)
|
940
940
|
datetime_format: Optional[str] = Field(
|
941
941
|
"",
|
942
|
-
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
|
942
|
+
description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
|
943
943
|
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
|
944
944
|
title="Datetime Format",
|
945
945
|
)
|
@@ -1545,7 +1545,7 @@ class DatetimeBasedCursor(BaseModel):
|
|
1545
1545
|
)
|
1546
1546
|
datetime_format: str = Field(
|
1547
1547
|
...,
|
1548
|
-
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
|
1548
|
+
description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
|
1549
1549
|
examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"],
|
1550
1550
|
title="Outgoing Datetime Format",
|
1551
1551
|
)
|
@@ -2072,12 +2072,14 @@ class HttpRequester(BaseModel):
|
|
2072
2072
|
description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
|
2073
2073
|
examples=[
|
2074
2074
|
"https://connect.squareup.com/v2",
|
2075
|
-
"{{ config['base_url'] or 'https://app.posthog.com'}}/api
|
2075
|
+
"{{ config['base_url'] or 'https://app.posthog.com'}}/api",
|
2076
|
+
"https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups",
|
2077
|
+
"https://example.com/api/v1/resource/{{ next_page_token['id'] }}",
|
2076
2078
|
],
|
2077
2079
|
title="API Base URL",
|
2078
2080
|
)
|
2079
|
-
path: str = Field(
|
2080
|
-
|
2081
|
+
path: Optional[str] = Field(
|
2082
|
+
None,
|
2081
2083
|
description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
|
2082
2084
|
examples=[
|
2083
2085
|
"/products",
|
@@ -56,7 +56,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
|
|
56
56
|
)
|
57
57
|
from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
|
58
58
|
from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
|
59
|
-
from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
|
59
|
+
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
|
60
60
|
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
|
61
61
|
from airbyte_cdk.sources.declarative.decoders import (
|
62
62
|
Decoder,
|
@@ -2193,18 +2193,40 @@ class ModelToComponentFactory:
|
|
2193
2193
|
stream_response=False if self._emit_connector_builder_messages else True,
|
2194
2194
|
)
|
2195
2195
|
|
2196
|
-
|
2197
|
-
|
2196
|
+
def create_jsonl_decoder(
|
2197
|
+
self, model: JsonlDecoderModel, config: Config, **kwargs: Any
|
2198
|
+
) -> Decoder:
|
2198
2199
|
return CompositeRawDecoder(
|
2199
|
-
parser=ModelToComponentFactory._get_parser(model, config),
|
2200
|
+
parser=ModelToComponentFactory._get_parser(model, config),
|
2201
|
+
stream_response=False if self._emit_connector_builder_messages else True,
|
2200
2202
|
)
|
2201
2203
|
|
2202
2204
|
def create_gzip_decoder(
|
2203
2205
|
self, model: GzipDecoderModel, config: Config, **kwargs: Any
|
2204
2206
|
) -> Decoder:
|
2205
|
-
|
2206
|
-
|
2207
|
-
|
2207
|
+
_compressed_response_types = {
|
2208
|
+
"gzip",
|
2209
|
+
"x-gzip",
|
2210
|
+
"gzip, deflate",
|
2211
|
+
"x-gzip, deflate",
|
2212
|
+
"application/zip",
|
2213
|
+
"application/gzip",
|
2214
|
+
"application/x-gzip",
|
2215
|
+
"application/x-zip-compressed",
|
2216
|
+
}
|
2217
|
+
|
2218
|
+
gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser
|
2219
|
+
|
2220
|
+
if self._emit_connector_builder_messages:
|
2221
|
+
# This is very surprising but if the response is not streamed,
|
2222
|
+
# CompositeRawDecoder calls response.content and the requests library actually uncompresses the data as opposed to response.raw,
|
2223
|
+
# which uses urllib3 directly and does not uncompress the data.
|
2224
|
+
return CompositeRawDecoder(gzip_parser.inner_parser, False)
|
2225
|
+
|
2226
|
+
return CompositeRawDecoder.by_headers(
|
2227
|
+
[({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
|
2228
|
+
stream_response=True,
|
2229
|
+
fallback_parser=gzip_parser.inner_parser,
|
2208
2230
|
)
|
2209
2231
|
|
2210
2232
|
@staticmethod
|
@@ -2753,7 +2775,10 @@ class ModelToComponentFactory:
|
|
2753
2775
|
)
|
2754
2776
|
paginator = (
|
2755
2777
|
self._create_component_from_model(
|
2756
|
-
model=model.download_paginator,
|
2778
|
+
model=model.download_paginator,
|
2779
|
+
decoder=decoder,
|
2780
|
+
config=config,
|
2781
|
+
url_base="",
|
2757
2782
|
)
|
2758
2783
|
if model.download_paginator
|
2759
2784
|
else NoPagination(parameters={})
|
@@ -2870,7 +2895,10 @@ class ModelToComponentFactory:
|
|
2870
2895
|
model=model.status_extractor, decoder=decoder, config=config, name=name
|
2871
2896
|
)
|
2872
2897
|
download_target_extractor = self._create_component_from_model(
|
2873
|
-
model=model.download_target_extractor,
|
2898
|
+
model=model.download_target_extractor,
|
2899
|
+
decoder=decoder,
|
2900
|
+
config=config,
|
2901
|
+
name=name,
|
2874
2902
|
)
|
2875
2903
|
job_repository: AsyncJobRepository = AsyncHttpJobRepository(
|
2876
2904
|
creation_requester=creation_requester,
|