airbyte-cdk 6.38.0.dev0__py3-none-any.whl → 6.38.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. airbyte_cdk/entrypoint.py +6 -6
  2. airbyte_cdk/logger.py +1 -4
  3. airbyte_cdk/sources/declarative/datetime/__init__.py +0 -4
  4. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +4 -0
  5. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +10 -2
  6. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +104 -42
  7. airbyte_cdk/sources/declarative/decoders/decoder.py +3 -3
  8. airbyte_cdk/sources/declarative/decoders/decoder_parser.py +30 -0
  9. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +6 -9
  10. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +0 -2
  11. airbyte_cdk/sources/declarative/interpolation/macros.py +3 -5
  12. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +7 -5
  13. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +37 -9
  14. airbyte_cdk/sources/declarative/requesters/http_requester.py +77 -25
  15. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
  16. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
  17. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
  18. airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
  19. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
  20. airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
  21. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
  22. airbyte_cdk/sources/types.py +1 -0
  23. airbyte_cdk/utils/mapping_helpers.py +18 -1
  24. {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/METADATA +3 -3
  25. {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/RECORD +29 -33
  26. airbyte_cdk/sources/embedded/__init__.py +0 -3
  27. airbyte_cdk/sources/embedded/base_integration.py +0 -61
  28. airbyte_cdk/sources/embedded/catalog.py +0 -57
  29. airbyte_cdk/sources/embedded/runner.py +0 -57
  30. airbyte_cdk/sources/embedded/tools.py +0 -27
  31. {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/LICENSE.txt +0 -0
  32. {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/LICENSE_SHORT +0 -0
  33. {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/WHEEL +0 -0
  34. {airbyte_cdk-6.38.0.dev0.dist-info → airbyte_cdk-6.38.2.dist-info}/entry_points.txt +0 -0
airbyte_cdk/entrypoint.py CHANGED
@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
22
22
 
23
23
  from airbyte_cdk.connector import TConfig
24
24
  from airbyte_cdk.exception_handler import init_uncaught_exception_handler
25
- from airbyte_cdk.logger import PRINT_BUFFER, init_logger
25
+ from airbyte_cdk.logger import init_logger
26
26
  from airbyte_cdk.models import (
27
27
  AirbyteConnectionStatus,
28
28
  AirbyteMessage,
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
337
337
  parsed_args = source_entrypoint.parse_args(args)
338
338
  # temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
339
339
  # Refer to: https://github.com/airbytehq/oncall/issues/6235
340
- with PRINT_BUFFER:
341
- for message in source_entrypoint.run(parsed_args):
342
- # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
343
- # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
344
- print(f"{message}\n", end="")
340
+ # with PrintBuffer():
341
+ for message in source_entrypoint.run(parsed_args):
342
+ # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
343
+ # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
344
+ print(f"{message}\n", end="", flush=True)
345
345
 
346
346
 
347
347
  def _init_internal_request_filter() -> None:
airbyte_cdk/logger.py CHANGED
@@ -16,11 +16,8 @@ from airbyte_cdk.models import (
16
16
  Level,
17
17
  Type,
18
18
  )
19
- from airbyte_cdk.utils import PrintBuffer
20
19
  from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
21
20
 
22
- PRINT_BUFFER = PrintBuffer(flush_interval=0.1)
23
-
24
21
  LOGGING_CONFIG = {
25
22
  "version": 1,
26
23
  "disable_existing_loggers": False,
@@ -30,7 +27,7 @@ LOGGING_CONFIG = {
30
27
  "handlers": {
31
28
  "console": {
32
29
  "class": "logging.StreamHandler",
33
- "stream": PRINT_BUFFER,
30
+ "stream": "ext://sys.stdout",
34
31
  "formatter": "airbyte",
35
32
  },
36
33
  },
@@ -1,7 +1,3 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
-
5
- from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
6
-
7
- __all__ = ["MinMaxDatetime"]
@@ -29,6 +29,8 @@ class DatetimeParser:
29
29
  return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
30
30
  elif format == "%s_as_float":
31
31
  return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
32
+ elif format == "%epoch_microseconds":
33
+ return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
32
34
  elif format == "%ms":
33
35
  return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
34
36
  elif "%_ms" in format:
@@ -46,6 +48,8 @@ class DatetimeParser:
46
48
  return str(int(dt.timestamp()))
47
49
  if format == "%s_as_float":
48
50
  return str(float(dt.timestamp()))
51
+ if format == "%epoch_microseconds":
52
+ return str(int(dt.timestamp() * 1_000_000))
49
53
  if format == "%ms":
50
54
  # timestamp() returns a float representing the number of seconds since the unix epoch
51
55
  return str(int(dt.timestamp() * 1000))
@@ -1794,7 +1794,6 @@ definitions:
1794
1794
  type: object
1795
1795
  required:
1796
1796
  - type
1797
- - path
1798
1797
  - url_base
1799
1798
  properties:
1800
1799
  type:
@@ -1806,9 +1805,18 @@ definitions:
1806
1805
  type: string
1807
1806
  interpolation_context:
1808
1807
  - config
1808
+ - next_page_token
1809
+ - stream_interval
1810
+ - stream_partition
1811
+ - stream_slice
1812
+ - creation_response
1813
+ - polling_response
1814
+ - download_target
1809
1815
  examples:
1810
1816
  - "https://connect.squareup.com/v2"
1811
- - "{{ config['base_url'] or 'https://app.posthog.com'}}/api/"
1817
+ - "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
1818
+ - "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
1819
+ - "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
1812
1820
  path:
1813
1821
  title: URL Path
1814
1822
  description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.
@@ -1,47 +1,47 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
1
5
  import csv
2
6
  import gzip
3
7
  import io
4
8
  import json
5
9
  import logging
6
- from abc import ABC, abstractmethod
7
10
  from dataclasses import dataclass
8
11
  from io import BufferedIOBase, TextIOWrapper
9
- from typing import Any, Generator, MutableMapping, Optional
12
+ from typing import Any, Optional
10
13
 
11
14
  import orjson
12
15
  import requests
13
16
 
14
17
  from airbyte_cdk.models import FailureType
15
- from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
18
+ from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE, Decoder
19
+ from airbyte_cdk.sources.declarative.decoders.decoder_parser import (
20
+ PARSER_OUTPUT_TYPE,
21
+ PARSERS_BY_HEADER_TYPE,
22
+ PARSERS_TYPE,
23
+ Parser,
24
+ )
16
25
  from airbyte_cdk.utils import AirbyteTracedException
17
26
 
18
27
  logger = logging.getLogger("airbyte")
19
28
 
20
29
 
21
- @dataclass
22
- class Parser(ABC):
23
- @abstractmethod
24
- def parse(
25
- self,
26
- data: BufferedIOBase,
27
- ) -> Generator[MutableMapping[str, Any], None, None]:
28
- """
29
- Parse data and yield dictionaries.
30
- """
31
- pass
32
-
33
-
34
30
  @dataclass
35
31
  class GzipParser(Parser):
36
32
  inner_parser: Parser
37
33
 
38
- def parse(
39
- self,
40
- data: BufferedIOBase,
41
- ) -> Generator[MutableMapping[str, Any], None, None]:
34
+ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
42
35
  """
43
36
  Decompress gzipped bytes and pass decompressed data to the inner parser.
37
+
38
+ IMPORTANT:
39
+ - This parser always attempts gzip decompression; responses that are not gzipped must be routed to the inner parser by the caller (see `CompositeRawDecoder._select_parser`).
40
+
41
+ Note:
42
+ - The data is not decoded by default.
44
43
  """
44
+
45
45
  with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
46
46
  yield from self.inner_parser.parse(gzipobj)
47
47
 
@@ -50,7 +50,7 @@ class GzipParser(Parser):
50
50
  class JsonParser(Parser):
51
51
  encoding: str = "utf-8"
52
52
 
53
- def parse(self, data: BufferedIOBase) -> Generator[MutableMapping[str, Any], None, None]:
53
+ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
54
54
  """
55
55
  Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.
56
56
  """
@@ -90,10 +90,7 @@ class JsonParser(Parser):
90
90
  class JsonLineParser(Parser):
91
91
  encoding: Optional[str] = "utf-8"
92
92
 
93
- def parse(
94
- self,
95
- data: BufferedIOBase,
96
- ) -> Generator[MutableMapping[str, Any], None, None]:
93
+ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
97
94
  for line in data:
98
95
  try:
99
96
  yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
@@ -117,10 +114,7 @@ class CsvParser(Parser):
117
114
 
118
115
  return self.delimiter
119
116
 
120
- def parse(
121
- self,
122
- data: BufferedIOBase,
123
- ) -> Generator[MutableMapping[str, Any], None, None]:
117
+ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
124
118
  """
125
119
  Parse CSV data from decompressed bytes.
126
120
  """
@@ -130,27 +124,95 @@ class CsvParser(Parser):
130
124
  yield row
131
125
 
132
126
 
133
- @dataclass
134
127
  class CompositeRawDecoder(Decoder):
135
128
  """
136
- Decoder strategy to transform a requests.Response into a Generator[MutableMapping[str, Any], None, None]
129
+ Decoder strategy to transform a requests.Response into a PARSER_OUTPUT_TYPE
137
130
  passed response.raw to parser(s).
138
- Note: response.raw is not decoded/decompressed by default.
139
- parsers should be instantiated recursively.
131
+
132
+ Note: response.raw is not decoded/decompressed by default. Parsers should be instantiated recursively.
133
+
140
134
  Example:
141
- composite_raw_decoder = CompositeRawDecoder(parser=GzipParser(inner_parser=JsonLineParser(encoding="iso-8859-1")))
135
+ composite_raw_decoder = CompositeRawDecoder(
136
+ parser=GzipParser(
137
+ inner_parser=JsonLineParser(encoding="iso-8859-1")
138
+ )
139
+ )
142
140
  """
143
141
 
144
- parser: Parser
145
- stream_response: bool = True
142
+ def __init__(
143
+ self,
144
+ parser: Parser,
145
+ stream_response: bool = True,
146
+ parsers_by_header: PARSERS_BY_HEADER_TYPE = None,
147
+ ) -> None:
148
+ # since we moved from using `dataclass` to `__init__` method,
149
+ # we need to keep using the `parser` to be able to resolve the dependencies
150
+ # between the parsers correctly.
151
+ self.parser = parser
152
+
153
+ self._parsers_by_header = parsers_by_header if parsers_by_header else {}
154
+ self._stream_response = stream_response
155
+
156
+ @classmethod
157
+ def by_headers(
158
+ cls,
159
+ parsers: PARSERS_TYPE,
160
+ stream_response: bool,
161
+ fallback_parser: Parser,
162
+ ) -> "CompositeRawDecoder":
163
+ """
164
+ Create a CompositeRawDecoder instance based on header values.
165
+
166
+ Args:
167
+ parsers (PARSERS_TYPE): A list of tuples where each tuple contains headers, header values, and a parser.
168
+ stream_response (bool): A flag indicating whether the response should be streamed.
169
+ fallback_parser (Parser): A parser to use if no matching header is found.
170
+
171
+ Returns:
172
+ CompositeRawDecoder: An instance of CompositeRawDecoder configured with the provided parsers.
173
+ """
174
+ parsers_by_header = {}
175
+ for headers, header_values, parser in parsers:
176
+ for header in headers:
177
+ parsers_by_header[header] = {header_value: parser for header_value in header_values}
178
+ return cls(fallback_parser, stream_response, parsers_by_header)
146
179
 
147
180
  def is_stream_response(self) -> bool:
148
- return self.stream_response
181
+ return self._stream_response
149
182
 
150
- def decode(
151
- self, response: requests.Response
152
- ) -> Generator[MutableMapping[str, Any], None, None]:
183
+ def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
184
+ parser = self._select_parser(response)
153
185
  if self.is_stream_response():
154
- yield from self.parser.parse(data=response.raw) # type: ignore[arg-type]
186
+ # urllib3 mentions that some interfaces don't play nice with auto_close
187
+ # More info here: https://urllib3.readthedocs.io/en/stable/user-guide.html#using-io-wrappers-with-response-content
188
+ # We have indeed observed some issues with CSV parsing.
189
+ # Hence, we will manage the closing of the file ourselves until we find a better solution.
190
+ response.raw.auto_close = False
191
+ yield from parser.parse(
192
+ data=response.raw, # type: ignore[arg-type]
193
+ )
194
+ response.raw.close()
155
195
  else:
156
- yield from self.parser.parse(data=io.BytesIO(response.content))
196
+ yield from parser.parse(data=io.BytesIO(response.content))
197
+
198
+ def _select_parser(self, response: requests.Response) -> Parser:
199
+ """
200
+ Selects the appropriate parser based on the response headers.
201
+
202
+ This method iterates through the `_parsers_by_header` dictionary to find a matching parser
203
+ based on the headers in the response. If a matching header and header value are found,
204
+ the corresponding parser is returned. If no match is found, the default parser is returned.
205
+
206
+ Args:
207
+ response (requests.Response): The HTTP response object containing headers to check.
208
+
209
+ Returns:
210
+ Parser: The parser corresponding to the matched header value, or the default parser if no match is found.
211
+ """
212
+ for header, parser_by_header_value in self._parsers_by_header.items():
213
+ if (
214
+ header in response.headers
215
+ and response.headers[header] in parser_by_header_value.keys()
216
+ ):
217
+ return parser_by_header_value[response.headers[header]]
218
+ return self.parser
@@ -8,6 +8,8 @@ from typing import Any, Generator, MutableMapping
8
8
 
9
9
  import requests
10
10
 
11
+ DECODER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
12
+
11
13
 
12
14
  @dataclass
13
15
  class Decoder:
@@ -22,9 +24,7 @@ class Decoder:
22
24
  """
23
25
 
24
26
  @abstractmethod
25
- def decode(
26
- self, response: requests.Response
27
- ) -> Generator[MutableMapping[str, Any], None, None]:
27
+ def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
28
28
  """
29
29
  Decodes a requests.Response into a Mapping[str, Any] or an array
30
30
  :param response: the response to decode
@@ -0,0 +1,30 @@
1
+ #
2
+ # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+
6
+ import logging
7
+ from abc import ABC, abstractmethod
8
+ from dataclasses import dataclass
9
+ from io import BufferedIOBase
10
+ from typing import Any, Dict, Generator, List, MutableMapping, Optional, Set, Tuple
11
+
12
+ logger = logging.getLogger("airbyte")
13
+
14
+
15
+ PARSER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
16
+
17
+
18
+ @dataclass
19
+ class Parser(ABC):
20
+ @abstractmethod
21
+ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
22
+ """
23
+ Parse data and yield dictionaries.
24
+ """
25
+ pass
26
+
27
+
28
+ # reusable parser types
29
+ PARSERS_TYPE = List[Tuple[Set[str], Set[str], Parser]]
30
+ PARSERS_BY_HEADER_TYPE = Optional[Dict[str, Dict[str, Parser]]]
@@ -6,16 +6,13 @@ import logging
6
6
  import zipfile
7
7
  from dataclasses import dataclass
8
8
  from io import BytesIO
9
- from typing import Any, Generator, MutableMapping
10
9
 
11
- import orjson
12
10
  import requests
13
11
 
14
12
  from airbyte_cdk.models import FailureType
15
13
  from airbyte_cdk.sources.declarative.decoders import Decoder
16
- from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
17
- Parser,
18
- )
14
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import Parser
15
+ from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE
19
16
  from airbyte_cdk.utils import AirbyteTracedException
20
17
 
21
18
  logger = logging.getLogger("airbyte")
@@ -28,16 +25,16 @@ class ZipfileDecoder(Decoder):
28
25
  def is_stream_response(self) -> bool:
29
26
  return False
30
27
 
31
- def decode(
32
- self, response: requests.Response
33
- ) -> Generator[MutableMapping[str, Any], None, None]:
28
+ def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
34
29
  try:
35
30
  with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
36
31
  for file_name in zip_file.namelist():
37
32
  unzipped_content = zip_file.read(file_name)
38
33
  buffered_content = BytesIO(unzipped_content)
39
34
  try:
40
- yield from self.parser.parse(buffered_content)
35
+ yield from self.parser.parse(
36
+ buffered_content,
37
+ )
41
38
  except Exception as e:
42
39
  logger.error(
43
40
  f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
@@ -15,7 +15,6 @@ from numpy import nan
15
15
 
16
16
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
17
17
 
18
- EMPTY_STR: str = ""
19
18
  DEFAULT_ENCODING: str = "utf-8"
20
19
  DOWNLOAD_CHUNK_SIZE: int = 1024 * 10
21
20
 
@@ -136,7 +135,6 @@ class ResponseToFileExtractor(RecordExtractor):
136
135
  """
137
136
 
138
137
  try:
139
- # TODO: Add support for other file types, like `json`, with `pd.read_json()`
140
138
  with open(path, "r", encoding=file_encoding) as data:
141
139
  chunks = pd.read_csv(
142
140
  data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object
@@ -12,6 +12,8 @@ import pytz
12
12
  from dateutil import parser
13
13
  from isodate import parse_duration
14
14
 
15
+ from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
16
+
15
17
  """
16
18
  This file contains macros that can be evaluated by a `JinjaInterpolation` object
17
19
  """
@@ -171,11 +173,7 @@ def format_datetime(
171
173
  dt_datetime = (
172
174
  datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
173
175
  )
174
- if format == "%s":
175
- return str(int(dt_datetime.timestamp()))
176
- elif format == "%ms":
177
- return str(int(dt_datetime.timestamp() * 1_000_000))
178
- return dt_datetime.strftime(format)
176
+ return DatetimeParser().format(dt=dt_datetime, format=format)
179
177
 
180
178
 
181
179
  _macros_list = [
@@ -939,7 +939,7 @@ class MinMaxDatetime(BaseModel):
939
939
  )
940
940
  datetime_format: Optional[str] = Field(
941
941
  "",
942
- description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
942
+ description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date representation - `08/16/1988`\n * **%X**: Time representation - `21:30:00`\n * **%%**: Literal \'%\' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
943
943
  examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
944
944
  title="Datetime Format",
945
945
  )
@@ -1545,7 +1545,7 @@ class DatetimeBasedCursor(BaseModel):
1545
1545
  )
1546
1546
  datetime_format: str = Field(
1547
1547
  ...,
1548
- description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
1548
+ description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n * **%s**: Epoch unix timestamp - `1686218963`\n * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n * **%a**: Weekday (abbreviated) - `Sun`\n * **%A**: Weekday (full) - `Sunday`\n * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n * **%b**: Month (abbreviated) - `Jan`\n * **%B**: Month (full) - `January`\n * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n * **%p**: AM/PM indicator\n * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n * **%x**: Date standard format - `08/16/1988`\n * **%X**: Time standard format - `21:30:00`\n * **%%**: Literal '%' character\n\n Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
1549
1549
  examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"],
1550
1550
  title="Outgoing Datetime Format",
1551
1551
  )
@@ -2072,12 +2072,14 @@ class HttpRequester(BaseModel):
2072
2072
  description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
2073
2073
  examples=[
2074
2074
  "https://connect.squareup.com/v2",
2075
- "{{ config['base_url'] or 'https://app.posthog.com'}}/api/",
2075
+ "{{ config['base_url'] or 'https://app.posthog.com'}}/api",
2076
+ "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups",
2077
+ "https://example.com/api/v1/resource/{{ next_page_token['id'] }}",
2076
2078
  ],
2077
2079
  title="API Base URL",
2078
2080
  )
2079
- path: str = Field(
2080
- ...,
2081
+ path: Optional[str] = Field(
2082
+ None,
2081
2083
  description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
2082
2084
  examples=[
2083
2085
  "/products",
@@ -56,7 +56,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
56
56
  )
57
57
  from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
58
58
  from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
59
- from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
59
+ from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
60
60
  from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
61
61
  from airbyte_cdk.sources.declarative.decoders import (
62
62
  Decoder,
@@ -2193,18 +2193,40 @@ class ModelToComponentFactory:
2193
2193
  stream_response=False if self._emit_connector_builder_messages else True,
2194
2194
  )
2195
2195
 
2196
- @staticmethod
2197
- def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> Decoder:
2196
+ def create_jsonl_decoder(
2197
+ self, model: JsonlDecoderModel, config: Config, **kwargs: Any
2198
+ ) -> Decoder:
2198
2199
  return CompositeRawDecoder(
2199
- parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
2200
+ parser=ModelToComponentFactory._get_parser(model, config),
2201
+ stream_response=False if self._emit_connector_builder_messages else True,
2200
2202
  )
2201
2203
 
2202
2204
  def create_gzip_decoder(
2203
2205
  self, model: GzipDecoderModel, config: Config, **kwargs: Any
2204
2206
  ) -> Decoder:
2205
- return CompositeRawDecoder(
2206
- parser=ModelToComponentFactory._get_parser(model, config),
2207
- stream_response=False if self._emit_connector_builder_messages else True,
2207
+ _compressed_response_types = {
2208
+ "gzip",
2209
+ "x-gzip",
2210
+ "gzip, deflate",
2211
+ "x-gzip, deflate",
2212
+ "application/zip",
2213
+ "application/gzip",
2214
+ "application/x-gzip",
2215
+ "application/x-zip-compressed",
2216
+ }
2217
+
2218
+ gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config) # type: ignore # based on the model, we know this will be a GzipParser
2219
+
2220
+ if self._emit_connector_builder_messages:
2221
+ # This is very surprising but if the response is not streamed,
2222
+ # CompositeRawDecoder calls response.content and the requests library actually uncompresses the data as opposed to response.raw,
2223
+ # which uses urllib3 directly and does not uncompress the data.
2224
+ return CompositeRawDecoder(gzip_parser.inner_parser, False)
2225
+
2226
+ return CompositeRawDecoder.by_headers(
2227
+ [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
2228
+ stream_response=True,
2229
+ fallback_parser=gzip_parser.inner_parser,
2208
2230
  )
2209
2231
 
2210
2232
  @staticmethod
@@ -2753,7 +2775,10 @@ class ModelToComponentFactory:
2753
2775
  )
2754
2776
  paginator = (
2755
2777
  self._create_component_from_model(
2756
- model=model.download_paginator, decoder=decoder, config=config, url_base=""
2778
+ model=model.download_paginator,
2779
+ decoder=decoder,
2780
+ config=config,
2781
+ url_base="",
2757
2782
  )
2758
2783
  if model.download_paginator
2759
2784
  else NoPagination(parameters={})
@@ -2870,7 +2895,10 @@ class ModelToComponentFactory:
2870
2895
  model=model.status_extractor, decoder=decoder, config=config, name=name
2871
2896
  )
2872
2897
  download_target_extractor = self._create_component_from_model(
2873
- model=model.download_target_extractor, decoder=decoder, config=config, name=name
2898
+ model=model.download_target_extractor,
2899
+ decoder=decoder,
2900
+ config=config,
2901
+ name=name,
2874
2902
  )
2875
2903
  job_repository: AsyncJobRepository = AsyncHttpJobRepository(
2876
2904
  creation_requester=creation_requester,