PyPI - airbyte-cdk - Versions diffs - 6.38.0.dev0__py3-none-any.whl → 6.38.2__py3-none-any.whl - Mend

airbyte-cdk 6.38.0.dev0__py3-none-any.whl → 6.38.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

airbyte_cdk/entrypoint.py CHANGED Viewed

@@ -22,7 +22,7 @@ from requests import PreparedRequest, Response, Session
 from airbyte_cdk.connector import TConfig
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
-from airbyte_cdk.logger import PRINT_BUFFER, init_logger
+from airbyte_cdk.logger import init_logger
 from airbyte_cdk.models import (
     AirbyteConnectionStatus,
     AirbyteMessage,
@@ -337,11 +337,11 @@ def launch(source: Source, args: List[str]) -> None:
     parsed_args = source_entrypoint.parse_args(args)
     # temporarily removes the PrintBuffer because we're seeing weird print behavior for concurrent syncs
     # Refer to: https://github.com/airbytehq/oncall/issues/6235
-    with PRINT_BUFFER:
-        for message in source_entrypoint.run(parsed_args):
-            # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
-            # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
-            print(f"{message}\n", end="")
+    # with PrintBuffer():
+    for message in source_entrypoint.run(parsed_args):
+        # simply printing is creating issues for concurrent CDK as Python uses different two instructions to print: one for the message and
+        # the other for the break line. Adding `\n` to the message ensure that both are printed at the same time
+        print(f"{message}\n", end="", flush=True)
 def _init_internal_request_filter() -> None:

airbyte_cdk/logger.py CHANGED Viewed

@@ -16,11 +16,8 @@ from airbyte_cdk.models import (
     Level,
     Type,
 )
-from airbyte_cdk.utils import PrintBuffer
 from airbyte_cdk.utils.airbyte_secrets_utils import filter_secrets
-PRINT_BUFFER = PrintBuffer(flush_interval=0.1)
 LOGGING_CONFIG = {
     "version": 1,
     "disable_existing_loggers": False,
@@ -30,7 +27,7 @@ LOGGING_CONFIG = {
     "handlers": {
         "console": {
             "class": "logging.StreamHandler",
-            "stream": PRINT_BUFFER,
+            "stream": "ext://sys.stdout",
             "formatter": "airbyte",
         },
     },

airbyte_cdk/sources/declarative/datetime/__init__.py CHANGED Viewed

@@ -1,7 +1,3 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
-__all__ = ["MinMaxDatetime"]

airbyte_cdk/sources/declarative/datetime/datetime_parser.py CHANGED Viewed

@@ -29,6 +29,8 @@ class DatetimeParser:
             return datetime.datetime.fromtimestamp(int(date), tz=datetime.timezone.utc)
         elif format == "%s_as_float":
             return datetime.datetime.fromtimestamp(float(date), tz=datetime.timezone.utc)
+        elif format == "%epoch_microseconds":
+            return self._UNIX_EPOCH + datetime.timedelta(microseconds=int(date))
         elif format == "%ms":
             return self._UNIX_EPOCH + datetime.timedelta(milliseconds=int(date))
         elif "%_ms" in format:
@@ -46,6 +48,8 @@ class DatetimeParser:
             return str(int(dt.timestamp()))
         if format == "%s_as_float":
             return str(float(dt.timestamp()))
+        if format == "%epoch_microseconds":
+            return str(int(dt.timestamp() * 1_000_000))
         if format == "%ms":
             # timstamp() returns a float representing the number of seconds since the unix epoch
             return str(int(dt.timestamp() * 1000))

airbyte_cdk/sources/declarative/declarative_component_schema.yaml CHANGED Viewed

@@ -1794,7 +1794,6 @@ definitions:
     type: object
     required:
       - type
-      - path
       - url_base
     properties:
       type:
@@ -1806,9 +1805,18 @@ definitions:
         type: string
         interpolation_context:
           - config
+          - next_page_token
+          - stream_interval
+          - stream_partition
+          - stream_slice
+          - creation_response
+          - polling_response
+          - download_target
         examples:
           - "https://connect.squareup.com/v2"
-          - "{{ config['base_url'] or 'https://app.posthog.com'}}/api/"
+          - "{{ config['base_url'] or 'https://app.posthog.com'}}/api"
+          - "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups"
+          - "https://example.com/api/v1/resource/{{ next_page_token['id'] }}"
       path:
         title: URL Path
         description: Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.

airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py CHANGED Viewed

@@ -1,47 +1,47 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
 import csv
 import gzip
 import io
 import json
 import logging
-from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from io import BufferedIOBase, TextIOWrapper
-from typing import Any, Generator, MutableMapping, Optional
+from typing import Any, Optional
 import orjson
 import requests
 from airbyte_cdk.models import FailureType
-from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
+from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE, Decoder
+from airbyte_cdk.sources.declarative.decoders.decoder_parser import (
+    PARSER_OUTPUT_TYPE,
+    PARSERS_BY_HEADER_TYPE,
+    PARSERS_TYPE,
+    Parser,
+)
 from airbyte_cdk.utils import AirbyteTracedException
 logger = logging.getLogger("airbyte")
-@dataclass
-class Parser(ABC):
-    @abstractmethod
-    def parse(
-        self,
-        data: BufferedIOBase,
-    ) -> Generator[MutableMapping[str, Any], None, None]:
-        """
-        Parse data and yield dictionaries.
-        """
-        pass
 @dataclass
 class GzipParser(Parser):
     inner_parser: Parser
-    def parse(
-        self,
-        data: BufferedIOBase,
-    ) -> Generator[MutableMapping[str, Any], None, None]:
+    def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
         """
         Decompress gzipped bytes and pass decompressed data to the inner parser.
+        IMPORTANT:
+            - If the data is not gzipped, reset the pointer and pass the data to the inner parser as is.
+        Note:
+            - The data is not decoded by default.
         """
         with gzip.GzipFile(fileobj=data, mode="rb") as gzipobj:
             yield from self.inner_parser.parse(gzipobj)
@@ -50,7 +50,7 @@ class GzipParser(Parser):
 class JsonParser(Parser):
     encoding: str = "utf-8"
-    def parse(self, data: BufferedIOBase) -> Generator[MutableMapping[str, Any], None, None]:
+    def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
         """
         Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.
         """
@@ -90,10 +90,7 @@ class JsonParser(Parser):
 class JsonLineParser(Parser):
     encoding: Optional[str] = "utf-8"
-    def parse(
-        self,
-        data: BufferedIOBase,
-    ) -> Generator[MutableMapping[str, Any], None, None]:
+    def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
         for line in data:
             try:
                 yield json.loads(line.decode(encoding=self.encoding or "utf-8"))
@@ -117,10 +114,7 @@ class CsvParser(Parser):
         return self.delimiter
-    def parse(
-        self,
-        data: BufferedIOBase,
-    ) -> Generator[MutableMapping[str, Any], None, None]:
+    def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
         """
         Parse CSV data from decompressed bytes.
         """
@@ -130,27 +124,95 @@ class CsvParser(Parser):
             yield row
-@dataclass
 class CompositeRawDecoder(Decoder):
     """
-    Decoder strategy to transform a requests.Response into a Generator[MutableMapping[str, Any], None, None]
+    Decoder strategy to transform a requests.Response into a PARSER_OUTPUT_TYPE
     passed response.raw to parser(s).
-    Note: response.raw is not decoded/decompressed by default.
-    parsers should be instantiated recursively.
+    Note: response.raw is not decoded/decompressed by default. Parsers should be instantiated recursively.
     Example:
-    composite_raw_decoder = CompositeRawDecoder(parser=GzipParser(inner_parser=JsonLineParser(encoding="iso-8859-1")))
+        composite_raw_decoder = CompositeRawDecoder(
+            parser=GzipParser(
+                inner_parser=JsonLineParser(encoding="iso-8859-1")
+            )
+        )
     """
-    parser: Parser
-    stream_response: bool = True
+    def __init__(
+        self,
+        parser: Parser,
+        stream_response: bool = True,
+        parsers_by_header: PARSERS_BY_HEADER_TYPE = None,
+    ) -> None:
+        # since we moved from using `dataclass` to `__init__` method,
+        # we need to keep using the `parser` to be able to resolve the depenencies
+        # between the parsers correctly.
+        self.parser = parser
+        self._parsers_by_header = parsers_by_header if parsers_by_header else {}
+        self._stream_response = stream_response
+    @classmethod
+    def by_headers(
+        cls,
+        parsers: PARSERS_TYPE,
+        stream_response: bool,
+        fallback_parser: Parser,
+    ) -> "CompositeRawDecoder":
+        """
+        Create a CompositeRawDecoder instance based on header values.
+        Args:
+            parsers (PARSERS_TYPE): A list of tuples where each tuple contains headers, header values, and a parser.
+            stream_response (bool): A flag indicating whether the response should be streamed.
+            fallback_parser (Parser): A parser to use if no matching header is found.
+        Returns:
+            CompositeRawDecoder: An instance of CompositeRawDecoder configured with the provided parsers.
+        """
+        parsers_by_header = {}
+        for headers, header_values, parser in parsers:
+            for header in headers:
+                parsers_by_header[header] = {header_value: parser for header_value in header_values}
+        return cls(fallback_parser, stream_response, parsers_by_header)
     def is_stream_response(self) -> bool:
-        return self.stream_response
+        return self._stream_response
-    def decode(
-        self, response: requests.Response
-    ) -> Generator[MutableMapping[str, Any], None, None]:
+    def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
+        parser = self._select_parser(response)
         if self.is_stream_response():
-            yield from self.parser.parse(data=response.raw)  # type: ignore[arg-type]
+            # urllib mentions that some interfaces don't play nice with auto_close
+            # More info here: https://urllib3.readthedocs.io/en/stable/user-guide.html#using-io-wrappers-with-response-content
+            # We have indeed observed some issues with CSV parsing.
+            # Hence, we will manage the closing of the file ourselves until we find a better solution.
+            response.raw.auto_close = False
+            yield from parser.parse(
+                data=response.raw,  # type: ignore[arg-type]
+            )
+            response.raw.close()
         else:
-            yield from self.parser.parse(data=io.BytesIO(response.content))
+            yield from parser.parse(data=io.BytesIO(response.content))
+    def _select_parser(self, response: requests.Response) -> Parser:
+        """
+        Selects the appropriate parser based on the response headers.
+        This method iterates through the `_parsers_by_header` dictionary to find a matching parser
+        based on the headers in the response. If a matching header and header value are found,
+        the corresponding parser is returned. If no match is found, the default parser is returned.
+        Args:
+            response (requests.Response): The HTTP response object containing headers to check.
+        Returns:
+            Parser: The parser corresponding to the matched header value, or the default parser if no match is found.
+        """
+        for header, parser_by_header_value in self._parsers_by_header.items():
+            if (
+                header in response.headers
+                and response.headers[header] in parser_by_header_value.keys()
+            ):
+                return parser_by_header_value[response.headers[header]]
+        return self.parser

airbyte_cdk/sources/declarative/decoders/decoder.py CHANGED Viewed

@@ -8,6 +8,8 @@ from typing import Any, Generator, MutableMapping
 import requests
+DECODER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
 @dataclass
 class Decoder:
@@ -22,9 +24,7 @@ class Decoder:
         """
     @abstractmethod
-    def decode(
-        self, response: requests.Response
-    ) -> Generator[MutableMapping[str, Any], None, None]:
+    def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
         """
         Decodes a requests.Response into a Mapping[str, Any] or an array
         :param response: the response to decode

airbyte_cdk/sources/declarative/decoders/decoder_parser.py ADDED Viewed

@@ -0,0 +1,30 @@
+#
+# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
+#
+import logging
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from io import BufferedIOBase
+from typing import Any, Dict, Generator, List, MutableMapping, Optional, Set, Tuple
+logger = logging.getLogger("airbyte")
+PARSER_OUTPUT_TYPE = Generator[MutableMapping[str, Any], None, None]
+@dataclass
+class Parser(ABC):
+    @abstractmethod
+    def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
+        """
+        Parse data and yield dictionaries.
+        """
+        pass
+# reusable parser types
+PARSERS_TYPE = List[Tuple[Set[str], Set[str], Parser]]
+PARSERS_BY_HEADER_TYPE = Optional[Dict[str, Dict[str, Parser]]]

airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py CHANGED Viewed

@@ -6,16 +6,13 @@ import logging
 import zipfile
 from dataclasses import dataclass
 from io import BytesIO
-from typing import Any, Generator, MutableMapping
-import orjson
 import requests
 from airbyte_cdk.models import FailureType
 from airbyte_cdk.sources.declarative.decoders import Decoder
-from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
-    Parser,
-)
+from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import Parser
+from airbyte_cdk.sources.declarative.decoders.decoder import DECODER_OUTPUT_TYPE
 from airbyte_cdk.utils import AirbyteTracedException
 logger = logging.getLogger("airbyte")
@@ -28,16 +25,16 @@ class ZipfileDecoder(Decoder):
     def is_stream_response(self) -> bool:
         return False
-    def decode(
-        self, response: requests.Response
-    ) -> Generator[MutableMapping[str, Any], None, None]:
+    def decode(self, response: requests.Response) -> DECODER_OUTPUT_TYPE:
         try:
             with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
                 for file_name in zip_file.namelist():
                     unzipped_content = zip_file.read(file_name)
                     buffered_content = BytesIO(unzipped_content)
                     try:
-                        yield from self.parser.parse(buffered_content)
+                        yield from self.parser.parse(
+                            buffered_content,
+                        )
                     except Exception as e:
                         logger.error(
                             f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."

airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py CHANGED Viewed

@@ -15,7 +15,6 @@ from numpy import nan
 from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
-EMPTY_STR: str = ""
 DEFAULT_ENCODING: str = "utf-8"
 DOWNLOAD_CHUNK_SIZE: int = 1024 * 10
@@ -136,7 +135,6 @@ class ResponseToFileExtractor(RecordExtractor):
         """
         try:
-            # TODO: Add support for other file types, like `json`, with `pd.read_json()`
             with open(path, "r", encoding=file_encoding) as data:
                 chunks = pd.read_csv(
                     data, chunksize=chunk_size, iterator=True, dialect="unix", dtype=object

airbyte_cdk/sources/declarative/interpolation/macros.py CHANGED Viewed

@@ -12,6 +12,8 @@ import pytz
 from dateutil import parser
 from isodate import parse_duration
+from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
 """
 This file contains macros that can be evaluated by a `JinjaInterpolation` object
 """
@@ -171,11 +173,7 @@ def format_datetime(
     dt_datetime = (
         datetime.datetime.strptime(dt, input_format) if input_format else str_to_datetime(dt)
     )
-    if format == "%s":
-        return str(int(dt_datetime.timestamp()))
-    elif format == "%ms":
-        return str(int(dt_datetime.timestamp() * 1_000_000))
-    return dt_datetime.strftime(format)
+    return DatetimeParser().format(dt=dt_datetime, format=format)
 _macros_list = [

airbyte_cdk/sources/declarative/models/declarative_component_schema.py CHANGED Viewed

@@ -939,7 +939,7 @@ class MinMaxDatetime(BaseModel):
     )
     datetime_format: Optional[str] = Field(
         "",
-        description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n  * **%s**: Epoch unix timestamp - `1686218963`\n  * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n  * **%ms**: Epoch unix timestamp - `1686218963123`\n  * **%a**: Weekday (abbreviated) - `Sun`\n  * **%A**: Weekday (full) - `Sunday`\n  * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n  * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n  * **%b**: Month (abbreviated) - `Jan`\n  * **%B**: Month (full) - `January`\n  * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n  * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n  * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n  * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n  * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n  * **%p**: AM/PM indicator\n  * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n  * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n  * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n  * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n  * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n  * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n  * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n  * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n  * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n  * **%x**: Date representation - `08/16/1988`\n  * **%X**: Time representation - `21:30:00`\n  * **%%**: Literal \'%\' character\n\n  Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
+        description='Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:\n  * **%s**: Epoch unix timestamp - `1686218963`\n  * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n  * **%ms**: Epoch unix timestamp - `1686218963123`\n  * **%a**: Weekday (abbreviated) - `Sun`\n  * **%A**: Weekday (full) - `Sunday`\n  * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n  * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n  * **%b**: Month (abbreviated) - `Jan`\n  * **%B**: Month (full) - `January`\n  * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n  * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n  * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n  * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n  * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n  * **%p**: AM/PM indicator\n  * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n  * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n  * **%f**: Microsecond (zero-padded to 6 digits) - `000000`, `000001`, ..., `999999`\n  * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`, `001`, ..., `999`\n  * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n  * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n  * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n  * **%U**: Week number of the year (Sunday as first day) - `00`, `01`, ..., `53`\n  * **%W**: Week number of the year (Monday as first day) - `00`, `01`, ..., `53`\n  * **%c**: Date and time representation - `Tue Aug 16 21:30:00 1988`\n  * **%x**: Date representation - `08/16/1988`\n  * **%X**: Time representation - `21:30:00`\n  * **%%**: Literal \'%\' character\n\n  Some placeholders depend on the locale of the underlying system - in most cases this 
locale is configured as en/US. For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n',
         examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s"],
         title="Datetime Format",
     )
@@ -1545,7 +1545,7 @@ class DatetimeBasedCursor(BaseModel):
     )
     datetime_format: str = Field(
         ...,
-        description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n  * **%s**: Epoch unix timestamp - `1686218963`\n  * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n  * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n  * **%a**: Weekday (abbreviated) - `Sun`\n  * **%A**: Weekday (full) - `Sunday`\n  * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n  * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n  * **%b**: Month (abbreviated) - `Jan`\n  * **%B**: Month (full) - `January`\n  * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n  * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n  * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n  * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n  * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n  * **%p**: AM/PM indicator\n  * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n  * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n  * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n  * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n  * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n  * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n  * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n  * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n  * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n  * **%x**: Date standard format - `08/16/1988`\n  * **%X**: Time standard format - `21:30:00`\n  * **%%**: Literal '%' character\n\n  Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
+        description="The datetime format used to format the datetime values that are sent in outgoing requests to the API. Use placeholders starting with \"%\" to describe the format the API is using. The following placeholders are available:\n  * **%s**: Epoch unix timestamp - `1686218963`\n  * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`\n  * **%ms**: Epoch unix timestamp (milliseconds) - `1686218963123`\n  * **%a**: Weekday (abbreviated) - `Sun`\n  * **%A**: Weekday (full) - `Sunday`\n  * **%w**: Weekday (decimal) - `0` (Sunday), `6` (Saturday)\n  * **%d**: Day of the month (zero-padded) - `01`, `02`, ..., `31`\n  * **%b**: Month (abbreviated) - `Jan`\n  * **%B**: Month (full) - `January`\n  * **%m**: Month (zero-padded) - `01`, `02`, ..., `12`\n  * **%y**: Year (without century, zero-padded) - `00`, `01`, ..., `99`\n  * **%Y**: Year (with century) - `0001`, `0002`, ..., `9999`\n  * **%H**: Hour (24-hour, zero-padded) - `00`, `01`, ..., `23`\n  * **%I**: Hour (12-hour, zero-padded) - `01`, `02`, ..., `12`\n  * **%p**: AM/PM indicator\n  * **%M**: Minute (zero-padded) - `00`, `01`, ..., `59`\n  * **%S**: Second (zero-padded) - `00`, `01`, ..., `59`\n  * **%f**: Microsecond (zero-padded to 6 digits) - `000000`\n  * **%_ms**: Millisecond (zero-padded to 3 digits) - `000`\n  * **%z**: UTC offset - `(empty)`, `+0000`, `-04:00`\n  * **%Z**: Time zone name - `(empty)`, `UTC`, `GMT`\n  * **%j**: Day of the year (zero-padded) - `001`, `002`, ..., `366`\n  * **%U**: Week number of the year (starting Sunday) - `00`, ..., `53`\n  * **%W**: Week number of the year (starting Monday) - `00`, ..., `53`\n  * **%c**: Date and time - `Tue Aug 16 21:30:00 1988`\n  * **%x**: Date standard format - `08/16/1988`\n  * **%X**: Time standard format - `21:30:00`\n  * **%%**: Literal '%' character\n\n  Some placeholders depend on the locale of the underlying system - in most cases this locale is configured as en/US. 
For more information see the [Python documentation](https://docs.python.org/3/library/datetime.html#strftime-and-strptime-format-codes).\n",
         examples=["%Y-%m-%dT%H:%M:%S.%f%z", "%Y-%m-%d", "%s", "%ms", "%s_as_float"],
         title="Outgoing Datetime Format",
     )
@@ -2072,12 +2072,14 @@ class HttpRequester(BaseModel):
         description="Base URL of the API source. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
         examples=[
             "https://connect.squareup.com/v2",
-            "{{ config['base_url'] or 'https://app.posthog.com'}}/api/",
+            "{{ config['base_url'] or 'https://app.posthog.com'}}/api",
+            "https://connect.squareup.com/v2/quotes/{{ stream_partition['id'] }}/quote_line_groups",
+            "https://example.com/api/v1/resource/{{ next_page_token['id'] }}",
         ],
         title="API Base URL",
     )
-    path: str = Field(
-        ...,
+    path: Optional[str] = Field(
+        None,
         description="Path the specific API endpoint that this stream represents. Do not put sensitive information (e.g. API tokens) into this field - Use the Authentication component for this.",
         examples=[
             "/products",

airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py CHANGED Viewed

@@ -56,7 +56,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
 )
 from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
-from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
+from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
 from airbyte_cdk.sources.declarative.decoders import (
     Decoder,
@@ -2193,18 +2193,40 @@ class ModelToComponentFactory:
             stream_response=False if self._emit_connector_builder_messages else True,
         )
-    @staticmethod
-    def create_jsonl_decoder(model: JsonlDecoderModel, config: Config, **kwargs: Any) -> Decoder:
+    def create_jsonl_decoder(
+        self, model: JsonlDecoderModel, config: Config, **kwargs: Any
+    ) -> Decoder:
         return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config), stream_response=True
+            parser=ModelToComponentFactory._get_parser(model, config),
+            stream_response=False if self._emit_connector_builder_messages else True,
         )
     def create_gzip_decoder(
         self, model: GzipDecoderModel, config: Config, **kwargs: Any
     ) -> Decoder:
-        return CompositeRawDecoder(
-            parser=ModelToComponentFactory._get_parser(model, config),
-            stream_response=False if self._emit_connector_builder_messages else True,
+        _compressed_response_types = {
+            "gzip",
+            "x-gzip",
+            "gzip, deflate",
+            "x-gzip, deflate",
+            "application/zip",
+            "application/gzip",
+            "application/x-gzip",
+            "application/x-zip-compressed",
+        }
+        gzip_parser: GzipParser = ModelToComponentFactory._get_parser(model, config)  # type: ignore  # based on the model, we know this will be a GzipParser
+        if self._emit_connector_builder_messages:
+            # This is very surprising but if the response is not streamed,
+            # CompositeRawDecoder calls response.content and the requests library actually uncompress the data as opposed to response.raw,
+            # which uses urllib3 directly and does not uncompress the data.
+            return CompositeRawDecoder(gzip_parser.inner_parser, False)
+        return CompositeRawDecoder.by_headers(
+            [({"Content-Encoding", "Content-Type"}, _compressed_response_types, gzip_parser)],
+            stream_response=True,
+            fallback_parser=gzip_parser.inner_parser,
         )
     @staticmethod
@@ -2753,7 +2775,10 @@ class ModelToComponentFactory:
             )
             paginator = (
                 self._create_component_from_model(
-                    model=model.download_paginator, decoder=decoder, config=config, url_base=""
+                    model=model.download_paginator,
+                    decoder=decoder,
+                    config=config,
+                    url_base="",
                 )
                 if model.download_paginator
                 else NoPagination(parameters={})
@@ -2870,7 +2895,10 @@ class ModelToComponentFactory:
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
         download_target_extractor = self._create_component_from_model(
-            model=model.download_target_extractor, decoder=decoder, config=config, name=name
+            model=model.download_target_extractor,
+            decoder=decoder,
+            config=config,
+            name=name,
         )
         job_repository: AsyncJobRepository = AsyncHttpJobRepository(
             creation_requester=creation_requester,

airbyte-cdk 6.38.0.dev0__py3-none-any.whl → 6.38.2__py3-none-any.whl

airbyte-cdk 6.38.0.dev0__py3-none-any.whl → 6.38.2__py3-none-any.whl