airbyte-source-google-ads 4.1.0rc7.dev202510212244__tar.gz → 4.1.0rc8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/PKG-INFO +2 -2
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/pyproject.toml +2 -2
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/components.py +210 -180
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/manifest.yaml +22 -17
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/README.md +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/__init__.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/config_migrations.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/google_ads.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/models.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/run.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/schemas/customer_client.json +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/schemas/service_accounts.json +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/source.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/spec.json +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/streams.py +0 -0
- {airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/utils.py +0 -0
{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/PKG-INFO
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: airbyte-source-google-ads
-Version: 4.1.0rc7.dev202510212244
+Version: 4.1.0rc8
 Summary: Source implementation for Google Ads.
 Home-page: https://airbyte.com
 License: Elv2
@@ -11,7 +11,7 @@ Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: airbyte-cdk (>=7.…
+Requires-Dist: airbyte-cdk (>=7.4.1,<8.0.0)
 Requires-Dist: google-ads (==27.0.0)
 Requires-Dist: pendulum (<3.0.0)
 Requires-Dist: protobuf (==4.25.2)
{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/pyproject.toml
RENAMED
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-version = "4.1.0-rc.…
+version = "4.1.0-rc.8"
 name = "airbyte-source-google-ads"
 description = "Source implementation for Google Ads."
 authors = [ "Airbyte <contact@airbyte.io>",]
@@ -20,7 +20,7 @@ python = "^3.10,<3.12"
 google-ads = "==27.0.0"
 protobuf = "==4.25.2"
 pendulum = "<3.0.0"
-airbyte-cdk = "^7.…
+airbyte-cdk = "^7.4.1"
 
 [tool.poetry.scripts]
 source-google-ads = "source_google_ads.run:run"
{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/components.py
RENAMED
@@ -2,11 +2,12 @@
 # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
 #
 
+import io
 import json
 import logging
 import re
 import threading
-from dataclasses import …
+from dataclasses import dataclass, field
 from itertools import groupby
 from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
 
@@ -14,6 +15,8 @@ import anyascii
 import requests
 
 from airbyte_cdk import AirbyteTracedException, FailureType, InterpolatedString
+from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import JsonParser
+from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
 from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
 from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
 from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
@@ -25,8 +28,6 @@ from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.streams.concurrent.default_stream import DefaultStream
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
-from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
-
 
 from .google_ads import GoogleAds
 
@@ -414,54 +415,6 @@ class KeysToSnakeCaseGoogleAdsTransformation(RecordTransformation):
         return "_".join(token.lower() for token in tokens)
 
 
-@dataclass
-class ChangeStatusRetriever(SimpleRetriever):
-    """
-    Retrieves change status records from the Google Ads API.
-    ChangeStatus stream requires custom retriever because Google Ads API requires limit for this stream to be set to 10,000.
-    When the number of records exceeds this limit, we need to adjust the start date to the last record's cursor.
-    """
-
-    QUERY_LIMIT = 10000
-    cursor_field: str = "change_status.last_change_date_time"
-
-    def _read_pages(
-        self,
-        records_generator_fn: Callable[[Optional[Mapping]], Iterable[Record]],
-        stream_state: StreamState,
-        stream_slice: StreamSlice,
-    ) -> Iterable[Record]:
-        """
-        Since this stream doesn’t support “real” pagination, we treat each HTTP
-        call as a slice defined by a start_date / end_date. If we hit the
-        QUERY_LIMIT exactly, we assume there may be more data at the end of that
-        slice, so we bump start_date forward to the last-record cursor and retry.
-        """
-        while True:
-            record_count = 0
-            last_record = None
-            response = self._fetch_next_page(stream_state, stream_slice)
-
-            # Yield everything we got
-            for rec in records_generator_fn(response):
-                record_count += 1
-                last_record = rec
-                yield rec
-
-            if record_count < self.QUERY_LIMIT:
-                break
-
-            # Update the stream slice start time to the last record's cursor
-            last_cursor = last_record[self.cursor_field]
-            cursor_slice = stream_slice.cursor_slice
-            cursor_slice["start_time"] = last_cursor
-            stream_slice = StreamSlice(
-                partition=stream_slice.partition,
-                cursor_slice=cursor_slice,
-                extra_fields=stream_slice.extra_fields,
-            )
-
-
 @dataclass
 class ChangeStatusRequester(GoogleAdsHttpRequester):
     CURSOR_FIELD: str = "change_status.last_change_date_time"
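Note: the 10,000-row cap that ChangeStatusRetriever handled imperatively is now expressed declaratively in manifest.yaml through a pagination_reset block on the change_status stream (see the manifest diff below). As a rough sketch of the idea only — this is not package code, and fetch_page is a hypothetical callable — the loop it replaces looks like this:

QUERY_LIMIT = 10_000


def read_change_statuses(fetch_page, start_time: str, end_time: str):
    """fetch_page(start, end) is a hypothetical callable returning one page of records."""
    while True:
        page = fetch_page(start_time, end_time)
        yield from page
        if len(page) < QUERY_LIMIT:
            return
        # A full page means more rows may remain in this window, so restart the
        # window from the last record's cursor value and query again.
        start_time = page[-1]["change_status.last_change_date_time"]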
@@ -507,7 +460,6 @@ class CriterionRetriever(SimpleRetriever):
     def _read_pages(
         self,
         records_generator_fn: Callable[[Optional[Mapping]], Iterable[Record]],
-        stream_state: StreamState,
         stream_slice: StreamSlice,
     ) -> Iterable[Record]:
         """
@@ -537,6 +489,7 @@ class CriterionRetriever(SimpleRetriever):
                         self.primary_key[0]: _id,
                         "deleted_at": ts,
                     },
+                    associated_slice=stream_slice,
                     stream_name=self.name,
                 )
             else:
@@ -556,7 +509,7 @@ class CriterionRetriever(SimpleRetriever):
                 cursor_slice=stream_slice.cursor_slice,
                 extra_fields={"change_status.last_change_date_time": updated_times},
             )
-            response = self._fetch_next_page(…
+            response = self._fetch_next_page(new_slice)
             for rec in records_generator_fn(response):
                 # attach timestamp from ChangeStatus
                 rec.data[self.cursor_field] = time_map.get(rec.data.get(self.primary_key[0]))
@@ -624,13 +577,26 @@ class GoogleAdsCriterionParentStateMigration(StateMigration):
     """
 
     def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
-        return stream_state and "parent_state"
+        return stream_state and not stream_state.get("parent_state")
 
     def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
         if not self.should_migrate(stream_state):
             return stream_state
 
-        return {"parent_state": stream_state}
+        return {"parent_state": {"change_status": stream_state}}
+
+
+class GoogleAdsGlobalStateMigration(StateMigration):
+    """
+    Migrates global state to include use_global_cursor key. Previously legacy GlobalSubstreamCursor was used.
+    """
+
+    def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
+        return stream_state and not stream_state.get("use_global_cursor")
+
+    def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
+        stream_state["use_global_cursor"] = True
+        return stream_state
 
 
 @dataclass(repr=False, eq=False, frozen=True)
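For context, a minimal sketch (not package code; the timestamps below are made up) of the state shapes the two migrations above produce:

# GoogleAdsCriterionParentStateMigration.migrate wraps legacy criterion state under
# the change_status parent stream:
legacy_criterion_state = {"change_status.last_change_date_time": "2024-05-01 00:00:00.000000"}
migrated_criterion_state = {"parent_state": {"change_status": legacy_criterion_state}}

# GoogleAdsGlobalStateMigration.migrate only flags existing state as using the
# global cursor instead of the legacy GlobalSubstreamCursor:
legacy_global_state = {"state": {"segments.date": "2024-05-01"}}
migrated_global_state = {**legacy_global_state, "use_global_cursor": True}

print(migrated_criterion_state)
print(migrated_global_state)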
@@ -898,145 +864,209 @@ class CustomGAQuerySchemaLoader(SchemaLoader):
 
 
 @dataclass
-class …
-
+class StringParseState:
+    inside_string: bool = False
+    escape_next_character: bool = False
+    collected_string_chars: List[str] = field(default_factory=list)
+    last_parsed_key: Optional[str] = None
+
+
+@dataclass
+class TopLevelObjectState:
+    depth: int = 0
+
+
+@dataclass
+class ResultsArrayState:
+    inside_results_array: bool = False
+    array_nesting_depth: int = 0
+    expecting_results_array_start: bool = False
+
+
+@dataclass
+class RecordParseState:
+    inside_record: bool = False
+    record_text_buffer: List[str] = field(default_factory=list)
+    record_nesting_depth: int = 0
+
+
+@dataclass
+class GoogleAdsStreamingDecoder(Decoder):
+    """
+    JSON streaming decoder optimized for Google Ads API responses.
+
+    Uses a fast JSON parse when the full payload fits within max_direct_decode_bytes;
+    otherwise streams records incrementally from the `results` array.
+    Ensures truncated or structurally invalid JSON is detected and reported.
+    """
+
+    chunk_size: int = 5 * 1024 * 1024  # 5 MB
+    # Fast-path threshold: if whole body < 20 MB, decode with json.loads
+    max_direct_decode_bytes: int = 20 * 1024 * 1024  # 20 MB
+
+    def __post_init__(self):
+        self.parser = JsonParser()
 
     def is_stream_response(self) -> bool:
        return True
 
-    def decode(…
- …
+    def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]:
+        data, complete = self._buffer_up_to_limit(response)
+        if complete:
+            yield from self.parser.parse(io.BytesIO(data))
+            return
 
- …
-            nonlocal item_buf, collecting_item, item_depth
-            obj_text = "".join(item_buf).strip()
-            item_buf = []
-            collecting_item = False
-            item_depth = 0
-            if obj_text:
-                return json.loads(obj_text)
+        records_batch: List[Dict[str, Any]] = []
+        for record in self._parse_records_from_stream(data):
+            records_batch.append(record)
+            if len(records_batch) >= 100:
+                yield {"results": records_batch}
+                records_batch = []
+
+        if records_batch:
+            yield {"results": records_batch}
+
+    def _buffer_up_to_limit(self, response: requests.Response) -> Tuple[Union[bytes, Iterable[bytes]], bool]:
+        buf = bytearray()
+        response_stream = response.iter_content(chunk_size=self.chunk_size)
+
+        while chunk := next(response_stream, None):
+            buf.extend(chunk)
+            if len(buf) >= self.max_direct_decode_bytes:
+                return (self._chain_prefix_and_stream(bytes(buf), response_stream), False)
+        return (bytes(buf), True)
+
+    @staticmethod
+    def _chain_prefix_and_stream(prefix: bytes, rest_stream: Iterable[bytes]) -> Iterable[bytes]:
+        yield prefix
+        yield from rest_stream
+
+    def _parse_records_from_stream(self, byte_iter: Iterable[bytes], encoding: str = "utf-8") -> Generator[Dict[str, Any], None, None]:
+        string_state = StringParseState()
+        results_state = ResultsArrayState()
+        record_state = RecordParseState()
+        top_level_state = TopLevelObjectState()
 
         for chunk in byte_iter:
- …
-            # Always feed characters to item buffer if we're inside an item
-            if collecting_item:
-                item_buf.append(ch)
-
-            # --- String handling (so braces inside strings are ignored) ---
-            if in_str:
-                if esc:
-                    esc = False
-                    continue
-                if ch == "\\":
-                    esc = True
-                    continue
-                if ch == '"':
-                    # string ended
-                    in_str = False
-                    last_string = "".join(str_buf)
-                    str_buf = []
-                else:
-                    str_buf.append(ch)
-                continue
+            for char in chunk.decode(encoding, errors="replace"):
+                self._append_to_current_record_if_any(char, record_state)
 
-            if …
-                in_str = True
-                str_buf = []
-                # If we are collecting an item, we already appended the quote to item_buf above
+                if self._update_string_state(char, string_state):
                    continue
 
-            # …
-            if …
- …
-                results_array_depth = depth  # this '[' depth
-                awaiting_results_array = False
-
-            # Detect the start of an item object directly inside "results"
-            if (
-                ch == "{"
-                and results_array_depth is not None
-                and not collecting_item
-                and depth == results_array_depth + 1
-            ):
-                collecting_item = True
-                item_buf = ["{"]  # start buffer anew
-                item_depth = 1
-            elif collecting_item and ch in "{[":
-                # Nested structure inside item
-                item_depth += 1
+                # Track outer braces only outside results array
+                if not results_state.inside_results_array:
+                    if char == "{":
+                        top_level_state.depth += 1
+                    elif char == "}":
+                        top_level_state.depth = max(0, top_level_state.depth - 1)
 
+                if not results_state.inside_results_array:
+                    self._detect_results_array(char, string_state, results_state)
                    continue
 
- …
+                record = self._parse_record_structure(char, results_state, record_state)
+                if record is not None:
+                    yield record
+
+        # EOF validation
+        if (
+            string_state.inside_string
+            or record_state.inside_record
+            or record_state.record_nesting_depth != 0
+            or results_state.inside_results_array
+            or results_state.array_nesting_depth != 0
+            or top_level_state.depth != 0
+        ):
+            raise AirbyteTracedException(
+                message="Response JSON stream ended prematurely and is incomplete.",
+                internal_message=(
+                    "Detected truncated JSON stream: one or more structural elements were not fully closed before the response ended."
+                ),
+                failure_type=FailureType.system_error,
+            )
+
+    def _update_string_state(self, char: str, state: StringParseState) -> bool:
+        """Return True if char was handled as part of string parsing."""
+        if state.inside_string:
+            if state.escape_next_character:
+                state.escape_next_character = False
+                return True
+            if char == "\\":
+                state.escape_next_character = True
+                return True
+            if char == '"':
+                state.inside_string = False
+                state.last_parsed_key = "".join(state.collected_string_chars)
+                state.collected_string_chars.clear()
+                return True
+            state.collected_string_chars.append(char)
+            return True
 
- …
+        if char == '"':
+            state.inside_string = True
+            state.collected_string_chars.clear()
+            return True
 
- …
+        return False
 
- …
+    def _detect_results_array(self, char: str, string_state: StringParseState, results_state: ResultsArrayState) -> None:
+        if char == ":" and string_state.last_parsed_key == "results":
+            results_state.expecting_results_array_start = True
+        elif char == "[" and results_state.expecting_results_array_start:
+            results_state.inside_results_array = True
+            results_state.array_nesting_depth = 1
+            results_state.expecting_results_array_start = False
+
+    def _parse_record_structure(
+        self, char: str, results_state: ResultsArrayState, record_state: RecordParseState
+    ) -> Optional[Dict[str, Any]]:
+        if char == "{":
+            if record_state.inside_record:
+                record_state.record_nesting_depth += 1
+            else:
+                self._start_record(record_state)
+            return None
+
+        if char == "}":
+            if record_state.inside_record:
+                record_state.record_nesting_depth -= 1
+                if record_state.record_nesting_depth == 0:
+                    return self._finish_record(record_state)
+            return None
+
+        if char == "[":
+            if record_state.inside_record:
+                record_state.record_nesting_depth += 1
+            else:
+                results_state.array_nesting_depth += 1
+            return None
+
+        if char == "]":
+            if record_state.inside_record:
+                record_state.record_nesting_depth -= 1
+            else:
+                results_state.array_nesting_depth -= 1
+                if results_state.array_nesting_depth == 0:
+                    results_state.inside_results_array = False
+
+        return None
 
+    @staticmethod
+    def _append_to_current_record_if_any(char: str, record_state: RecordParseState):
+        if record_state.inside_record:
+            record_state.record_text_buffer.append(char)
+
+    @staticmethod
+    def _start_record(record_state: RecordParseState):
+        record_state.inside_record = True
+        record_state.record_text_buffer = ["{"]
+        record_state.record_nesting_depth = 1
+
+    @staticmethod
+    def _finish_record(record_state: RecordParseState) -> Optional[Dict[str, Any]]:
+        text = "".join(record_state.record_text_buffer).strip()
+        record_state.inside_record = False
+        record_state.record_text_buffer.clear()
+        record_state.record_nesting_depth = 0
+        return json.loads(text) if text else None
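To make the new decoder's behaviour concrete, here is a minimal usage sketch. It is not part of the package: FakeResponse, the sample payload, and the tiny size limits are hypothetical, chosen only to force the streaming path; GoogleAdsStreamingDecoder is the class added above.

import json

from source_google_ads.components import GoogleAdsStreamingDecoder


class FakeResponse:
    """Stub exposing the single requests.Response method the decoder relies on."""

    def __init__(self, body: bytes):
        self._body = body

    def iter_content(self, chunk_size: int):
        for start in range(0, len(self._body), chunk_size):
            yield self._body[start : start + chunk_size]


# A searchStream-shaped payload with 250 rows in the top-level "results" array.
payload = json.dumps({"results": [{"campaign": {"id": i}} for i in range(250)], "fieldMask": "campaign.id"}).encode("utf-8")

# Tiny limits force the character-level streaming path instead of the json.loads fast path.
decoder = GoogleAdsStreamingDecoder(chunk_size=1024, max_direct_decode_bytes=64)
for page in decoder.decode(FakeResponse(payload)):
    print(len(page["results"]))  # records are re-batched into pages of at most 100: 100, 100, 50

Bodies smaller than max_direct_decode_bytes never reach the character-level parser; they are buffered whole and handed to the CDK JsonParser instead.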
{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/manifest.yaml
RENAMED
@@ -56,7 +56,7 @@ definitions:
       action: IGNORE
       http_codes:
         - 403
-      # error_message_contains: "The customer account can\\'t be accessed because it is not yet enabled or has been deactivated."
+      # error_message_contains: "The customer account can\\'t be accessed because it is not yet enabled or has been deactivated."
 
   base_selector:
     type: RecordSelector
@@ -98,11 +98,6 @@ definitions:
     type: DeclarativeStream
     retriever:
       $ref: "#/definitions/base_retriever"
-      paginator:
-        type: NoPagination
-      decoder:
-        type: CustomDecoder
-        class_name: "source_google_ads.components.RowsStreamingDecoder"
       requester:
         $ref: "#/definitions/stream_requester"
       record_selector:
@@ -136,11 +131,6 @@ definitions:
       $ref: "#/definitions/base_retriever"
       requester:
         $ref: "#/definitions/stream_requester"
-      paginator:
-        type: NoPagination
-      decoder:
-        type: CustomDecoder
-        class_name: "source_google_ads.components.RowsStreamingDecoder"
       record_selector:
         extractor:
           type: DpathExtractor
@@ -295,6 +285,8 @@ definitions:
     state_migrations:
       - type: CustomStateMigration
        class_name: source_google_ads.components.GoogleAdsCriterionParentStateMigration
+      - type: CustomStateMigration
+        class_name: source_google_ads.components.GoogleAdsGlobalStateMigration
 
   accessible_accounts:
     $ref: "#/definitions/stream_base"
@@ -411,7 +403,7 @@ definitions:
       class_name: "source_google_ads.components.CustomGAQueryHttpRequester"
       authenticator:
         $ref: "#/definitions/authenticator"
-      url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:…
+      url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:searchStream"
       http_method: POST
       error_handler:
         $ref: "#/definitions/base_error_handler"
@@ -430,8 +422,11 @@ definitions:
          parent_key: "clientCustomer"
          partition_field: "customer_id"
          stream: "#/definitions/customer_client"
+      decoder:
+        type: CustomDecoder
+        class_name: "source_google_ads.components.GoogleAdsStreamingDecoder"
       paginator:
-        …
+        type: NoPagination
     transformations:
       - type: CustomTransformation
        class_name: "source_google_ads.components.KeysToSnakeCaseGoogleAdsTransformation"
@@ -492,6 +487,13 @@ definitions:
 
   ad_group_ad_stream:
     $ref: "#/definitions/incremental_stream_base"
+    retriever:
+      $ref: "#/definitions/incremental_stream_base/retriever"
+      paginator:
+        type: NoPagination
+      decoder:
+        type: CustomDecoder
+        class_name: "source_google_ads.components.GoogleAdsStreamingDecoder"
     name: ad_group_ad
     primary_key:
       - ad_group.id
@@ -669,8 +671,6 @@ definitions:
        $ref: "#/definitions/base_error_handler"
      paginator:
        $ref: "#/definitions/cursor_paginator"
-    decoder:
-      type: JsonDecoder
     incremental_sync:
       type: DatetimeBasedCursor
       cursor_field: segments.date
@@ -845,8 +845,7 @@ definitions:
    $parameters:
      url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:search"
    retriever:
-      type: …
-      class_name: "source_google_ads.components.ChangeStatusRetriever"
+      type: SimpleRetriever
      requester:
        type: CustomRequester
        class_name: "source_google_ads.components.ChangeStatusRequester"
@@ -862,6 +861,12 @@ definitions:
      name: change_status
      paginator:
        $ref: "#/definitions/cursor_paginator"
+      pagination_reset:
+        type: PaginationReset
+        action: SPLIT_USING_CURSOR
+        limits:
+          type: PaginationResetLimits
+          number_of_records: 10000
      record_selector:
        type: RecordSelector
      $parameters:
{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/README.md
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/__init__.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/config_migrations.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/google_ads.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/models.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/run.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/schemas/customer_client.json
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/schemas/service_accounts.json
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/source.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/spec.json
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/streams.py
RENAMED
File without changes

{airbyte_source_google_ads-4.1.0rc7.dev202510212244 → airbyte_source_google_ads-4.1.0rc8}/source_google_ads/utils.py
RENAMED
File without changes