airbyte-source-google-ads 4.1.0rc7.dev202510171521__py3-none-any.whl → 4.1.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-source-google-ads
3
- Version: 4.1.0rc7.dev202510171521
3
+ Version: 4.1.0rc8
4
4
  Summary: Source implementation for Google Ads.
5
5
  Home-page: https://airbyte.com
6
6
  License: Elv2
@@ -11,7 +11,7 @@ Classifier: License :: Other/Proprietary License
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.10
13
13
  Classifier: Programming Language :: Python :: 3.11
14
- Requires-Dist: airbyte-cdk (>=7.0.5,<8.0.0)
14
+ Requires-Dist: airbyte-cdk (>=7.4.1,<8.0.0)
15
15
  Requires-Dist: google-ads (==27.0.0)
16
16
  Requires-Dist: pendulum (<3.0.0)
17
17
  Requires-Dist: protobuf (==4.25.2)
@@ -1,8 +1,8 @@
1
1
  source_google_ads/__init__.py,sha256=Nlo5H6LlaSgg7tx_LyqMIy3MXiAagfk3izZ9o44VvSE,1201
2
- source_google_ads/components.py,sha256=7L__n98oQmBGyyqO-rhnK28--2aj1CKtqoxqcwpBjo4,34017
2
+ source_google_ads/components.py,sha256=8Vbomr6tbkF12lZh2ocTLWbtxB0ajrO143qYkpSFgQ4,40831
3
3
  source_google_ads/config_migrations.py,sha256=oBi_qNqBpLS8GNCaIOBo0stNdYuyqVl6lkrhdXRwMX8,4405
4
4
  source_google_ads/google_ads.py,sha256=cxS18tz0fFJjmIhlhFQ3Zvu2K8bhDtmsl1kFeO7nNhk,11595
5
- source_google_ads/manifest.yaml,sha256=ttxdrDhJDLQqKMIu1Jgx4ZywpetsHO5NdEelIAGATZ8,218086
5
+ source_google_ads/manifest.yaml,sha256=ixoaAGH6YJEI_jWe6StVMVNjjb0el90zFzdZR-JF5-8,218667
6
6
  source_google_ads/models.py,sha256=ZmdS3z_2roaEQgV2Mx1CDm33MztpQ66SfHDzP8XwZog,1658
7
7
  source_google_ads/run.py,sha256=ydIyq_vSNV5Z4mJYnsO5GyNDsLDd0qibBsq6wnvuFAo,2002
8
8
  source_google_ads/schemas/customer_client.json,sha256=oThcyUDO1yWpxtWPWdoAFqTXEIweF8N4q6mRI73Q6yU,984
@@ -11,7 +11,7 @@ source_google_ads/source.py,sha256=hz5ep6stMWHNvD73PIF_7bjnee49sY9YHHjaYNAPnOQ,1
11
11
  source_google_ads/spec.json,sha256=8hbc7smbaffIkYCkX2BYJLB9kgaH8vYKCg-H0y1FvUs,7810
12
12
  source_google_ads/streams.py,sha256=FB-DNJlXhjQADptT-wrv3iGWoliyRuvDuHGeqiN9HsY,13349
13
13
  source_google_ads/utils.py,sha256=-KpgGv2W8WueXvGRC3xbVreDl5-5-vU9OwzC5SZDKVc,21409
14
- airbyte_source_google_ads-4.1.0rc7.dev202510171521.dist-info/METADATA,sha256=LohNvWMj-wWNhHf3XoSJeERS7WhdNBvQ_YYbPhbsaAo,5379
15
- airbyte_source_google_ads-4.1.0rc7.dev202510171521.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
16
- airbyte_source_google_ads-4.1.0rc7.dev202510171521.dist-info/entry_points.txt,sha256=pP4Llir93XGkHFDZfXXxK7qOWo9_U1ssCJToyxEUB4w,63
17
- airbyte_source_google_ads-4.1.0rc7.dev202510171521.dist-info/RECORD,,
14
+ airbyte_source_google_ads-4.1.0rc8.dist-info/METADATA,sha256=YWcjHZzCKEs1cjtyjGfc6aRK_4-wmgQnOVQ3Mqah0zY,5363
15
+ airbyte_source_google_ads-4.1.0rc8.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
16
+ airbyte_source_google_ads-4.1.0rc8.dist-info/entry_points.txt,sha256=pP4Llir93XGkHFDZfXXxK7qOWo9_U1ssCJToyxEUB4w,63
17
+ airbyte_source_google_ads-4.1.0rc8.dist-info/RECORD,,
@@ -2,18 +2,21 @@
2
2
  # Copyright (c) 2025 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
+ import io
5
6
  import json
6
7
  import logging
7
8
  import re
8
9
  import threading
9
- from dataclasses import dataclass
10
+ from dataclasses import dataclass, field
10
11
  from itertools import groupby
11
- from typing import Any, Callable, Dict, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
12
+ from typing import Any, Callable, Dict, Generator, Iterable, List, Mapping, MutableMapping, Optional, Tuple, Union
12
13
 
13
14
  import anyascii
14
15
  import requests
15
16
 
16
17
  from airbyte_cdk import AirbyteTracedException, FailureType, InterpolatedString
18
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import JsonParser
19
+ from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
17
20
  from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
18
21
  from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
19
22
  from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
@@ -285,6 +288,10 @@ class GoogleAdsHttpRequester(HttpRequester):
285
288
 
286
289
  schema_loader: InlineSchemaLoader = None
287
290
 
291
+ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
292
+ super().__post_init__(parameters)
293
+ self.stream_response = True
294
+
288
295
  def get_request_body_json(
289
296
  self,
290
297
  *,
@@ -408,54 +415,6 @@ class KeysToSnakeCaseGoogleAdsTransformation(RecordTransformation):
408
415
  return "_".join(token.lower() for token in tokens)
409
416
 
410
417
 
411
- @dataclass
412
- class ChangeStatusRetriever(SimpleRetriever):
413
- """
414
- Retrieves change status records from the Google Ads API.
415
- ChangeStatus stream requires custom retriever because Google Ads API requires limit for this stream to be set to 10,000.
416
- When the number of records exceeds this limit, we need to adjust the start date to the last record's cursor.
417
- """
418
-
419
- QUERY_LIMIT = 10000
420
- cursor_field: str = "change_status.last_change_date_time"
421
-
422
- def _read_pages(
423
- self,
424
- records_generator_fn: Callable[[Optional[Mapping]], Iterable[Record]],
425
- stream_state: StreamState,
426
- stream_slice: StreamSlice,
427
- ) -> Iterable[Record]:
428
- """
429
- Since this stream doesn’t support “real” pagination, we treat each HTTP
430
- call as a slice defined by a start_date / end_date. If we hit the
431
- QUERY_LIMIT exactly, we assume there may be more data at the end of that
432
- slice, so we bump start_date forward to the last-record cursor and retry.
433
- """
434
- while True:
435
- record_count = 0
436
- last_record = None
437
- response = self._fetch_next_page(stream_state, stream_slice)
438
-
439
- # Yield everything we got
440
- for rec in records_generator_fn(response):
441
- record_count += 1
442
- last_record = rec
443
- yield rec
444
-
445
- if record_count < self.QUERY_LIMIT:
446
- break
447
-
448
- # Update the stream slice start time to the last record's cursor
449
- last_cursor = last_record[self.cursor_field]
450
- cursor_slice = stream_slice.cursor_slice
451
- cursor_slice["start_time"] = last_cursor
452
- stream_slice = StreamSlice(
453
- partition=stream_slice.partition,
454
- cursor_slice=cursor_slice,
455
- extra_fields=stream_slice.extra_fields,
456
- )
457
-
458
-
459
418
  @dataclass
460
419
  class ChangeStatusRequester(GoogleAdsHttpRequester):
461
420
  CURSOR_FIELD: str = "change_status.last_change_date_time"
@@ -501,7 +460,6 @@ class CriterionRetriever(SimpleRetriever):
501
460
  def _read_pages(
502
461
  self,
503
462
  records_generator_fn: Callable[[Optional[Mapping]], Iterable[Record]],
504
- stream_state: StreamState,
505
463
  stream_slice: StreamSlice,
506
464
  ) -> Iterable[Record]:
507
465
  """
@@ -531,6 +489,7 @@ class CriterionRetriever(SimpleRetriever):
531
489
  self.primary_key[0]: _id,
532
490
  "deleted_at": ts,
533
491
  },
492
+ associated_slice=stream_slice,
534
493
  stream_name=self.name,
535
494
  )
536
495
  else:
@@ -550,7 +509,7 @@ class CriterionRetriever(SimpleRetriever):
550
509
  cursor_slice=stream_slice.cursor_slice,
551
510
  extra_fields={"change_status.last_change_date_time": updated_times},
552
511
  )
553
- response = self._fetch_next_page(stream_state, new_slice)
512
+ response = self._fetch_next_page(new_slice)
554
513
  for rec in records_generator_fn(response):
555
514
  # attach timestamp from ChangeStatus
556
515
  rec.data[self.cursor_field] = time_map.get(rec.data.get(self.primary_key[0]))
@@ -618,13 +577,26 @@ class GoogleAdsCriterionParentStateMigration(StateMigration):
618
577
  """
619
578
 
620
579
  def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
621
- return stream_state and "parent_state" not in stream_state
580
+ return stream_state and not stream_state.get("parent_state")
622
581
 
623
582
  def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
624
583
  if not self.should_migrate(stream_state):
625
584
  return stream_state
626
585
 
627
- return {"parent_state": stream_state}
586
+ return {"parent_state": {"change_status": stream_state}}
587
+
588
+
589
+ class GoogleAdsGlobalStateMigration(StateMigration):
590
+ """
591
+ Migrates global state to include use_global_cursor key. Previously legacy GlobalSubstreamCursor was used.
592
+ """
593
+
594
+ def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
595
+ return stream_state and not stream_state.get("use_global_cursor")
596
+
597
+ def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
598
+ stream_state["use_global_cursor"] = True
599
+ return stream_state
628
600
 
629
601
 
630
602
  @dataclass(repr=False, eq=False, frozen=True)
@@ -889,3 +861,212 @@ class CustomGAQuerySchemaLoader(SchemaLoader):
889
861
  internal_message=f"The provided query is invalid: {query}. Please refer to the Google Ads API documentation for the correct syntax: https://developers.google.com/google-ads/api/fields/v20/overview and test validate your query using the Google Ads Query Builder: https://developers.google.com/google-ads/api/fields/v20/query_validator",
890
862
  message=f"The provided query is invalid: {query}. Please refer to the Google Ads API documentation for the correct syntax: https://developers.google.com/google-ads/api/fields/v20/overview and test validate your query using the Google Ads Query Builder: https://developers.google.com/google-ads/api/fields/v20/query_validator",
891
863
  )
864
+
865
+
866
+ @dataclass
867
+ class StringParseState:
868
+ inside_string: bool = False
869
+ escape_next_character: bool = False
870
+ collected_string_chars: List[str] = field(default_factory=list)
871
+ last_parsed_key: Optional[str] = None
872
+
873
+
874
+ @dataclass
875
+ class TopLevelObjectState:
876
+ depth: int = 0
877
+
878
+
879
+ @dataclass
880
+ class ResultsArrayState:
881
+ inside_results_array: bool = False
882
+ array_nesting_depth: int = 0
883
+ expecting_results_array_start: bool = False
884
+
885
+
886
+ @dataclass
887
+ class RecordParseState:
888
+ inside_record: bool = False
889
+ record_text_buffer: List[str] = field(default_factory=list)
890
+ record_nesting_depth: int = 0
891
+
892
+
893
+ @dataclass
894
+ class GoogleAdsStreamingDecoder(Decoder):
895
+ """
896
+ JSON streaming decoder optimized for Google Ads API responses.
897
+
898
+ Uses a fast JSON parse when the full payload fits within max_direct_decode_bytes;
899
+ otherwise streams records incrementally from the `results` array.
900
+ Ensures truncated or structurally invalid JSON is detected and reported.
901
+ """
902
+
903
+ chunk_size: int = 5 * 1024 * 1024 # 5 MB
904
+ # Fast-path threshold: if whole body < 20 MB, decode with json.loads
905
+ max_direct_decode_bytes: int = 20 * 1024 * 1024 # 20 MB
906
+
907
+ def __post_init__(self):
908
+ self.parser = JsonParser()
909
+
910
+ def is_stream_response(self) -> bool:
911
+ return True
912
+
913
+ def decode(self, response: requests.Response) -> Generator[MutableMapping[str, Any], None, None]:
914
+ data, complete = self._buffer_up_to_limit(response)
915
+ if complete:
916
+ yield from self.parser.parse(io.BytesIO(data))
917
+ return
918
+
919
+ records_batch: List[Dict[str, Any]] = []
920
+ for record in self._parse_records_from_stream(data):
921
+ records_batch.append(record)
922
+ if len(records_batch) >= 100:
923
+ yield {"results": records_batch}
924
+ records_batch = []
925
+
926
+ if records_batch:
927
+ yield {"results": records_batch}
928
+
929
+ def _buffer_up_to_limit(self, response: requests.Response) -> Tuple[Union[bytes, Iterable[bytes]], bool]:
930
+ buf = bytearray()
931
+ response_stream = response.iter_content(chunk_size=self.chunk_size)
932
+
933
+ while chunk := next(response_stream, None):
934
+ buf.extend(chunk)
935
+ if len(buf) >= self.max_direct_decode_bytes:
936
+ return (self._chain_prefix_and_stream(bytes(buf), response_stream), False)
937
+ return (bytes(buf), True)
938
+
939
+ @staticmethod
940
+ def _chain_prefix_and_stream(prefix: bytes, rest_stream: Iterable[bytes]) -> Iterable[bytes]:
941
+ yield prefix
942
+ yield from rest_stream
943
+
944
+ def _parse_records_from_stream(self, byte_iter: Iterable[bytes], encoding: str = "utf-8") -> Generator[Dict[str, Any], None, None]:
945
+ string_state = StringParseState()
946
+ results_state = ResultsArrayState()
947
+ record_state = RecordParseState()
948
+ top_level_state = TopLevelObjectState()
949
+
950
+ for chunk in byte_iter:
951
+ for char in chunk.decode(encoding, errors="replace"):
952
+ self._append_to_current_record_if_any(char, record_state)
953
+
954
+ if self._update_string_state(char, string_state):
955
+ continue
956
+
957
+ # Track outer braces only outside results array
958
+ if not results_state.inside_results_array:
959
+ if char == "{":
960
+ top_level_state.depth += 1
961
+ elif char == "}":
962
+ top_level_state.depth = max(0, top_level_state.depth - 1)
963
+
964
+ if not results_state.inside_results_array:
965
+ self._detect_results_array(char, string_state, results_state)
966
+ continue
967
+
968
+ record = self._parse_record_structure(char, results_state, record_state)
969
+ if record is not None:
970
+ yield record
971
+
972
+ # EOF validation
973
+ if (
974
+ string_state.inside_string
975
+ or record_state.inside_record
976
+ or record_state.record_nesting_depth != 0
977
+ or results_state.inside_results_array
978
+ or results_state.array_nesting_depth != 0
979
+ or top_level_state.depth != 0
980
+ ):
981
+ raise AirbyteTracedException(
982
+ message="Response JSON stream ended prematurely and is incomplete.",
983
+ internal_message=(
984
+ "Detected truncated JSON stream: one or more structural elements were not fully closed before the response ended."
985
+ ),
986
+ failure_type=FailureType.system_error,
987
+ )
988
+
989
+ def _update_string_state(self, char: str, state: StringParseState) -> bool:
990
+ """Return True if char was handled as part of string parsing."""
991
+ if state.inside_string:
992
+ if state.escape_next_character:
993
+ state.escape_next_character = False
994
+ return True
995
+ if char == "\\":
996
+ state.escape_next_character = True
997
+ return True
998
+ if char == '"':
999
+ state.inside_string = False
1000
+ state.last_parsed_key = "".join(state.collected_string_chars)
1001
+ state.collected_string_chars.clear()
1002
+ return True
1003
+ state.collected_string_chars.append(char)
1004
+ return True
1005
+
1006
+ if char == '"':
1007
+ state.inside_string = True
1008
+ state.collected_string_chars.clear()
1009
+ return True
1010
+
1011
+ return False
1012
+
1013
+ def _detect_results_array(self, char: str, string_state: StringParseState, results_state: ResultsArrayState) -> None:
1014
+ if char == ":" and string_state.last_parsed_key == "results":
1015
+ results_state.expecting_results_array_start = True
1016
+ elif char == "[" and results_state.expecting_results_array_start:
1017
+ results_state.inside_results_array = True
1018
+ results_state.array_nesting_depth = 1
1019
+ results_state.expecting_results_array_start = False
1020
+
1021
+ def _parse_record_structure(
1022
+ self, char: str, results_state: ResultsArrayState, record_state: RecordParseState
1023
+ ) -> Optional[Dict[str, Any]]:
1024
+ if char == "{":
1025
+ if record_state.inside_record:
1026
+ record_state.record_nesting_depth += 1
1027
+ else:
1028
+ self._start_record(record_state)
1029
+ return None
1030
+
1031
+ if char == "}":
1032
+ if record_state.inside_record:
1033
+ record_state.record_nesting_depth -= 1
1034
+ if record_state.record_nesting_depth == 0:
1035
+ return self._finish_record(record_state)
1036
+ return None
1037
+
1038
+ if char == "[":
1039
+ if record_state.inside_record:
1040
+ record_state.record_nesting_depth += 1
1041
+ else:
1042
+ results_state.array_nesting_depth += 1
1043
+ return None
1044
+
1045
+ if char == "]":
1046
+ if record_state.inside_record:
1047
+ record_state.record_nesting_depth -= 1
1048
+ else:
1049
+ results_state.array_nesting_depth -= 1
1050
+ if results_state.array_nesting_depth == 0:
1051
+ results_state.inside_results_array = False
1052
+
1053
+ return None
1054
+
1055
+ @staticmethod
1056
+ def _append_to_current_record_if_any(char: str, record_state: RecordParseState):
1057
+ if record_state.inside_record:
1058
+ record_state.record_text_buffer.append(char)
1059
+
1060
+ @staticmethod
1061
+ def _start_record(record_state: RecordParseState):
1062
+ record_state.inside_record = True
1063
+ record_state.record_text_buffer = ["{"]
1064
+ record_state.record_nesting_depth = 1
1065
+
1066
+ @staticmethod
1067
+ def _finish_record(record_state: RecordParseState) -> Optional[Dict[str, Any]]:
1068
+ text = "".join(record_state.record_text_buffer).strip()
1069
+ record_state.inside_record = False
1070
+ record_state.record_text_buffer.clear()
1071
+ record_state.record_nesting_depth = 0
1072
+ return json.loads(text) if text else None
@@ -38,7 +38,7 @@ definitions:
38
38
  $ref: "#/schemas"
39
39
  authenticator:
40
40
  $ref: "#/definitions/authenticator"
41
- url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:search"
41
+ url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:searchStream"
42
42
  http_method: POST
43
43
  error_handler:
44
44
  $ref: "#/definitions/base_error_handler"
@@ -56,7 +56,7 @@ definitions:
56
56
  action: IGNORE
57
57
  http_codes:
58
58
  - 403
59
- error_message_contains: "The customer account can\\'t be accessed because it is not yet enabled or has been deactivated."
59
+ # error_message_contains: "The customer account can\\'t be accessed because it is not yet enabled or has been deactivated."
60
60
 
61
61
  base_selector:
62
62
  type: RecordSelector
@@ -285,6 +285,8 @@ definitions:
285
285
  state_migrations:
286
286
  - type: CustomStateMigration
287
287
  class_name: source_google_ads.components.GoogleAdsCriterionParentStateMigration
288
+ - type: CustomStateMigration
289
+ class_name: source_google_ads.components.GoogleAdsGlobalStateMigration
288
290
 
289
291
  accessible_accounts:
290
292
  $ref: "#/definitions/stream_base"
@@ -401,7 +403,7 @@ definitions:
401
403
  class_name: "source_google_ads.components.CustomGAQueryHttpRequester"
402
404
  authenticator:
403
405
  $ref: "#/definitions/authenticator"
404
- url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:search"
406
+ url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:searchStream"
405
407
  http_method: POST
406
408
  error_handler:
407
409
  $ref: "#/definitions/base_error_handler"
@@ -420,8 +422,11 @@ definitions:
420
422
  parent_key: "clientCustomer"
421
423
  partition_field: "customer_id"
422
424
  stream: "#/definitions/customer_client"
425
+ decoder:
426
+ type: CustomDecoder
427
+ class_name: "source_google_ads.components.GoogleAdsStreamingDecoder"
423
428
  paginator:
424
- $ref: "#/definitions/cursor_paginator"
429
+ type: NoPagination
425
430
  transformations:
426
431
  - type: CustomTransformation
427
432
  class_name: "source_google_ads.components.KeysToSnakeCaseGoogleAdsTransformation"
@@ -482,6 +487,13 @@ definitions:
482
487
 
483
488
  ad_group_ad_stream:
484
489
  $ref: "#/definitions/incremental_stream_base"
490
+ retriever:
491
+ $ref: "#/definitions/incremental_stream_base/retriever"
492
+ paginator:
493
+ type: NoPagination
494
+ decoder:
495
+ type: CustomDecoder
496
+ class_name: "source_google_ads.components.GoogleAdsStreamingDecoder"
485
497
  name: ad_group_ad
486
498
  primary_key:
487
499
  - ad_group.id
@@ -833,8 +845,7 @@ definitions:
833
845
  $parameters:
834
846
  url_base: "https://googleads.googleapis.com/v20/{{ stream_partition['customer_id'] }}/googleAds:search"
835
847
  retriever:
836
- type: CustomRetriever
837
- class_name: "source_google_ads.components.ChangeStatusRetriever"
848
+ type: SimpleRetriever
838
849
  requester:
839
850
  type: CustomRequester
840
851
  class_name: "source_google_ads.components.ChangeStatusRequester"
@@ -850,6 +861,12 @@ definitions:
850
861
  name: change_status
851
862
  paginator:
852
863
  $ref: "#/definitions/cursor_paginator"
864
+ pagination_reset:
865
+ type: PaginationReset
866
+ action: SPLIT_USING_CURSOR
867
+ limits:
868
+ type: PaginationResetLimits
869
+ number_of_records: 10000
853
870
  record_selector:
854
871
  type: RecordSelector
855
872
  $parameters: