airbyte-cdk 6.18.2__py3-none-any.whl → 6.19.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -678,7 +678,7 @@ definitions:
678
678
  properties:
679
679
  type:
680
680
  type: string
681
- enum: [ CustomSchemaNormalization ]
681
+ enum: [CustomSchemaNormalization]
682
682
  class_name:
683
683
  title: Class Name
684
684
  description: Fully-qualified name of the class that will be implementing the custom normalization. The format is `source_<name>.<package>.<class_name>`.
@@ -2886,6 +2886,7 @@ definitions:
2886
2886
  parser:
2887
2887
  anyOf:
2888
2888
  - "$ref": "#/definitions/GzipParser"
2889
+ - "$ref": "#/definitions/JsonParser"
2889
2890
  - "$ref": "#/definitions/JsonLineParser"
2890
2891
  - "$ref": "#/definitions/CsvParser"
2891
2892
  # PARSERS
@@ -2902,6 +2903,20 @@ definitions:
2902
2903
  anyOf:
2903
2904
  - "$ref": "#/definitions/JsonLineParser"
2904
2905
  - "$ref": "#/definitions/CsvParser"
2906
+ - "$ref": "#/definitions/JsonParser"
2907
+ JsonParser:
2908
+ title: JsonParser
2909
+ description: Parser used for parsing str, bytes, or bytearray data and returning data in a dictionary format.
2910
+ type: object
2911
+ required:
2912
+ - type
2913
+ properties:
2914
+ type:
2915
+ type: string
2916
+ enum: [JsonParser]
2917
+ encoding:
2918
+ type: string
2919
+ default: utf-8
2905
2920
  JsonLineParser:
2906
2921
  type: object
2907
2922
  required:
@@ -7,9 +7,12 @@ from dataclasses import dataclass
7
7
  from io import BufferedIOBase, TextIOWrapper
8
8
  from typing import Any, Generator, MutableMapping, Optional
9
9
 
10
+ import orjson
10
11
  import requests
11
12
 
13
+ from airbyte_cdk.models import FailureType
12
14
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
15
+ from airbyte_cdk.utils import AirbyteTracedException
13
16
 
14
17
  logger = logging.getLogger("airbyte")
15
18
 
@@ -42,6 +45,46 @@ class GzipParser(Parser):
42
45
  yield from self.inner_parser.parse(gzipobj)
43
46
 
44
47
 
48
@dataclass
class JsonParser(Parser):
    """Parser that reads a whole payload and deserializes it as one JSON document.

    Deserialization is attempted with ``orjson`` first and with the standard
    ``json`` module second. Both attempts decode the raw bytes with
    ``encoding`` before parsing.
    """

    # Text encoding used to decode the raw payload before it is parsed.
    encoding: str = "utf-8"

    def parse(self, data: BufferedIOBase) -> Generator[MutableMapping[str, Any], None, None]:
        """
        Read all of ``data`` and yield the records found in the JSON document.

        A top-level JSON array is flattened (each element is yielded on its own);
        any other top-level value is yielded as a single item.

        Raises:
            AirbyteTracedException: when neither library produces a value. Note that a
                literal ``null`` document also ends up here, because both helpers use
                ``None`` to signal failure.
        """
        raw_data = data.read()

        # Only fall back when orjson actually failed. Testing truthiness (`a or b`)
        # would needlessly re-parse valid-but-falsy documents such as `[]` or `{}`.
        body_json = self._parse_orjson(raw_data)
        if body_json is None:
            body_json = self._parse_json(raw_data)

        if body_json is None:
            raise AirbyteTracedException(
                message="Response JSON data failed to be parsed. See logs for more information.",
                internal_message="Response JSON data failed to be parsed.",
                failure_type=FailureType.system_error,
            )

        if isinstance(body_json, list):
            yield from body_json
        else:
            yield body_json

    def _parse_orjson(self, raw_data: bytes) -> Optional[Any]:
        """Return the document parsed by ``orjson``, or ``None`` (after a debug log) on any error."""
        try:
            return orjson.loads(raw_data.decode(self.encoding))
        except Exception as exc:
            # Deliberately broad: any decode/parse failure just triggers the json fallback.
            logger.debug(
                f"Failed to parse JSON data using orjson library. Falling back to json library. {exc}"
            )
            return None

    def _parse_json(self, raw_data: bytes) -> Optional[Any]:
        """Return the document parsed by the stdlib ``json`` module, or ``None`` (after an error log) on any error."""
        try:
            return json.loads(raw_data.decode(self.encoding))
        except Exception as exc:
            # Last resort failed; the caller turns the None into an AirbyteTracedException.
            logger.error(f"Failed to parse JSON data using json library. {exc}")
            return None
86
+
87
+
45
88
  @dataclass
46
89
  class JsonLineParser(Parser):
47
90
  encoding: Optional[str] = "utf-8"
@@ -1201,6 +1201,14 @@ class LegacySessionTokenAuthenticator(BaseModel):
1201
1201
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1202
1202
 
1203
1203
 
1204
# Manifest model for a component whose `type` is "JsonParser".
class JsonParser(BaseModel):
    class Config:
        # pydantic `Extra.allow`: keys not declared below are kept instead of being rejected.
        extra = Extra.allow

    # Discriminator; must be exactly the string "JsonParser".
    type: Literal["JsonParser"]
    # Optional text encoding for the payload; defaults to "utf-8".
    encoding: Optional[str] = "utf-8"
1210
+
1211
+
1204
1212
  class JsonLineParser(BaseModel):
1205
1213
  type: Literal["JsonLineParser"]
1206
1214
  encoding: Optional[str] = "utf-8"
@@ -1599,7 +1607,7 @@ class RecordSelector(BaseModel):
1599
1607
 
1600
1608
  class GzipParser(BaseModel):
1601
1609
  type: Literal["GzipParser"]
1602
- inner_parser: Union[JsonLineParser, CsvParser]
1610
+ inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
1603
1611
 
1604
1612
 
1605
1613
  class Spec(BaseModel):
@@ -1634,7 +1642,7 @@ class CompositeErrorHandler(BaseModel):
1634
1642
 
1635
1643
  class CompositeRawDecoder(BaseModel):
1636
1644
  type: Literal["CompositeRawDecoder"]
1637
- parser: Union[GzipParser, JsonLineParser, CsvParser]
1645
+ parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
1638
1646
 
1639
1647
 
1640
1648
  class DeclarativeSource1(BaseModel):
@@ -72,6 +72,8 @@ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
72
72
  CsvParser,
73
73
  GzipParser,
74
74
  JsonLineParser,
75
+ JsonParser,
76
+ Parser,
75
77
  )
76
78
  from airbyte_cdk.sources.declarative.extractors import (
77
79
  DpathExtractor,
@@ -247,6 +249,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
247
249
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
248
250
  JsonLineParser as JsonLineParserModel,
249
251
  )
252
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
253
+ JsonParser as JsonParserModel,
254
+ )
250
255
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
251
256
  JwtAuthenticator as JwtAuthenticatorModel,
252
257
  )
@@ -522,6 +527,7 @@ class ModelToComponentFactory:
522
527
  JsonDecoderModel: self.create_json_decoder,
523
528
  JsonlDecoderModel: self.create_jsonl_decoder,
524
529
  JsonLineParserModel: self.create_json_line_parser,
530
+ JsonParserModel: self.create_json_parser,
525
531
  GzipJsonDecoderModel: self.create_gzipjson_decoder,
526
532
  GzipParserModel: self.create_gzip_parser,
527
533
  KeysToLowerModel: self.create_keys_to_lower_transformation,
@@ -1032,17 +1038,17 @@ class ModelToComponentFactory:
1032
1038
  self, model: CursorPaginationModel, config: Config, decoder: Decoder, **kwargs: Any
1033
1039
  ) -> CursorPaginationStrategy:
1034
1040
  if isinstance(decoder, PaginationDecoderDecorator):
1035
- if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
1036
- raise ValueError(
1037
- f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
1038
- )
1041
+ inner_decoder = decoder.decoder
1042
+ else:
1043
+ inner_decoder = decoder
1044
+ decoder = PaginationDecoderDecorator(decoder=decoder)
1045
+
1046
+ if self._is_supported_decoder_for_pagination(inner_decoder):
1039
1047
  decoder_to_use = decoder
1040
1048
  else:
1041
- if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
1042
- raise ValueError(
1043
- f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
1044
- )
1045
- decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
1049
+ raise ValueError(
1050
+ self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1051
+ )
1046
1052
 
1047
1053
  return CursorPaginationStrategy(
1048
1054
  cursor_value=model.cursor_value,
@@ -1515,11 +1521,10 @@ class ModelToComponentFactory:
1515
1521
  cursor_used_for_stop_condition: Optional[DeclarativeCursor] = None,
1516
1522
  ) -> Union[DefaultPaginator, PaginatorTestReadDecorator]:
1517
1523
  if decoder:
1518
- if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
1519
- raise ValueError(
1520
- f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
1521
- )
1522
- decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
1524
+ if self._is_supported_decoder_for_pagination(decoder):
1525
+ decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
1526
+ else:
1527
+ raise ValueError(self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(decoder)))
1523
1528
  else:
1524
1529
  decoder_to_use = PaginationDecoderDecorator(decoder=JsonDecoder(parameters={}))
1525
1530
  page_size_option = (
@@ -1748,6 +1753,11 @@ class ModelToComponentFactory:
1748
1753
  def create_json_decoder(model: JsonDecoderModel, config: Config, **kwargs: Any) -> JsonDecoder:
1749
1754
  return JsonDecoder(parameters={})
1750
1755
 
1756
@staticmethod
def create_json_parser(model: JsonParserModel, config: Config, **kwargs: Any) -> JsonParser:
    """Build a runtime ``JsonParser`` from its manifest model.

    A missing or empty ``model.encoding`` is replaced by ``"utf-8"``.
    ``config`` and ``kwargs`` are accepted but not used here.
    """
    return JsonParser(encoding=model.encoding or "utf-8")
1760
+
1751
1761
  @staticmethod
1752
1762
  def create_jsonl_decoder(
1753
1763
  model: JsonlDecoderModel, config: Config, **kwargs: Any
@@ -1940,22 +1950,22 @@ class ModelToComponentFactory:
1940
1950
  message_repository=self._message_repository,
1941
1951
  )
1942
1952
 
1943
- @staticmethod
1944
1953
  def create_offset_increment(
1945
- model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
1954
+ self, model: OffsetIncrementModel, config: Config, decoder: Decoder, **kwargs: Any
1946
1955
  ) -> OffsetIncrement:
1947
1956
  if isinstance(decoder, PaginationDecoderDecorator):
1948
- if not isinstance(decoder.decoder, (JsonDecoder, XmlDecoder)):
1949
- raise ValueError(
1950
- f"Provided decoder of {type(decoder.decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
1951
- )
1957
+ inner_decoder = decoder.decoder
1958
+ else:
1959
+ inner_decoder = decoder
1960
+ decoder = PaginationDecoderDecorator(decoder=decoder)
1961
+
1962
+ if self._is_supported_decoder_for_pagination(inner_decoder):
1952
1963
  decoder_to_use = decoder
1953
1964
  else:
1954
- if not isinstance(decoder, (JsonDecoder, XmlDecoder)):
1955
- raise ValueError(
1956
- f"Provided decoder of {type(decoder)=} is not supported. Please set JsonDecoder or XmlDecoder instead."
1957
- )
1958
- decoder_to_use = PaginationDecoderDecorator(decoder=decoder)
1965
+ raise ValueError(
1966
+ self._UNSUPPORTED_DECODER_ERROR.format(decoder_type=type(inner_decoder))
1967
+ )
1968
+
1959
1969
  return OffsetIncrement(
1960
1970
  page_size=model.page_size,
1961
1971
  config=config,
@@ -2555,3 +2565,25 @@ class ModelToComponentFactory:
2555
2565
  components_mapping=components_mapping,
2556
2566
  parameters=model.parameters or {},
2557
2567
  )
2568
+
2569
# Template for the ValueError raised when a decoder cannot be used for pagination.
# Format with `decoder_type=<type of the offending decoder>`.
# Adjacent string literals are joined with no separator, so each sentence except the
# last carries its own trailing space.
_UNSUPPORTED_DECODER_ERROR = (
    "Specified decoder of {decoder_type} is not supported for pagination. "
    "Please set as `JsonDecoder`, `XmlDecoder`, or a `CompositeRawDecoder` with an inner_parser of `JsonParser` or `GzipParser` instead. "
    "If using `GzipParser`, please ensure that the lowest level inner_parser is a `JsonParser`."
)
2574
+
2575
def _is_supported_decoder_for_pagination(self, decoder: Decoder) -> bool:
    """Tell whether ``decoder`` may be used by a pagination component.

    ``JsonDecoder`` and ``XmlDecoder`` are always accepted. A ``CompositeRawDecoder``
    is accepted only when its ``parser`` passes ``_is_supported_parser_for_pagination``.
    Every other decoder is rejected.
    """
    if isinstance(decoder, (JsonDecoder, XmlDecoder)):
        return True
    # Composite decoders delegate the decision to their parser; everything else is unsupported.
    return isinstance(decoder, CompositeRawDecoder) and self._is_supported_parser_for_pagination(
        decoder.parser
    )
2582
+
2583
def _is_supported_parser_for_pagination(self, parser: Parser) -> bool:
    """Tell whether ``parser`` ultimately produces JSON and can therefore back pagination.

    A ``JsonParser`` is accepted directly. A ``GzipParser`` is accepted when the parser
    at the bottom of its ``inner_parser`` chain is a ``JsonParser``, however many gzip
    layers wrap it. Any other parser is rejected.
    """
    # Peel off every gzip layer so the check applies to the lowest-level parser,
    # not just to the first inner_parser.
    while isinstance(parser, GzipParser):
        parser = parser.inner_parser
    return isinstance(parser, JsonParser)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: airbyte-cdk
3
- Version: 6.18.2
3
+ Version: 6.19.0
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  License: MIT
6
6
  Keywords: airbyte,connector-development-kit,cdk
@@ -66,11 +66,11 @@ airbyte_cdk/sources/declarative/concurrent_declarative_source.py,sha256=tSTCSmyM
66
66
  airbyte_cdk/sources/declarative/datetime/__init__.py,sha256=l9LG7Qm6e5r_qgqfVKnx3mXYtg1I9MmMjomVIPfU4XA,177
67
67
  airbyte_cdk/sources/declarative/datetime/datetime_parser.py,sha256=SX9JjdesN1edN2WVUVMzU_ptqp2QB1OnsnjZ4mwcX7w,2579
68
68
  airbyte_cdk/sources/declarative/datetime/min_max_datetime.py,sha256=0BHBtDNQZfvwM45-tY5pNlTcKAFSGGNxemoi0Jic-0E,5785
69
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=00X3palFmBp9WqQugXgtzFVn7s17KYWKTrn83ObmBzc,134673
69
+ airbyte_cdk/sources/declarative/declarative_component_schema.yaml,sha256=IVkRHPGvhEvB2kokL8CAfrvCHpxE_b9Ox5um42br41U,135095
70
70
  airbyte_cdk/sources/declarative/declarative_source.py,sha256=nF7wBqFd3AQmEKAm4CnIo29CJoQL562cJGSCeL8U8bA,1531
71
71
  airbyte_cdk/sources/declarative/declarative_stream.py,sha256=JRyNeOIpsFu4ztVZsN6sncqUEIqIE-bUkD2TPgbMgk0,10375
72
72
  airbyte_cdk/sources/declarative/decoders/__init__.py,sha256=edGj4fGxznBk4xzRQyCA1rGfbpqe7z-RE0K3kQQWbgA,858
73
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=-aO3ujXX9YTP2ZDvI2BP-x0VOKdAq2TlHo4zG8DCTlY,2748
73
+ airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py,sha256=kQfUVMVhChKe5OngwIQrs0F9KGnRUN-CKVFakCU23DQ,4354
74
74
  airbyte_cdk/sources/declarative/decoders/decoder.py,sha256=sl-Gt8lXi7yD2Q-sD8je5QS2PbgrgsYjxRLWsay7DMc,826
75
75
  airbyte_cdk/sources/declarative/decoders/json_decoder.py,sha256=qdbjeR6RffKaah_iWvMsOcDolYuxJY5DaI3b9AMTZXg,3327
76
76
  airbyte_cdk/sources/declarative/decoders/noop_decoder.py,sha256=iZh0yKY_JzgBnJWiubEusf5c0o6Khd-8EWFWT-8EgFo,542
@@ -106,12 +106,12 @@ airbyte_cdk/sources/declarative/migrations/__init__.py,sha256=47DEQpj8HBSa-_TImW
106
106
  airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py,sha256=iemy3fKLczcU0-Aor7tx5jcT6DRedKMqyK7kCOp01hg,3924
107
107
  airbyte_cdk/sources/declarative/migrations/state_migration.py,sha256=KWPjealMLKSMtajXgkdGgKg7EmTLR-CqqD7UIh0-eDU,794
108
108
  airbyte_cdk/sources/declarative/models/__init__.py,sha256=nUFxNCiKeYRVXuZEKA7GD-lTHxsiKcQ8FitZjKhPIvE,100
109
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=1wrAW9XeEq2xdUAAkmHcelka-LOwyYb-izRcACkNPKM,94915
109
+ airbyte_cdk/sources/declarative/models/declarative_component_schema.py,sha256=dy9CSSvW1gAoMCAXkoOxLJTRVTrcHpYFENYgLqaUOwM,95087
110
110
  airbyte_cdk/sources/declarative/parsers/__init__.py,sha256=ZnqYNxHsKCgO38IwB34RQyRMXTs4GTvlRi3ImKnIioo,61
111
111
  airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=Rir9_z3Kcd5Es0-LChrzk-0qubAsiK_RSEnLmK2OXm8,553
112
112
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=CXwTfD3wSQq3okcqwigpprbHhSURUokh4GK2OmOyKC8,9132
113
113
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=IWUOdF03o-aQn0Occo1BJCxU0Pz-QILk5L67nzw2thw,6803
114
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=NElLb7eLDVmxDgtTX9fQ-ZPrpfH3d7RpMDaQiLtvuuQ,110550
114
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=v0Rd3V2b6AWJpQTOVlJbP58jRfNUwuhH22Q2fiA0itc,111475
115
115
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=HJ-Syp3p7RpyR_OK0X_a2kSyISfu3W-PKrRI16iY0a8,957
116
116
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=n82J15S8bjeMZ5uROu--P3hnbQoxkY5v7RPHYx7g7ro,2929
117
117
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=c5cuVFM6NFkuQqG8Z5IwkBuwDrvXZN1CunUOM_L0ezg,6892
@@ -343,8 +343,8 @@ airbyte_cdk/utils/slice_hasher.py,sha256=-pHexlNYoWYPnXNH-M7HEbjmeJe9Zk7SJijdQ7d
343
343
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=-5HTuNsnDBAhj-oLeQXwpTGA0HdcjFOf2zTEMUTTg_Y,816
344
344
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
345
345
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
346
- airbyte_cdk-6.18.2.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
- airbyte_cdk-6.18.2.dist-info/METADATA,sha256=Uwnd37XQTCsP3n7GzWDM5mQJsphngbGzpyvqnRyMt7I,6000
348
- airbyte_cdk-6.18.2.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
- airbyte_cdk-6.18.2.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
- airbyte_cdk-6.18.2.dist-info/RECORD,,
346
+ airbyte_cdk-6.19.0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
347
+ airbyte_cdk-6.19.0.dist-info/METADATA,sha256=QX8m8I4zsR63ujqpxXOBwVISjVjAB8YN-K5e0b_bJAQ,6000
348
+ airbyte_cdk-6.19.0.dist-info/WHEEL,sha256=IYZQI976HJqqOpQU6PHkJ8fb3tMNBFjg-Cn-pwAbaFM,88
349
+ airbyte_cdk-6.19.0.dist-info/entry_points.txt,sha256=fj-e3PAQvsxsQzyyq8UkG1k8spunWnD4BAH2AwlR6NM,95
350
+ airbyte_cdk-6.19.0.dist-info/RECORD,,