airbyte-cdk 6.20.2.dev0__py3-none-any.whl → 6.21.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. airbyte_cdk/sources/declarative/auth/oauth.py +34 -0
  2. airbyte_cdk/sources/declarative/checks/__init__.py +18 -2
  3. airbyte_cdk/sources/declarative/checks/check_dynamic_stream.py +51 -0
  4. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +16 -80
  5. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +123 -21
  6. airbyte_cdk/sources/declarative/decoders/__init__.py +9 -1
  7. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +43 -0
  8. airbyte_cdk/sources/declarative/decoders/zipfile_decoder.py +59 -0
  9. airbyte_cdk/sources/declarative/extractors/record_filter.py +5 -3
  10. airbyte_cdk/sources/declarative/incremental/__init__.py +0 -6
  11. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +0 -3
  12. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +0 -15
  13. airbyte_cdk/sources/declarative/manifest_declarative_source.py +2 -1
  14. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +112 -27
  15. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +127 -106
  16. airbyte_cdk/sources/declarative/requesters/README.md +56 -0
  17. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +33 -4
  18. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +1 -1
  19. airbyte_cdk/sources/declarative/schema/dynamic_schema_loader.py +13 -3
  20. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +11 -0
  21. airbyte_cdk/sources/file_based/exceptions.py +34 -0
  22. airbyte_cdk/sources/file_based/file_based_source.py +28 -5
  23. airbyte_cdk/sources/file_based/file_based_stream_reader.py +18 -4
  24. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +25 -2
  25. airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +30 -2
  26. airbyte_cdk/sources/streams/concurrent/cursor.py +21 -30
  27. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +33 -4
  28. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +42 -4
  29. airbyte_cdk/sources/types.py +3 -0
  30. airbyte_cdk/sources/utils/transform.py +29 -3
  31. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/METADATA +1 -1
  32. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/RECORD +35 -33
  33. airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +0 -331
  34. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/LICENSE.txt +0 -0
  35. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/WHEEL +0 -0
  36. {airbyte_cdk-6.20.2.dev0.dist-info → airbyte_cdk-6.21.0.dist-info}/entry_points.txt +0 -0
@@ -7,9 +7,12 @@ from dataclasses import dataclass
7
7
  from io import BufferedIOBase, TextIOWrapper
8
8
  from typing import Any, Generator, MutableMapping, Optional
9
9
 
10
+ import orjson
10
11
  import requests
11
12
 
13
+ from airbyte_cdk.models import FailureType
12
14
  from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
15
+ from airbyte_cdk.utils import AirbyteTracedException
13
16
 
14
17
  logger = logging.getLogger("airbyte")
15
18
 
@@ -42,6 +45,46 @@ class GzipParser(Parser):
42
45
  yield from self.inner_parser.parse(gzipobj)
43
46
 
44
47
 
48
@dataclass
class JsonParser(Parser):
    """
    Parser that deserializes an entire payload as one JSON document.

    A top-level JSON array is emitted element by element; any other parsed value is emitted as a single item.
    """

    # Text encoding used to decode the raw bytes before they are handed to the JSON libraries.
    encoding: str = "utf-8"

    def parse(self, data: BufferedIOBase) -> Generator[MutableMapping[str, Any], None, None]:
        """
        Attempts to deserialize data using orjson library. As an extra layer of safety we fallback on the json library to deserialize the data.

        :param data: buffered stream holding the encoded JSON payload; it is read to the end in one call.
        :return: generator over the parsed items.
        :raises AirbyteTracedException: when neither library produces a value. A payload that is just JSON
            `null` also ends up here because both helpers signal failure with None.
        """
        raw_data = data.read()

        # Fall back only when orjson produced nothing. Testing `is None` instead of truthiness keeps valid
        # but falsy documents ([], {}, 0, "", false) from being parsed a second time by the json library.
        body_json = self._parse_orjson(raw_data)
        if body_json is None:
            body_json = self._parse_json(raw_data)

        if body_json is None:
            raise AirbyteTracedException(
                message="Response JSON data failed to be parsed. See logs for more information.",
                internal_message="Response JSON data failed to be parsed.",
                failure_type=FailureType.system_error,
            )

        if isinstance(body_json, list):
            yield from body_json
        else:
            yield body_json

    def _parse_orjson(self, raw_data: bytes) -> Optional[Any]:
        """
        Decode `raw_data` with the configured encoding and parse it with orjson.

        :return: the parsed value, or None if decoding or parsing raised (the error is logged at debug level).
        """
        try:
            return orjson.loads(raw_data.decode(self.encoding))
        except Exception as exc:
            logger.debug(
                f"Failed to parse JSON data using orjson library. Falling back to json library. {exc}"
            )
            return None

    def _parse_json(self, raw_data: bytes) -> Optional[Any]:
        """
        Decode `raw_data` with the configured encoding and parse it with the standard json module.

        :return: the parsed value, or None if decoding or parsing raised (the error is logged at error level).
        """
        try:
            return json.loads(raw_data.decode(self.encoding))
        except Exception as exc:
            logger.error(f"Failed to parse JSON data using json library. {exc}")
            return None
86
+
87
+
45
88
  @dataclass
46
89
  class JsonLineParser(Parser):
47
90
  encoding: Optional[str] = "utf-8"
@@ -0,0 +1,59 @@
1
+ #
2
+ # Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3
+ #
4
+
5
+ import logging
6
+ import zipfile
7
+ from dataclasses import dataclass
8
+ from io import BytesIO
9
+ from typing import Any, Generator, MutableMapping
10
+
11
+ import orjson
12
+ import requests
13
+
14
+ from airbyte_cdk.models import FailureType
15
+ from airbyte_cdk.sources.declarative.decoders import Decoder
16
+ from airbyte_cdk.sources.declarative.decoders.composite_raw_decoder import (
17
+ Parser,
18
+ )
19
+ from airbyte_cdk.utils import AirbyteTracedException
20
+
21
+ logger = logging.getLogger("airbyte")
22
+
23
+
24
@dataclass
class ZipfileDecoder(Decoder):
    """
    Decoder for responses whose body is a zip archive.

    Every file stored in the archive is read into memory and handed to `parser`; the records the parser
    yields are emitted in archive order.
    """

    # Parser applied to the uncompressed bytes of each archive member.
    parser: Parser

    def is_stream_response(self) -> bool:
        # decode() reads `response.content`, i.e. the whole body, so the response is not consumed as a stream.
        return False

    def decode(
        self, response: requests.Response
    ) -> Generator[MutableMapping[str, Any], None, None]:
        """
        Unzip the response body and parse each contained file.

        :param response: HTTP response whose content is expected to be a zip archive.
        :return: generator over the records produced by `parser` for every file in the archive.
        :raises AirbyteTracedException: if the body is not a valid zip archive, or if the parser fails on
            one of the files (the original exception is chained as the cause).
        """
        try:
            with zipfile.ZipFile(BytesIO(response.content)) as zip_file:
                for member in zip_file.infolist():
                    # Directory entries carry no data; reading one returns empty bytes, which parsers
                    # expecting a document would reject. Only real files are parsed.
                    if member.is_dir():
                        continue
                    file_name = member.filename
                    buffered_content = BytesIO(zip_file.read(member))
                    try:
                        yield from self.parser.parse(buffered_content)
                    except Exception as e:
                        logger.error(
                            f"Failed to parse file: {file_name} from zip file: {response.request.url} with exception {e}."
                        )
                        raise AirbyteTracedException(
                            message=f"Failed to parse file: {file_name} from zip file.",
                            internal_message=f"Failed to parse file: {file_name} from zip file: {response.request.url}.",
                            failure_type=FailureType.system_error,
                        ) from e
        except zipfile.BadZipFile as e:
            logger.error(
                f"Received an invalid zip file in response to URL: {response.request.url}. "
                f"The size of the response body is: {len(response.content)}"
            )
            raise AirbyteTracedException(
                message="Received an invalid zip file in response.",
                internal_message=f"Received an invalid zip file in response to URL: {response.request.url}.",
                failure_type=FailureType.system_error,
            ) from e
@@ -59,11 +59,13 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
59
59
 
60
60
  def __init__(
61
61
  self,
62
- cursor: Union[DatetimeBasedCursor, PerPartitionWithGlobalCursor, GlobalSubstreamCursor],
62
+ date_time_based_cursor: DatetimeBasedCursor,
63
+ substream_cursor: Optional[Union[PerPartitionWithGlobalCursor, GlobalSubstreamCursor]],
63
64
  **kwargs: Any,
64
65
  ):
65
66
  super().__init__(**kwargs)
66
- self._cursor = cursor
67
+ self._date_time_based_cursor = date_time_based_cursor
68
+ self._substream_cursor = substream_cursor
67
69
 
68
70
  def filter_records(
69
71
  self,
@@ -75,7 +77,7 @@ class ClientSideIncrementalRecordFilterDecorator(RecordFilter):
75
77
  records = (
76
78
  record
77
79
  for record in records
78
- if self._cursor.should_be_synced(
80
+ if (self._substream_cursor or self._date_time_based_cursor).should_be_synced(
79
81
  # Record is created on the fly to align with cursors interface; stream name is ignored as we don't need it here
80
82
  # Record stream name is empty cause it is not used durig the filtering
81
83
  Record(data=record, associated_slice=stream_slice, stream_name="")
@@ -2,10 +2,6 @@
2
2
  # Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3
3
  #
4
4
 
5
- from airbyte_cdk.sources.declarative.incremental.concurrent_partition_cursor import (
6
- ConcurrentCursorFactory,
7
- ConcurrentPerPartitionCursor,
8
- )
9
5
  from airbyte_cdk.sources.declarative.incremental.datetime_based_cursor import DatetimeBasedCursor
10
6
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
11
7
  from airbyte_cdk.sources.declarative.incremental.global_substream_cursor import (
@@ -25,8 +21,6 @@ from airbyte_cdk.sources.declarative.incremental.resumable_full_refresh_cursor i
25
21
 
26
22
  __all__ = [
27
23
  "CursorFactory",
28
- "ConcurrentCursorFactory",
29
- "ConcurrentPerPartitionCursor",
30
24
  "DatetimeBasedCursor",
31
25
  "DeclarativeCursor",
32
26
  "GlobalSubstreamCursor",
@@ -64,9 +64,6 @@ class Timer:
64
64
  else:
65
65
  raise RuntimeError("Global substream cursor timer not started")
66
66
 
67
- def is_running(self) -> bool:
68
- return self._start is not None
69
-
70
67
 
71
68
  class GlobalSubstreamCursor(DeclarativeCursor):
72
69
  """
@@ -303,21 +303,6 @@ class PerPartitionCursor(DeclarativeCursor):
303
303
  raise ValueError("A partition needs to be provided in order to get request body json")
304
304
 
305
305
  def should_be_synced(self, record: Record) -> bool:
306
- if (
307
- record.associated_slice
308
- and self._to_partition_key(record.associated_slice.partition)
309
- not in self._cursor_per_partition
310
- ):
311
- partition_state = (
312
- self._state_to_migrate_from
313
- if self._state_to_migrate_from
314
- else self._NO_CURSOR_STATE
315
- )
316
- cursor = self._create_cursor(partition_state)
317
-
318
- self._cursor_per_partition[
319
- self._to_partition_key(record.associated_slice.partition)
320
- ] = cursor
321
306
  return self._get_cursor(record).should_be_synced(
322
307
  self._convert_record_to_cursor_record(record)
323
308
  )
@@ -22,6 +22,7 @@ from airbyte_cdk.models import (
22
22
  ConnectorSpecification,
23
23
  FailureType,
24
24
  )
25
+ from airbyte_cdk.sources.declarative.checks import COMPONENTS_CHECKER_TYPE_MAPPING
25
26
  from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
26
27
  from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
27
28
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
@@ -107,7 +108,7 @@ class ManifestDeclarativeSource(DeclarativeSource):
107
108
  if "type" not in check:
108
109
  check["type"] = "CheckStream"
109
110
  check_stream = self._constructor.create_component(
110
- CheckStreamModel,
111
+ COMPONENTS_CHECKER_TYPE_MAPPING[check["type"]],
111
112
  check,
112
113
  dict(),
113
114
  emit_connector_builder_messages=self._emit_connector_builder_messages,
@@ -52,6 +52,15 @@ class CheckStream(BaseModel):
52
52
  )
53
53
 
54
54
 
55
class CheckDynamicStream(BaseModel):
    # Schema model for the `CheckDynamicStream` component.
    # `stream_count` is required (no default) and, per its description, is the number of
    # streams to try reading from when a check operation runs.
    type: Literal["CheckDynamicStream"]
    stream_count: int = Field(
        ...,
        title="Stream Count",
        description="Numbers of the streams to try reading from when running a check operation.",
    )
62
+
63
+
55
64
  class ConcurrencyLevel(BaseModel):
56
65
  type: Optional[Literal["ConcurrencyLevel"]] = None
57
66
  default_concurrency: Union[int, str] = Field(
@@ -481,12 +490,24 @@ class RefreshTokenUpdater(BaseModel):
481
490
 
482
491
  class OAuthAuthenticator(BaseModel):
483
492
  type: Literal["OAuthAuthenticator"]
493
+ client_id_name: Optional[str] = Field(
494
+ "client_id",
495
+ description="The name of the property to use to refresh the `access_token`.",
496
+ examples=["custom_app_id"],
497
+ title="Client ID Property Name",
498
+ )
484
499
  client_id: str = Field(
485
500
  ...,
486
501
  description="The OAuth client ID. Fill it in the user inputs.",
487
502
  examples=["{{ config['client_id }}", "{{ config['credentials']['client_id }}"],
488
503
  title="Client ID",
489
504
  )
505
+ client_secret_name: Optional[str] = Field(
506
+ "client_secret",
507
+ description="The name of the property to use to refresh the `access_token`.",
508
+ examples=["custom_app_secret"],
509
+ title="Client Secret Property Name",
510
+ )
490
511
  client_secret: str = Field(
491
512
  ...,
492
513
  description="The OAuth client secret. Fill it in the user inputs.",
@@ -496,6 +517,12 @@ class OAuthAuthenticator(BaseModel):
496
517
  ],
497
518
  title="Client Secret",
498
519
  )
520
+ refresh_token_name: Optional[str] = Field(
521
+ "refresh_token",
522
+ description="The name of the property to use to refresh the `access_token`.",
523
+ examples=["custom_app_refresh_value"],
524
+ title="Refresh Token Property Name",
525
+ )
499
526
  refresh_token: Optional[str] = Field(
500
527
  None,
501
528
  description="Credential artifact used to get a new access token.",
@@ -529,6 +556,12 @@ class OAuthAuthenticator(BaseModel):
529
556
  examples=["expires_in"],
530
557
  title="Token Expiry Property Name",
531
558
  )
559
+ grant_type_name: Optional[str] = Field(
560
+ "grant_type",
561
+ description="The name of the property to use to refresh the `access_token`.",
562
+ examples=["custom_grant_type"],
563
+ title="Grant Type Property Name",
564
+ )
532
565
  grant_type: Optional[str] = Field(
533
566
  "refresh_token",
534
567
  description="Specifies the OAuth2 grant type. If set to refresh_token, the refresh_token needs to be provided as well. For client_credentials, only client id and secret are required. Other grant types are not officially supported.",
@@ -547,6 +580,17 @@ class OAuthAuthenticator(BaseModel):
547
580
  ],
548
581
  title="Refresh Request Body",
549
582
  )
583
+ refresh_request_headers: Optional[Dict[str, Any]] = Field(
584
+ None,
585
+ description="Headers of the request sent to get a new access token.",
586
+ examples=[
587
+ {
588
+ "Authorization": "<AUTH_TOKEN>",
589
+ "Content-Type": "application/x-www-form-urlencoded",
590
+ }
591
+ ],
592
+ title="Refresh Request Headers",
593
+ )
550
594
  scopes: Optional[List[str]] = Field(
551
595
  None,
552
596
  description="List of scopes that should be granted to the access token.",
@@ -675,6 +719,7 @@ class HttpResponseFilter(BaseModel):
675
719
  class TypesMap(BaseModel):
676
720
  target_type: Union[str, List[str]]
677
721
  current_type: Union[str, List[str]]
722
+ condition: Optional[str]
678
723
 
679
724
 
680
725
  class SchemaTypeIdentifier(BaseModel):
@@ -737,33 +782,43 @@ class KeysToSnakeCase(BaseModel):
737
782
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
738
783
 
739
784
 
785
class FlattenFields(BaseModel):
    # Schema model for the `FlattenFields` component.
    # `flatten_lists` defaults to True; `parameters` is populated from the `$parameters` key.
    type: Literal["FlattenFields"]
    flatten_lists: Optional[bool] = Field(
        True,
        title="Flatten Lists",
        description="Whether to flatten lists or leave it as is. Default is True.",
    )
    parameters: Optional[Dict[str, Any]] = Field(
        None,
        alias="$parameters",
    )
793
+
794
+
740
795
  class KeysReplace(BaseModel):
741
796
  type: Literal["KeysReplace"]
742
797
  old: str = Field(
743
798
  ...,
744
799
  description="Old value to replace.",
745
- examples=[" ", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
800
+ examples=[
801
+ " ",
802
+ "{{ record.id }}",
803
+ "{{ config['id'] }}",
804
+ "{{ stream_slice['id'] }}",
805
+ ],
746
806
  title="Old value",
747
807
  )
748
808
  new: str = Field(
749
809
  ...,
750
810
  description="New value to set.",
751
- examples=["_", "{{ record.id }}", "{{ config['id'] }}", "{{ stream_slice['id'] }}"],
811
+ examples=[
812
+ "_",
813
+ "{{ record.id }}",
814
+ "{{ config['id'] }}",
815
+ "{{ stream_slice['id'] }}",
816
+ ],
752
817
  title="New value",
753
818
  )
754
819
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
755
820
 
756
821
 
757
- class FlattenFields(BaseModel):
758
- type: Literal["FlattenFields"]
759
- flatten_lists: Optional[bool] = Field(
760
- True,
761
- description="Whether to flatten lists or leave it as is. Default is True.",
762
- title="Flatten Lists",
763
- )
764
- parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
765
-
766
-
767
822
  class IterableDecoder(BaseModel):
768
823
  type: Literal["IterableDecoder"]
769
824
 
@@ -849,8 +904,8 @@ class OauthConnectorInputSpecification(BaseModel):
849
904
  ...,
850
905
  description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
851
906
  examples=[
852
- "https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",
853
- "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}",
907
+ "https://domain.host.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",
908
+ "https://endpoint.host.com/oauth2/authorize?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}&{{scope_key}}={{{{scope_value}} | urlEncoder}}&{{state_key}}={{state_value}}&subdomain={{subdomain}}",
854
909
  ],
855
910
  title="Consent URL",
856
911
  )
@@ -864,14 +919,18 @@ class OauthConnectorInputSpecification(BaseModel):
864
919
  ...,
865
920
  description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
866
921
  examples=[
867
- "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
922
+ "https://auth.host.com/oauth2/token?{{client_id_key}}={{client_id_value}}&{{client_secret_key}}={{client_secret_value}}&{{auth_code_key}}={{auth_code_value}}&{{redirect_uri_key}}={{{{redirect_uri_value}} | urlEncoder}}"
868
923
  ],
869
924
  title="Access Token URL",
870
925
  )
871
926
  access_token_headers: Optional[Dict[str, Any]] = Field(
872
927
  None,
873
928
  description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
874
- examples=[{"Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"}],
929
+ examples=[
930
+ {
931
+ "Authorization": "Basic {{ {{ client_id_value }}:{{ client_secret_value }} | base64Encoder }}"
932
+ }
933
+ ],
875
934
  title="Access Token Headers",
876
935
  )
877
936
  access_token_params: Optional[Dict[str, Any]] = Field(
@@ -879,15 +938,15 @@ class OauthConnectorInputSpecification(BaseModel):
879
938
  description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
880
939
  examples=[
881
940
  {
882
- "{auth_code_key}": "{{auth_code_key}}",
883
- "{client_id_key}": "{{client_id_key}}",
884
- "{client_secret_key}": "{{client_secret_key}}",
941
+ "{{ auth_code_key }}": "{{ auth_code_value }}",
942
+ "{{ client_id_key }}": "{{ client_id_value }}",
943
+ "{{ client_secret_key }}": "{{ client_secret_value }}",
885
944
  }
886
945
  ],
887
946
  title="Access Token Query Params (Json Encoded)",
888
947
  )
889
- extract_output: List[str] = Field(
890
- ...,
948
+ extract_output: Optional[List[str]] = Field(
949
+ None,
891
950
  description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config.",
892
951
  examples=[["access_token", "refresh_token", "other_field"]],
893
952
  title="Extract Output",
@@ -956,7 +1015,7 @@ class OAuthConfigSpecification(BaseModel):
956
1015
  )
957
1016
  oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
958
1017
  None,
959
- description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
1018
+ description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{{my_var}}`.\n- The nested resolution variables like `{{ {{my_nested_var}} }}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {{ {{my_var_a}}:{{my_var_b}} | base64Encoder }}\n + base64Decorer - decode from `base64` encoded string, {{ {{my_string_variable_or_string_value}} | base64Decoder }}\n + urlEncoder - encode the input string to URL-like format, {{ https://test.host.com/endpoint | urlEncoder}}\n + urlDecorer - decode the input url-encoded string into text format, {{ urlDecoder:https%3A%2F%2Fairbyte.io | urlDecoder}}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {{ {{state_value}} | codeChallengeS256 }}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{{client_id_key}}={{client_id_value}}&{{redirect_uri_key}}={{ {{redirect_uri_value}} | urlEncoder}}&{{state_key}}={{state_value}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{{ auth_code_key }}": "{{ auth_code_value }}",\n "{{ client_id_key }}": "{{ client_id_value }}",\n "{{ client_secret_key }}": "{{ client_secret_value }}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
960
1019
  title="DeclarativeOAuth Connector Specification",
961
1020
  )
962
1021
  complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
@@ -1163,6 +1222,11 @@ class LegacySessionTokenAuthenticator(BaseModel):
1163
1222
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1164
1223
 
1165
1224
 
1225
class JsonParser(BaseModel):
    # Schema model for the `JsonParser` component.
    type: Literal["JsonParser"]
    # Encoding name for the parser; defaults to "utf-8" when omitted.
    encoding: Optional[str] = "utf-8"
1228
+
1229
+
1166
1230
  class JsonLineParser(BaseModel):
1167
1231
  type: Literal["JsonLineParser"]
1168
1232
  encoding: Optional[str] = "utf-8"
@@ -1561,7 +1625,7 @@ class RecordSelector(BaseModel):
1561
1625
 
1562
1626
  class GzipParser(BaseModel):
1563
1627
  type: Literal["GzipParser"]
1564
- inner_parser: Union[JsonLineParser, CsvParser]
1628
+ inner_parser: Union[JsonLineParser, CsvParser, JsonParser]
1565
1629
 
1566
1630
 
1567
1631
  class Spec(BaseModel):
@@ -1594,9 +1658,21 @@ class CompositeErrorHandler(BaseModel):
1594
1658
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1595
1659
 
1596
1660
 
1661
class ZipfileDecoder(BaseModel):
    # Schema model for the `ZipfileDecoder` component.
    # Unknown extra keys are accepted (Extra.allow); `parser` is required and must be one of the
    # listed parser models.
    class Config:
        extra = Extra.allow

    type: Literal["ZipfileDecoder"]
    parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser] = Field(
        ...,
        title="Parser",
        description="Parser to parse the decompressed data from the zipfile(s).",
    )
1671
+
1672
+
1597
1673
  class CompositeRawDecoder(BaseModel):
1598
1674
  type: Literal["CompositeRawDecoder"]
1599
- parser: Union[GzipParser, JsonLineParser, CsvParser]
1675
+ parser: Union[GzipParser, JsonParser, JsonLineParser, CsvParser]
1600
1676
 
1601
1677
 
1602
1678
  class DeclarativeSource1(BaseModel):
@@ -1604,7 +1680,7 @@ class DeclarativeSource1(BaseModel):
1604
1680
  extra = Extra.forbid
1605
1681
 
1606
1682
  type: Literal["DeclarativeSource"]
1607
- check: CheckStream
1683
+ check: Union[CheckStream, CheckDynamicStream]
1608
1684
  streams: List[DeclarativeStream]
1609
1685
  dynamic_streams: Optional[List[DynamicDeclarativeStream]] = None
1610
1686
  version: str = Field(
@@ -1630,7 +1706,7 @@ class DeclarativeSource2(BaseModel):
1630
1706
  extra = Extra.forbid
1631
1707
 
1632
1708
  type: Literal["DeclarativeSource"]
1633
- check: CheckStream
1709
+ check: Union[CheckStream, CheckDynamicStream]
1634
1710
  streams: Optional[List[DeclarativeStream]] = None
1635
1711
  dynamic_streams: List[DynamicDeclarativeStream]
1636
1712
  version: str = Field(
@@ -1799,7 +1875,7 @@ class SessionTokenAuthenticator(BaseModel):
1799
1875
  description="Authentication method to use for requests sent to the API, specifying how to inject the session token.",
1800
1876
  title="Data Request Authentication",
1801
1877
  )
1802
- decoder: Optional[Union[JsonDecoder, XmlDecoder]] = Field(
1878
+ decoder: Optional[Union[JsonDecoder, XmlDecoder, CompositeRawDecoder]] = Field(
1803
1879
  None, description="Component used to decode the response.", title="Decoder"
1804
1880
  )
1805
1881
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
@@ -2004,6 +2080,7 @@ class SimpleRetriever(BaseModel):
2004
2080
  XmlDecoder,
2005
2081
  GzipJsonDecoder,
2006
2082
  CompositeRawDecoder,
2083
+ ZipfileDecoder,
2007
2084
  ]
2008
2085
  ] = Field(
2009
2086
  None,
@@ -2040,6 +2117,10 @@ class AsyncRetriever(BaseModel):
2040
2117
  ...,
2041
2118
  description="Requester component that describes how to prepare HTTP requests to send to the source API to fetch the status of the running async job.",
2042
2119
  )
2120
+ url_requester: Optional[Union[CustomRequester, HttpRequester]] = Field(
2121
+ None,
2122
+ description="Requester component that describes how to prepare HTTP requests to send to the source API to extract the url from polling response by the completed async job.",
2123
+ )
2043
2124
  download_requester: Union[CustomRequester, HttpRequester] = Field(
2044
2125
  ...,
2045
2126
  description="Requester component that describes how to prepare HTTP requests to send to the source API to download the data provided by the completed async job.",
@@ -2076,6 +2157,8 @@ class AsyncRetriever(BaseModel):
2076
2157
  IterableDecoder,
2077
2158
  XmlDecoder,
2078
2159
  GzipJsonDecoder,
2160
+ CompositeRawDecoder,
2161
+ ZipfileDecoder,
2079
2162
  ]
2080
2163
  ] = Field(
2081
2164
  None,
@@ -2090,6 +2173,8 @@ class AsyncRetriever(BaseModel):
2090
2173
  IterableDecoder,
2091
2174
  XmlDecoder,
2092
2175
  GzipJsonDecoder,
2176
+ CompositeRawDecoder,
2177
+ ZipfileDecoder,
2093
2178
  ]
2094
2179
  ] = Field(
2095
2180
  None,