airbyte-cdk 6.7.1rc4__py3-none-any.whl → 6.7.2.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (124)
  1. airbyte_cdk/cli/source_declarative_manifest/_run.py +2 -1
  2. airbyte_cdk/config_observation.py +2 -1
  3. airbyte_cdk/connector.py +1 -0
  4. airbyte_cdk/connector_builder/connector_builder_handler.py +1 -1
  5. airbyte_cdk/connector_builder/main.py +2 -1
  6. airbyte_cdk/destinations/destination.py +2 -1
  7. airbyte_cdk/destinations/vector_db_based/config.py +2 -1
  8. airbyte_cdk/destinations/vector_db_based/document_processor.py +4 -3
  9. airbyte_cdk/destinations/vector_db_based/embedder.py +5 -4
  10. airbyte_cdk/entrypoint.py +3 -2
  11. airbyte_cdk/logger.py +2 -1
  12. airbyte_cdk/models/__init__.py +2 -0
  13. airbyte_cdk/models/airbyte_protocol.py +2 -1
  14. airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +3 -3
  15. airbyte_cdk/sources/concurrent_source/concurrent_source.py +1 -1
  16. airbyte_cdk/sources/config.py +2 -1
  17. airbyte_cdk/sources/declarative/auth/jwt.py +1 -0
  18. airbyte_cdk/sources/declarative/auth/oauth.py +1 -0
  19. airbyte_cdk/sources/declarative/auth/selective_authenticator.py +1 -0
  20. airbyte_cdk/sources/declarative/auth/token.py +2 -1
  21. airbyte_cdk/sources/declarative/auth/token_provider.py +3 -2
  22. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +66 -8
  23. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +196 -0
  24. airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -2
  25. airbyte_cdk/sources/declarative/decoders/noop_decoder.py +1 -0
  26. airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +1 -0
  27. airbyte_cdk/sources/declarative/decoders/xml_decoder.py +1 -0
  28. airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +1 -0
  29. airbyte_cdk/sources/declarative/extractors/http_selector.py +1 -0
  30. airbyte_cdk/sources/declarative/extractors/record_filter.py +6 -48
  31. airbyte_cdk/sources/declarative/extractors/record_selector.py +32 -4
  32. airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +7 -2
  33. airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +2 -1
  34. airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +5 -2
  35. airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +5 -2
  36. airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +1 -3
  37. airbyte_cdk/sources/declarative/interpolation/jinja.py +5 -4
  38. airbyte_cdk/sources/declarative/manifest_declarative_source.py +4 -3
  39. airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +1 -1
  40. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +144 -0
  41. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +45 -4
  42. airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +1 -0
  43. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +1 -0
  44. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/exponential_backoff_strategy.py +1 -0
  45. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +1 -0
  46. airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +1 -0
  47. airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +1 -0
  48. airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +1 -0
  49. airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +1 -0
  50. airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +1 -0
  51. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +3 -2
  52. airbyte_cdk/sources/declarative/requesters/http_requester.py +1 -0
  53. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +1 -0
  54. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +1 -0
  55. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +1 -0
  56. airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +1 -0
  57. airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +1 -0
  58. airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +1 -0
  59. airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +1 -0
  60. airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +9 -3
  61. airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -1
  62. airbyte_cdk/sources/declarative/requesters/requester.py +1 -0
  63. airbyte_cdk/sources/declarative/retrievers/async_retriever.py +2 -1
  64. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +12 -7
  65. airbyte_cdk/sources/declarative/stream_slicers/declarative_partition_generator.py +7 -4
  66. airbyte_cdk/sources/declarative/transformations/add_fields.py +1 -0
  67. airbyte_cdk/sources/declarative/transformations/remove_fields.py +1 -0
  68. airbyte_cdk/sources/declarative/yaml_declarative_source.py +1 -0
  69. airbyte_cdk/sources/embedded/tools.py +1 -0
  70. airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
  71. airbyte_cdk/sources/file_based/config/avro_format.py +2 -1
  72. airbyte_cdk/sources/file_based/config/csv_format.py +2 -1
  73. airbyte_cdk/sources/file_based/config/excel_format.py +2 -1
  74. airbyte_cdk/sources/file_based/config/file_based_stream_config.py +2 -1
  75. airbyte_cdk/sources/file_based/config/jsonl_format.py +2 -1
  76. airbyte_cdk/sources/file_based/config/parquet_format.py +2 -1
  77. airbyte_cdk/sources/file_based/config/unstructured_format.py +2 -1
  78. airbyte_cdk/sources/file_based/file_based_source.py +2 -1
  79. airbyte_cdk/sources/file_based/file_based_stream_reader.py +2 -1
  80. airbyte_cdk/sources/file_based/file_types/avro_parser.py +1 -0
  81. airbyte_cdk/sources/file_based/file_types/csv_parser.py +2 -1
  82. airbyte_cdk/sources/file_based/file_types/excel_parser.py +5 -5
  83. airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +2 -1
  84. airbyte_cdk/sources/file_based/file_types/parquet_parser.py +2 -1
  85. airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +9 -8
  86. airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +2 -1
  87. airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +5 -4
  88. airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +1 -1
  89. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +1 -1
  90. airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +1 -1
  91. airbyte_cdk/sources/http_logger.py +1 -0
  92. airbyte_cdk/sources/streams/call_rate.py +1 -2
  93. airbyte_cdk/sources/streams/concurrent/abstract_stream.py +2 -1
  94. airbyte_cdk/sources/streams/concurrent/adapters.py +8 -4
  95. airbyte_cdk/sources/streams/concurrent/availability_strategy.py +2 -1
  96. airbyte_cdk/sources/streams/concurrent/cursor.py +52 -9
  97. airbyte_cdk/sources/streams/concurrent/default_stream.py +1 -0
  98. airbyte_cdk/sources/streams/concurrent/partitions/partition.py +1 -1
  99. airbyte_cdk/sources/streams/concurrent/partitions/types.py +1 -1
  100. airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +1 -1
  101. airbyte_cdk/sources/streams/core.py +2 -1
  102. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +2 -1
  103. airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +1 -0
  104. airbyte_cdk/sources/streams/http/error_handlers/json_error_message_parser.py +1 -0
  105. airbyte_cdk/sources/streams/http/error_handlers/response_models.py +2 -1
  106. airbyte_cdk/sources/streams/http/http.py +3 -2
  107. airbyte_cdk/sources/streams/http/http_client.py +58 -11
  108. airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +2 -1
  109. airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +1 -0
  110. airbyte_cdk/sources/types.py +14 -1
  111. airbyte_cdk/sources/utils/schema_helpers.py +3 -2
  112. airbyte_cdk/sql/secrets.py +2 -1
  113. airbyte_cdk/sql/shared/sql_processor.py +8 -6
  114. airbyte_cdk/test/entrypoint_wrapper.py +4 -3
  115. airbyte_cdk/test/mock_http/mocker.py +1 -0
  116. airbyte_cdk/utils/schema_inferrer.py +2 -1
  117. airbyte_cdk/utils/slice_hasher.py +1 -1
  118. airbyte_cdk/utils/traced_exception.py +2 -1
  119. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/METADATA +9 -2
  120. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/RECORD +123 -124
  121. airbyte_cdk/sources/streams/concurrent/partitions/record.py +0 -35
  122. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/LICENSE.txt +0 -0
  123. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/WHEEL +0 -0
  124. {airbyte_cdk-6.7.1rc4.dist-info → airbyte_cdk-6.7.2.dev0.dist-info}/entry_points.txt +0 -0
@@ -567,6 +567,11 @@ class DpathExtractor(BaseModel):
567
567
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
568
568
 
569
569
 
570
+ class ResponseToFileExtractor(BaseModel):
571
+ type: Literal["ResponseToFileExtractor"]
572
+ parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
573
+
574
+
570
575
  class ExponentialBackoffStrategy(BaseModel):
571
576
  type: Literal["ExponentialBackoffStrategy"]
572
577
  factor: Optional[Union[float, str]] = Field(
@@ -748,6 +753,123 @@ class NoPagination(BaseModel):
748
753
  type: Literal["NoPagination"]
749
754
 
750
755
 
756
+ class State(BaseModel):
757
+ class Config:
758
+ extra = Extra.allow
759
+
760
+ min: int
761
+ max: int
762
+
763
+
764
+ class OauthConnectorInputSpecification(BaseModel):
765
+ class Config:
766
+ extra = Extra.allow
767
+
768
+ consent_url: str = Field(
769
+ ...,
770
+ description="The DeclarativeOAuth Specific string URL string template to initiate the authentication.\nThe placeholders are replaced during the processing to provide neccessary values.",
771
+ examples=[
772
+ {
773
+ "consent_url": "https://domain.host.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}"
774
+ },
775
+ {
776
+ "consent_url": "https://endpoint.host.com/oauth2/authorize?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{scope_key}={urlEncoder:{{scope_key}}}&{state_key}={{state_key}}&subdomain={subdomain}"
777
+ },
778
+ ],
779
+ title="DeclarativeOAuth Consent URL",
780
+ )
781
+ scope: Optional[str] = Field(
782
+ None,
783
+ description="The DeclarativeOAuth Specific string of the scopes needed to be grant for authenticated user.",
784
+ examples=[{"scope": "user:read user:read_orders workspaces:read"}],
785
+ title="(Optional) DeclarativeOAuth Scope",
786
+ )
787
+ access_token_url: str = Field(
788
+ ...,
789
+ description="The DeclarativeOAuth Specific URL templated string to obtain the `access_token`, `refresh_token` etc.\nThe placeholders are replaced during the processing to provide neccessary values.",
790
+ examples=[
791
+ {
792
+ "access_token_url": "https://auth.host.com/oauth2/token?{client_id_key}={{client_id_key}}&{client_secret_key}={{client_secret_key}}&{auth_code_key}={{auth_code_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}"
793
+ }
794
+ ],
795
+ title="DeclarativeOAuth Access Token URL",
796
+ )
797
+ access_token_headers: Optional[Dict[str, Any]] = Field(
798
+ None,
799
+ description="The DeclarativeOAuth Specific optional headers to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.",
800
+ examples=[
801
+ {
802
+ "access_token_headers": {
803
+ "Authorization": "Basic {base64Encoder:{client_id}:{client_secret}}"
804
+ }
805
+ }
806
+ ],
807
+ title="(Optional) DeclarativeOAuth Access Token Headers",
808
+ )
809
+ access_token_params: Optional[Dict[str, Any]] = Field(
810
+ None,
811
+ description="The DeclarativeOAuth Specific optional query parameters to inject while exchanging the `auth_code` to `access_token` during `completeOAuthFlow` step.\nWhen this property is provided, the query params will be encoded as `Json` and included in the outgoing API request.",
812
+ examples=[
813
+ {
814
+ "access_token_params": {
815
+ "{auth_code_key}": "{{auth_code_key}}",
816
+ "{client_id_key}": "{{client_id_key}}",
817
+ "{client_secret_key}": "{{client_secret_key}}",
818
+ }
819
+ }
820
+ ],
821
+ title="(Optional) DeclarativeOAuth Access Token Query Params (Json Encoded)",
822
+ )
823
+ extract_output: List[str] = Field(
824
+ ...,
825
+ description="The DeclarativeOAuth Specific list of strings to indicate which keys should be extracted and returned back to the input config. ",
826
+ examples=[{"extract_output": ["access_token", "refresh_token", "other_field"]}],
827
+ title="DeclarativeOAuth Extract Output",
828
+ )
829
+ state: Optional[State] = Field(
830
+ None,
831
+ description="The DeclarativeOAuth Specific object to provide the criteria of how the `state` query param should be constructed,\nincluding length and complexity. ",
832
+ examples=[{"state": {"min": 7, "max": 128}}],
833
+ title="(Optional) DeclarativeOAuth Configurable State Query Param",
834
+ )
835
+ client_id_key: Optional[str] = Field(
836
+ None,
837
+ description="The DeclarativeOAuth Specific optional override to provide the custom `client_id` key name, if required by data-provider.",
838
+ examples=[{"client_id_key": "my_custom_client_id_key_name"}],
839
+ title="(Optional) DeclarativeOAuth Client ID Key Override",
840
+ )
841
+ client_secret_key: Optional[str] = Field(
842
+ None,
843
+ description="The DeclarativeOAuth Specific optional override to provide the custom `client_secret` key name, if required by data-provider.",
844
+ examples=[{"client_secret_key": "my_custom_client_secret_key_name"}],
845
+ title="(Optional) DeclarativeOAuth Client Secret Key Override",
846
+ )
847
+ scope_key: Optional[str] = Field(
848
+ None,
849
+ description="The DeclarativeOAuth Specific optional override to provide the custom `scope` key name, if required by data-provider.",
850
+ examples=[{"scope_key": "my_custom_scope_key_key_name"}],
851
+ title="(Optional) DeclarativeOAuth Scope Key Override",
852
+ )
853
+ state_key: Optional[str] = Field(
854
+ None,
855
+ description="The DeclarativeOAuth Specific optional override to provide the custom `state` key name, if required by data-provider. ",
856
+ examples=[{"state_key": "my_custom_state_key_key_name"}],
857
+ title="(Optional) DeclarativeOAuth State Key Override",
858
+ )
859
+ auth_code_key: Optional[str] = Field(
860
+ None,
861
+ description="The DeclarativeOAuth Specific optional override to provide the custom `code` key name to something like `auth_code` or `custom_auth_code`, if required by data-provider. ",
862
+ examples=[{"auth_code_key": "my_custom_auth_code_key_name"}],
863
+ title="(Optional) DeclarativeOAuth Auth Code Key Override",
864
+ )
865
+ redirect_uri_key: Optional[str] = Field(
866
+ None,
867
+ description="The DeclarativeOAuth Specific optional override to provide the custom `redirect_uri` key name to something like `callback_uri`, if required by data-provider.",
868
+ examples=[{"redirect_uri_key": "my_custom_redirect_uri_key_name"}],
869
+ title="(Optional) DeclarativeOAuth Redirect URI Key Override",
870
+ )
871
+
872
+
751
873
  class OAuthConfigSpecification(BaseModel):
752
874
  class Config:
753
875
  extra = Extra.allow
@@ -766,6 +888,11 @@ class OAuthConfigSpecification(BaseModel):
766
888
  ],
767
889
  title="OAuth user input",
768
890
  )
891
+ oauth_connector_input_specification: Optional[OauthConnectorInputSpecification] = Field(
892
+ None,
893
+ description='The DeclarativeOAuth specific blob.\nPertains to the fields defined by the connector relating to the OAuth flow.\n\nInterpolation capabilities:\n- The variables placeholders are declared as `{my_var}`.\n- The nested resolution variables like `{{my_nested_var}}` is allowed as well.\n\n- The allowed interpolation context is:\n + base64Encoder - encode to `base64`, {base64Encoder:{my_var_a}:{my_var_b}}\n + base64Decorer - decode from `base64` encoded string, {base64Decoder:{my_string_variable_or_string_value}}\n + urlEncoder - encode the input string to URL-like format, {urlEncoder:https://test.host.com/endpoint}\n + urlDecorer - decode the input url-encoded string into text format, {urlDecoder:https%3A%2F%2Fairbyte.io}\n + codeChallengeS256 - get the `codeChallenge` encoded value to provide additional data-provider specific authorisation values, {codeChallengeS256:{state_value}}\n\nExamples:\n - The TikTok Marketing DeclarativeOAuth spec:\n {\n "oauth_connector_input_specification": {\n "type": "object",\n "additionalProperties": false,\n "properties": {\n "consent_url": "https://ads.tiktok.com/marketing_api/auth?{client_id_key}={{client_id_key}}&{redirect_uri_key}={urlEncoder:{{redirect_uri_key}}}&{state_key}={{state_key}}",\n "access_token_url": "https://business-api.tiktok.com/open_api/v1.3/oauth2/access_token/",\n "access_token_params": {\n "{auth_code_key}": "{{auth_code_key}}",\n "{client_id_key}": "{{client_id_key}}",\n "{client_secret_key}": "{{client_secret_key}}"\n },\n "access_token_headers": {\n "Content-Type": "application/json",\n "Accept": "application/json"\n },\n "extract_output": ["data.access_token"],\n "client_id_key": "app_id",\n "client_secret_key": "secret",\n "auth_code_key": "auth_code"\n }\n }\n }',
894
+ title="DeclarativeOAuth Connector Specification",
895
+ )
769
896
  complete_oauth_output_specification: Optional[Dict[str, Any]] = Field(
770
897
  None,
771
898
  description="OAuth specific blob. This is a Json Schema used to validate Json configurations produced by the OAuth flows as they are\nreturned by the distant OAuth APIs.\nMust be a valid JSON describing the fields to merge back to `ConnectorSpecification.connectionSpecification`.\nFor each field, a special annotation `path_in_connector_config` can be specified to determine where to merge it,\nExamples:\n complete_oauth_output_specification={\n refresh_token: {\n type: string,\n path_in_connector_config: ['credentials', 'refresh_token']\n }\n }",
@@ -1676,6 +1803,9 @@ class AsyncRetriever(BaseModel):
1676
1803
  ...,
1677
1804
  description="Responsible for fetching the final result `urls` provided by the completed / finished / ready async job.",
1678
1805
  )
1806
+ download_extractor: Optional[
1807
+ Union[CustomRecordExtractor, DpathExtractor, ResponseToFileExtractor]
1808
+ ] = Field(None, description="Responsible for fetching the records from provided urls.")
1679
1809
  creation_requester: Union[CustomRequester, HttpRequester] = Field(
1680
1810
  ...,
1681
1811
  description="Requester component that describes how to prepare HTTP requests to send to the source API to create the async server-side job.",
@@ -1726,6 +1856,20 @@ class AsyncRetriever(BaseModel):
1726
1856
  description="Component decoding the response so records can be extracted.",
1727
1857
  title="Decoder",
1728
1858
  )
1859
+ download_decoder: Optional[
1860
+ Union[
1861
+ CustomDecoder,
1862
+ JsonDecoder,
1863
+ JsonlDecoder,
1864
+ IterableDecoder,
1865
+ XmlDecoder,
1866
+ GzipJsonDecoder,
1867
+ ]
1868
+ ] = Field(
1869
+ None,
1870
+ description="Component decoding the download response so records can be extracted.",
1871
+ title="Download Decoder",
1872
+ )
1729
1873
  parameters: Optional[Dict[str, Any]] = Field(None, alias="$parameters")
1730
1874
 
1731
1875
 
@@ -25,6 +25,9 @@ from typing import (
25
25
  get_type_hints,
26
26
  )
27
27
 
28
+ from isodate import parse_duration
29
+ from pydantic.v1 import BaseModel
30
+
28
31
  from airbyte_cdk.models import FailureType, Level
29
32
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
30
33
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import AsyncJobOrchestrator
@@ -267,6 +270,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
267
270
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
268
271
  RequestPath as RequestPathModel,
269
272
  )
273
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
274
+ ResponseToFileExtractor as ResponseToFileExtractorModel,
275
+ )
270
276
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
271
277
  SelectiveAuthenticator as SelectiveAuthenticatorModel,
272
278
  )
@@ -367,8 +373,6 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_sta
367
373
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
368
374
  from airbyte_cdk.sources.types import Config
369
375
  from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
370
- from isodate import parse_duration
371
- from pydantic.v1 import BaseModel
372
376
 
373
377
  ComponentDefinition = Mapping[str, Any]
374
378
 
@@ -383,6 +387,7 @@ class ModelToComponentFactory:
383
387
  emit_connector_builder_messages: bool = False,
384
388
  disable_retries: bool = False,
385
389
  disable_cache: bool = False,
390
+ disable_resumable_full_refresh: bool = False,
386
391
  message_repository: Optional[MessageRepository] = None,
387
392
  ):
388
393
  self._init_mappings()
@@ -391,6 +396,7 @@ class ModelToComponentFactory:
391
396
  self._emit_connector_builder_messages = emit_connector_builder_messages
392
397
  self._disable_retries = disable_retries
393
398
  self._disable_cache = disable_cache
399
+ self._disable_resumable_full_refresh = disable_resumable_full_refresh
394
400
  self._message_repository = message_repository or InMemoryMessageRepository( # type: ignore
395
401
  self._evaluate_log_level(emit_connector_builder_messages)
396
402
  )
@@ -426,6 +432,7 @@ class ModelToComponentFactory:
426
432
  DefaultErrorHandlerModel: self.create_default_error_handler,
427
433
  DefaultPaginatorModel: self.create_default_paginator,
428
434
  DpathExtractorModel: self.create_dpath_extractor,
435
+ ResponseToFileExtractorModel: self.create_response_to_file_extractor,
429
436
  ExponentialBackoffStrategyModel: self.create_exponential_backoff_strategy,
430
437
  SessionTokenAuthenticatorModel: self.create_session_token_authenticator,
431
438
  HttpRequesterModel: self.create_http_requester,
@@ -1334,6 +1341,8 @@ class ModelToComponentFactory:
1334
1341
  if model.incremental_sync
1335
1342
  else None
1336
1343
  )
1344
+ elif self._disable_resumable_full_refresh:
1345
+ return stream_slicer
1337
1346
  elif stream_slicer:
1338
1347
  # For the Full-Refresh sub-streams, we use the nested `ChildPartitionResumableFullRefreshCursor`
1339
1348
  return PerPartitionCursor(
@@ -1446,6 +1455,13 @@ class ModelToComponentFactory:
1446
1455
  parameters=model.parameters or {},
1447
1456
  )
1448
1457
 
1458
+ def create_response_to_file_extractor(
1459
+ self,
1460
+ model: ResponseToFileExtractorModel,
1461
+ **kwargs: Any,
1462
+ ) -> ResponseToFileExtractor:
1463
+ return ResponseToFileExtractor(parameters=model.parameters or {})
1464
+
1449
1465
  @staticmethod
1450
1466
  def create_exponential_backoff_strategy(
1451
1467
  model: ExponentialBackoffStrategyModel, config: Config
@@ -1780,6 +1796,7 @@ class ModelToComponentFactory:
1780
1796
  self,
1781
1797
  model: RecordSelectorModel,
1782
1798
  config: Config,
1799
+ name: str,
1783
1800
  *,
1784
1801
  transformations: List[RecordTransformation],
1785
1802
  decoder: Optional[Decoder] = None,
@@ -1810,6 +1827,7 @@ class ModelToComponentFactory:
1810
1827
 
1811
1828
  return RecordSelector(
1812
1829
  extractor=extractor,
1830
+ name=name,
1813
1831
  config=config,
1814
1832
  record_filter=record_filter,
1815
1833
  transformations=transformations,
@@ -1880,6 +1898,7 @@ class ModelToComponentFactory:
1880
1898
  )
1881
1899
  record_selector = self._create_component_from_model(
1882
1900
  model=model.record_selector,
1901
+ name=name,
1883
1902
  config=config,
1884
1903
  decoder=decoder,
1885
1904
  transformations=transformations,
@@ -2007,6 +2026,7 @@ class ModelToComponentFactory:
2007
2026
  model=model.record_selector,
2008
2027
  config=config,
2009
2028
  decoder=decoder,
2029
+ name=name,
2010
2030
  transformations=transformations,
2011
2031
  client_side_incremental_sync=client_side_incremental_sync,
2012
2032
  )
@@ -2024,16 +2044,37 @@ class ModelToComponentFactory:
2024
2044
  name=f"job polling - {name}",
2025
2045
  )
2026
2046
  job_download_components_name = f"job download - {name}"
2047
+ download_decoder = (
2048
+ self._create_component_from_model(model=model.download_decoder, config=config)
2049
+ if model.download_decoder
2050
+ else JsonDecoder(parameters={})
2051
+ )
2052
+ download_extractor = (
2053
+ self._create_component_from_model(
2054
+ model=model.download_extractor,
2055
+ config=config,
2056
+ decoder=download_decoder,
2057
+ parameters=model.parameters,
2058
+ )
2059
+ if model.download_extractor
2060
+ else DpathExtractor(
2061
+ [],
2062
+ config=config,
2063
+ decoder=download_decoder,
2064
+ parameters=model.parameters or {},
2065
+ )
2066
+ )
2027
2067
  download_requester = self._create_component_from_model(
2028
2068
  model=model.download_requester,
2029
- decoder=decoder,
2069
+ decoder=download_decoder,
2030
2070
  config=config,
2031
2071
  name=job_download_components_name,
2032
2072
  )
2033
2073
  download_retriever = SimpleRetriever(
2034
2074
  requester=download_requester,
2035
2075
  record_selector=RecordSelector(
2036
- extractor=ResponseToFileExtractor(),
2076
+ extractor=download_extractor,
2077
+ name=name,
2037
2078
  record_filter=None,
2038
2079
  transformations=[],
2039
2080
  schema_normalization=TypeTransformer(TransformConfig.NoTransform),
@@ -7,6 +7,7 @@ from dataclasses import InitVar, dataclass
7
7
  from typing import TYPE_CHECKING, Any, Iterable, List, Mapping, Optional, Union
8
8
 
9
9
  import dpath
10
+
10
11
  from airbyte_cdk.models import AirbyteMessage
11
12
  from airbyte_cdk.models import Type as MessageType
12
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
10
11
  from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy
11
12
  from airbyte_cdk.sources.types import Config
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
10
11
  from airbyte_cdk.sources.streams.http.error_handlers import BackoffStrategy
11
12
  from airbyte_cdk.sources.types import Config
@@ -7,6 +7,7 @@ from dataclasses import InitVar, dataclass
7
7
  from typing import Any, Mapping, Optional, Union
8
8
 
9
9
  import requests
10
+
10
11
  from airbyte_cdk.models import FailureType
11
12
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
12
13
  from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.header_helper import (
@@ -9,6 +9,7 @@ from dataclasses import InitVar, dataclass
9
9
  from typing import Any, Mapping, Optional, Union
10
10
 
11
11
  import requests
12
+
12
13
  from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
13
14
  from airbyte_cdk.sources.declarative.requesters.error_handlers.backoff_strategies.header_helper import (
14
15
  get_numeric_value_from_header,
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, List, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
10
11
  from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
11
12
  ErrorResolution,
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, List, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.requesters.error_handlers.default_http_response_filter import (
10
11
  DefaultHttpResponseFilter,
11
12
  )
@@ -5,6 +5,7 @@
5
5
  from typing import Optional, Union
6
6
 
7
7
  import requests
8
+
8
9
  from airbyte_cdk.sources.declarative.requesters.error_handlers.http_response_filter import (
9
10
  HttpResponseFilter,
10
11
  )
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, Optional, Set, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.models import FailureType
10
11
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
11
12
  from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
@@ -6,6 +6,8 @@ from datetime import timedelta
6
6
  from typing import Any, Dict, Iterable, Mapping, Optional
7
7
 
8
8
  import requests
9
+ from requests import Response
10
+
9
11
  from airbyte_cdk import AirbyteMessage
10
12
  from airbyte_cdk.logger import lazy_log
11
13
  from airbyte_cdk.models import FailureType, Type
@@ -23,7 +25,6 @@ from airbyte_cdk.sources.declarative.requesters.requester import Requester
23
25
  from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
24
26
  from airbyte_cdk.sources.types import Record, StreamSlice
25
27
  from airbyte_cdk.utils import AirbyteTracedException
26
- from requests import Response
27
28
 
28
29
  LOGGER = logging.getLogger("airbyte")
29
30
 
@@ -41,7 +42,7 @@ class AsyncHttpJobRepository(AsyncJobRepository):
41
42
 
42
43
  job_timeout: Optional[timedelta] = None
43
44
  record_extractor: RecordExtractor = field(
44
- init=False, repr=False, default_factory=lambda: ResponseToFileExtractor()
45
+ init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
45
46
  )
46
47
 
47
48
  def __post_init__(self) -> None:
@@ -9,6 +9,7 @@ from typing import Any, Callable, Mapping, MutableMapping, Optional, Union
9
9
  from urllib.parse import urljoin
10
10
 
11
11
  import requests
12
+
12
13
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import (
13
14
  DeclarativeAuthenticator,
14
15
  NoAuth,
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.decoders import (
10
11
  Decoder,
11
12
  JsonDecoder,
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, MutableMapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
10
11
  from airbyte_cdk.sources.types import Record, StreamSlice, StreamState
11
12
 
@@ -7,6 +7,7 @@ from dataclasses import dataclass
7
7
  from typing import Any, Mapping, Optional
8
8
 
9
9
  import requests
10
+
10
11
  from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
11
12
  RequestOptionsProvider,
12
13
  )
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Dict, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.decoders import (
10
11
  Decoder,
11
12
  JsonDecoder,
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.decoders import (
10
11
  Decoder,
11
12
  JsonDecoder,
@@ -6,6 +6,7 @@ from dataclasses import InitVar, dataclass
6
6
  from typing import Any, Mapping, Optional, Union
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
10
11
  from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
11
12
  PaginationStrategy,
@@ -7,6 +7,7 @@ from dataclasses import dataclass
7
7
  from typing import Any, Optional
8
8
 
9
9
  import requests
10
+
10
11
  from airbyte_cdk.sources.types import Record
11
12
 
12
13
 
@@ -6,10 +6,12 @@ from abc import ABC, abstractmethod
6
6
  from typing import Any, Optional
7
7
 
8
8
  import requests
9
+
9
10
  from airbyte_cdk.sources.declarative.incremental.declarative_cursor import DeclarativeCursor
10
11
  from airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy import (
11
12
  PaginationStrategy,
12
13
  )
14
+ from airbyte_cdk.sources.streams.concurrent.cursor import ConcurrentCursor
13
15
  from airbyte_cdk.sources.types import Record
14
16
 
15
17
 
@@ -25,7 +27,11 @@ class PaginationStopCondition(ABC):
25
27
 
26
28
 
27
29
  class CursorStopCondition(PaginationStopCondition):
28
- def __init__(self, cursor: DeclarativeCursor):
30
+ def __init__(
31
+ self,
32
+ cursor: DeclarativeCursor
33
+ | ConcurrentCursor, # migrate to use both old and concurrent versions
34
+ ):
29
35
  self._cursor = cursor
30
36
 
31
37
  def is_met(self, record: Record) -> bool:
@@ -46,8 +52,8 @@ class StopConditionPaginationStrategyDecorator(PaginationStrategy):
46
52
  return None
47
53
  return self._delegate.next_page_token(response, last_page_size, last_record)
48
54
 
49
- def reset(self) -> None:
50
- self._delegate.reset()
55
+ def reset(self, reset_value: Optional[Any] = None) -> None:
56
+ self._delegate.reset(reset_value)
51
57
 
52
58
  def get_page_size(self) -> Optional[int]:
53
59
  return self._delegate.get_page_size()
@@ -5,6 +5,8 @@
5
5
  from dataclasses import InitVar, dataclass, field
6
6
  from typing import Any, Mapping, MutableMapping, Optional, Union
7
7
 
8
+ from deprecated import deprecated
9
+
8
10
  from airbyte_cdk.sources.declarative.interpolation.interpolated_nested_mapping import NestedMapping
9
11
  from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_nested_request_input_provider import (
10
12
  InterpolatedNestedRequestInputProvider,
@@ -17,7 +19,6 @@ from airbyte_cdk.sources.declarative.requesters.request_options.request_options_
17
19
  )
18
20
  from airbyte_cdk.sources.source import ExperimentalClassWarning
19
21
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
20
- from deprecated import deprecated
21
22
 
22
23
  RequestInput = Union[str, Mapping[str, str]]
23
24
  ValidRequestTypes = (str, list)
@@ -7,6 +7,7 @@ from enum import Enum
7
7
  from typing import Any, Callable, Mapping, MutableMapping, Optional, Union
8
8
 
9
9
  import requests
10
+
10
11
  from airbyte_cdk.sources.declarative.auth.declarative_authenticator import DeclarativeAuthenticator
11
12
  from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
12
13
  RequestOptionsProvider,
@@ -4,6 +4,8 @@
4
4
  from dataclasses import InitVar, dataclass, field
5
5
  from typing import Any, Callable, Iterable, Mapping, Optional
6
6
 
7
+ from deprecated.classic import deprecated
8
+
7
9
  from airbyte_cdk.models import FailureType
8
10
  from airbyte_cdk.sources.declarative.async_job.job_orchestrator import (
9
11
  AsyncJobOrchestrator,
@@ -17,7 +19,6 @@ from airbyte_cdk.sources.source import ExperimentalClassWarning
17
19
  from airbyte_cdk.sources.streams.core import StreamData
18
20
  from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
19
21
  from airbyte_cdk.utils.traced_exception import AirbyteTracedException
20
- from deprecated.classic import deprecated
21
22
 
22
23
 
23
24
  @deprecated("This class is experimental. Use at your own risk.", category=ExperimentalClassWarning)
@@ -20,6 +20,7 @@ from typing import (
20
20
  )
21
21
 
22
22
  import requests
23
+
23
24
  from airbyte_cdk.models import AirbyteMessage
24
25
  from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
25
26
  from airbyte_cdk.sources.declarative.incremental import ResumableFullRefreshCursor
@@ -360,9 +361,6 @@ class SimpleRetriever(Retriever):
360
361
  next_page_token = self._next_page_token(response)
361
362
  if not next_page_token:
362
363
  pagination_complete = True
363
- # Closing the response to avoid memory issues. Note that this assumes the caller as completely consumed the response before
364
- # iterating on another one
365
- response.close()
366
364
 
367
365
  # Always return an empty generator just in case no records were ever yielded
368
366
  yield from []
@@ -470,8 +468,9 @@ class SimpleRetriever(Retriever):
470
468
  else:
471
469
  return None
472
470
 
473
- @staticmethod
474
- def _extract_record(stream_data: StreamData, stream_slice: StreamSlice) -> Optional[Record]:
471
+ def _extract_record(
472
+ self, stream_data: StreamData, stream_slice: StreamSlice
473
+ ) -> Optional[Record]:
475
474
  """
476
475
  As we allow the output of _read_pages to be StreamData, it can be multiple things. Therefore, we need to filter out and normalize
477
476
  to data to streamline the rest of the process.
@@ -480,9 +479,15 @@ class SimpleRetriever(Retriever):
480
479
  # Record is not part of `StreamData` but is the most common implementation of `Mapping[str, Any]` which is part of `StreamData`
481
480
  return stream_data
482
481
  elif isinstance(stream_data, (dict, Mapping)):
483
- return Record(dict(stream_data), stream_slice)
482
+ return Record(
483
+ data=dict(stream_data), associated_slice=stream_slice, stream_name=self.name
484
+ )
484
485
  elif isinstance(stream_data, AirbyteMessage) and stream_data.record:
485
- return Record(stream_data.record.data, stream_slice)
486
+ return Record(
487
+ data=stream_data.record.data, # type:ignore # AirbyteMessage always has record.data
488
+ associated_slice=stream_slice,
489
+ stream_name=self.name,
490
+ )
486
491
  return None
487
492
 
488
493
  # stream_slices is defined with arguments on http stream and fixing this has a long tail of dependencies. Will be resolved by the decoupling of http stream and simple retriever