airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +7 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +656 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +782 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/METADATA +7 -6
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/RECORD +198 -198
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/WHEEL +0 -0
@@ -11,7 +11,13 @@ from functools import cached_property, lru_cache
|
|
11
11
|
from typing import Any, Dict, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Union
|
12
12
|
|
13
13
|
import airbyte_cdk.sources.utils.casing as casing
|
14
|
-
from airbyte_cdk.models import
|
14
|
+
from airbyte_cdk.models import (
|
15
|
+
AirbyteMessage,
|
16
|
+
AirbyteStream,
|
17
|
+
ConfiguredAirbyteStream,
|
18
|
+
DestinationSyncMode,
|
19
|
+
SyncMode,
|
20
|
+
)
|
15
21
|
from airbyte_cdk.models import Type as MessageType
|
16
22
|
from airbyte_cdk.sources.streams.checkpoint import (
|
17
23
|
CheckpointMode,
|
@@ -84,7 +90,10 @@ class CheckpointMixin(ABC):
|
|
84
90
|
"""State setter, accept state serialized by state getter."""
|
85
91
|
|
86
92
|
|
87
|
-
@deprecated(
|
93
|
+
@deprecated(
|
94
|
+
version="0.87.0",
|
95
|
+
reason="Deprecated in favor of the CheckpointMixin which offers similar functionality",
|
96
|
+
)
|
88
97
|
class IncrementalMixin(CheckpointMixin, ABC):
|
89
98
|
"""Mixin to make stream incremental.
|
90
99
|
|
@@ -192,9 +201,14 @@ class Stream(ABC):
|
|
192
201
|
for record_data_or_message in records:
|
193
202
|
yield record_data_or_message
|
194
203
|
if isinstance(record_data_or_message, Mapping) or (
|
195
|
-
hasattr(record_data_or_message, "type")
|
204
|
+
hasattr(record_data_or_message, "type")
|
205
|
+
and record_data_or_message.type == MessageType.RECORD
|
196
206
|
):
|
197
|
-
record_data =
|
207
|
+
record_data = (
|
208
|
+
record_data_or_message
|
209
|
+
if isinstance(record_data_or_message, Mapping)
|
210
|
+
else record_data_or_message.record
|
211
|
+
)
|
198
212
|
|
199
213
|
# Thanks I hate it. RFR fundamentally doesn't fit with the concept of the legacy Stream.get_updated_state()
|
200
214
|
# method because RFR streams rely on pagination as a cursor. Stream.get_updated_state() was designed to make
|
@@ -206,14 +220,23 @@ class Stream(ABC):
|
|
206
220
|
if self.cursor_field:
|
207
221
|
# Some connectors have streams that implement get_updated_state(), but do not define a cursor_field. This
|
208
222
|
# should be fixed on the stream implementation, but we should also protect against this in the CDK as well
|
209
|
-
stream_state_tracker = self.get_updated_state(
|
223
|
+
stream_state_tracker = self.get_updated_state(
|
224
|
+
stream_state_tracker, record_data
|
225
|
+
)
|
210
226
|
self._observe_state(checkpoint_reader, stream_state_tracker)
|
211
227
|
record_counter += 1
|
212
228
|
|
213
229
|
checkpoint_interval = self.state_checkpoint_interval
|
214
230
|
checkpoint = checkpoint_reader.get_checkpoint()
|
215
|
-
if
|
216
|
-
|
231
|
+
if (
|
232
|
+
should_checkpoint
|
233
|
+
and checkpoint_interval
|
234
|
+
and record_counter % checkpoint_interval == 0
|
235
|
+
and checkpoint is not None
|
236
|
+
):
|
237
|
+
airbyte_state_message = self._checkpoint_state(
|
238
|
+
checkpoint, state_manager=state_manager
|
239
|
+
)
|
217
240
|
yield airbyte_state_message
|
218
241
|
|
219
242
|
if internal_config.is_limit_reached(record_counter):
|
@@ -221,7 +244,9 @@ class Stream(ABC):
|
|
221
244
|
self._observe_state(checkpoint_reader)
|
222
245
|
checkpoint_state = checkpoint_reader.get_checkpoint()
|
223
246
|
if should_checkpoint and checkpoint_state is not None:
|
224
|
-
airbyte_state_message = self._checkpoint_state(
|
247
|
+
airbyte_state_message = self._checkpoint_state(
|
248
|
+
checkpoint_state, state_manager=state_manager
|
249
|
+
)
|
225
250
|
yield airbyte_state_message
|
226
251
|
|
227
252
|
next_slice = checkpoint_reader.next()
|
@@ -252,7 +277,9 @@ class Stream(ABC):
|
|
252
277
|
configured_stream=configured_stream,
|
253
278
|
logger=self.logger,
|
254
279
|
slice_logger=DebugSliceLogger(),
|
255
|
-
stream_state=dict(state)
|
280
|
+
stream_state=dict(state)
|
281
|
+
if state
|
282
|
+
else {}, # read() expects MutableMapping instead of Mapping which is used more often
|
256
283
|
state_manager=None,
|
257
284
|
internal_config=InternalConfig(),
|
258
285
|
)
|
@@ -378,7 +405,11 @@ class Stream(ABC):
|
|
378
405
|
"""
|
379
406
|
|
380
407
|
def stream_slices(
|
381
|
-
self,
|
408
|
+
self,
|
409
|
+
*,
|
410
|
+
sync_mode: SyncMode,
|
411
|
+
cursor_field: Optional[List[str]] = None,
|
412
|
+
stream_state: Optional[Mapping[str, Any]] = None,
|
382
413
|
) -> Iterable[Optional[Mapping[str, Any]]]:
|
383
414
|
"""
|
384
415
|
Override to define the slices for this stream. See the stream slicing section of the docs for more information.
|
@@ -449,12 +480,16 @@ class Stream(ABC):
|
|
449
480
|
mappings_or_slices = [{}]
|
450
481
|
|
451
482
|
slices_iterable_copy, iterable_for_detecting_format = itertools.tee(mappings_or_slices, 2)
|
452
|
-
stream_classification = self._classify_stream(
|
483
|
+
stream_classification = self._classify_stream(
|
484
|
+
mappings_or_slices=iterable_for_detecting_format
|
485
|
+
)
|
453
486
|
|
454
487
|
# Streams that override has_multiple_slices are explicitly indicating that they will iterate over
|
455
488
|
# multiple partitions. Inspecting slices to automatically apply the correct cursor is only needed as
|
456
489
|
# a backup. So if this value was already assigned to True by the stream, we don't need to reassign it
|
457
|
-
self.has_multiple_slices =
|
490
|
+
self.has_multiple_slices = (
|
491
|
+
self.has_multiple_slices or stream_classification.has_multiple_slices
|
492
|
+
)
|
458
493
|
|
459
494
|
cursor = self.get_cursor()
|
460
495
|
if cursor:
|
@@ -463,7 +498,9 @@ class Stream(ABC):
|
|
463
498
|
checkpoint_mode = self._checkpoint_mode
|
464
499
|
|
465
500
|
if cursor and stream_classification.is_legacy_format:
|
466
|
-
return LegacyCursorBasedCheckpointReader(
|
501
|
+
return LegacyCursorBasedCheckpointReader(
|
502
|
+
stream_slices=slices_iterable_copy, cursor=cursor, read_state_from_cursor=True
|
503
|
+
)
|
467
504
|
elif cursor:
|
468
505
|
return CursorBasedCheckpointReader(
|
469
506
|
stream_slices=slices_iterable_copy,
|
@@ -475,7 +512,9 @@ class Stream(ABC):
|
|
475
512
|
# not iterate over a static set of slices.
|
476
513
|
return ResumableFullRefreshCheckpointReader(stream_state=stream_state)
|
477
514
|
elif checkpoint_mode == CheckpointMode.INCREMENTAL:
|
478
|
-
return IncrementalCheckpointReader(
|
515
|
+
return IncrementalCheckpointReader(
|
516
|
+
stream_slices=slices_iterable_copy, stream_state=stream_state
|
517
|
+
)
|
479
518
|
else:
|
480
519
|
return FullRefreshCheckpointReader(stream_slices=slices_iterable_copy)
|
481
520
|
|
@@ -489,7 +528,9 @@ class Stream(ABC):
|
|
489
528
|
return CheckpointMode.FULL_REFRESH
|
490
529
|
|
491
530
|
@staticmethod
|
492
|
-
def _classify_stream(
|
531
|
+
def _classify_stream(
|
532
|
+
mappings_or_slices: Iterator[Optional[Union[Mapping[str, Any], StreamSlice]]],
|
533
|
+
) -> StreamClassification:
|
493
534
|
"""
|
494
535
|
This is a bit of a crazy solution, but also the only way we can detect certain attributes about the stream since Python
|
495
536
|
streams do not follow consistent implementation patterns. We care about the following two attributes:
|
@@ -506,7 +547,9 @@ class Stream(ABC):
|
|
506
547
|
raise ValueError("A stream should always have at least one slice")
|
507
548
|
try:
|
508
549
|
next_slice = next(mappings_or_slices)
|
509
|
-
if isinstance(next_slice, StreamSlice) and next_slice == StreamSlice(
|
550
|
+
if isinstance(next_slice, StreamSlice) and next_slice == StreamSlice(
|
551
|
+
partition={}, cursor_slice={}
|
552
|
+
):
|
510
553
|
is_legacy_format = False
|
511
554
|
slice_has_value = False
|
512
555
|
elif next_slice == {}:
|
@@ -526,7 +569,9 @@ class Stream(ABC):
|
|
526
569
|
if slice_has_value:
|
527
570
|
# If the first slice contained a partition value from the result of stream_slices(), this is a substream that might
|
528
571
|
# have multiple parent records to iterate over
|
529
|
-
return StreamClassification(
|
572
|
+
return StreamClassification(
|
573
|
+
is_legacy_format=is_legacy_format, has_multiple_slices=slice_has_value
|
574
|
+
)
|
530
575
|
|
531
576
|
try:
|
532
577
|
# If stream_slices() returns multiple slices, this is also a substream that can potentially generate empty slices
|
@@ -534,7 +579,9 @@ class Stream(ABC):
|
|
534
579
|
return StreamClassification(is_legacy_format=is_legacy_format, has_multiple_slices=True)
|
535
580
|
except StopIteration:
|
536
581
|
# If the result of stream_slices() only returns a single empty stream slice, then we know this is a regular stream
|
537
|
-
return StreamClassification(
|
582
|
+
return StreamClassification(
|
583
|
+
is_legacy_format=is_legacy_format, has_multiple_slices=False
|
584
|
+
)
|
538
585
|
|
539
586
|
def log_stream_sync_configuration(self) -> None:
|
540
587
|
"""
|
@@ -549,7 +596,9 @@ class Stream(ABC):
|
|
549
596
|
)
|
550
597
|
|
551
598
|
@staticmethod
|
552
|
-
def _wrapped_primary_key(
|
599
|
+
def _wrapped_primary_key(
|
600
|
+
keys: Optional[Union[str, List[str], List[List[str]]]],
|
601
|
+
) -> Optional[List[List[str]]]:
|
553
602
|
"""
|
554
603
|
:return: wrap the primary_key property in a list of list of strings required by the Airbyte Stream object.
|
555
604
|
"""
|
@@ -571,7 +620,9 @@ class Stream(ABC):
|
|
571
620
|
else:
|
572
621
|
raise ValueError(f"Element must be either list or str. Got: {type(keys)}")
|
573
622
|
|
574
|
-
def _observe_state(
|
623
|
+
def _observe_state(
|
624
|
+
self, checkpoint_reader: CheckpointReader, stream_state: Optional[Mapping[str, Any]] = None
|
625
|
+
) -> None:
|
575
626
|
"""
|
576
627
|
Convenience method that attempts to read the Stream's state using the recommended way of connector's managing their
|
577
628
|
own state via state setter/getter. But if we get back an AttributeError, then the legacy Stream.get_updated_state()
|
@@ -617,7 +668,9 @@ class Stream(ABC):
|
|
617
668
|
def configured_json_schema(self, json_schema: Dict[str, Any]) -> None:
|
618
669
|
self._configured_json_schema = self._filter_schema_invalid_properties(json_schema)
|
619
670
|
|
620
|
-
def _filter_schema_invalid_properties(
|
671
|
+
def _filter_schema_invalid_properties(
|
672
|
+
self, configured_catalog_json_schema: Dict[str, Any]
|
673
|
+
) -> Dict[str, Any]:
|
621
674
|
"""
|
622
675
|
Filters the properties in json_schema that are not present in the stream schema.
|
623
676
|
Configured Schemas can have very old fields, so we need to housekeeping ourselves.
|
@@ -639,6 +692,8 @@ class Stream(ABC):
|
|
639
692
|
valid_configured_schema_properties = {}
|
640
693
|
|
641
694
|
for configured_schema_property in valid_configured_schema_properties_keys:
|
642
|
-
valid_configured_schema_properties[configured_schema_property] =
|
695
|
+
valid_configured_schema_properties[configured_schema_property] = (
|
696
|
+
stream_schema_properties[configured_schema_property]
|
697
|
+
)
|
643
698
|
|
644
699
|
return {**configured_catalog_json_schema, "properties": valid_configured_schema_properties}
|
@@ -15,7 +15,9 @@ if typing.TYPE_CHECKING:
|
|
15
15
|
|
16
16
|
|
17
17
|
class HttpAvailabilityStrategy(AvailabilityStrategy):
|
18
|
-
def check_availability(
|
18
|
+
def check_availability(
|
19
|
+
self, stream: Stream, logger: logging.Logger, source: Optional["Source"] = None
|
20
|
+
) -> Tuple[bool, Optional[str]]:
|
19
21
|
"""
|
20
22
|
Check stream availability by attempting to read the first record of the
|
21
23
|
stream.
|
@@ -5,7 +5,10 @@
|
|
5
5
|
from typing import Mapping, Type, Union
|
6
6
|
|
7
7
|
from airbyte_cdk.models import FailureType
|
8
|
-
from airbyte_cdk.sources.streams.http.error_handlers.response_models import
|
8
|
+
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
9
|
+
ErrorResolution,
|
10
|
+
ResponseAction,
|
11
|
+
)
|
9
12
|
from requests.exceptions import InvalidSchema, InvalidURL, RequestException
|
10
13
|
|
11
14
|
DEFAULT_ERROR_MAPPING: Mapping[Union[int, str, Type[Exception]], ErrorResolution] = {
|
@@ -30,7 +30,9 @@ class ErrorHandler(ABC):
|
|
30
30
|
pass
|
31
31
|
|
32
32
|
@abstractmethod
|
33
|
-
def interpret_response(
|
33
|
+
def interpret_response(
|
34
|
+
self, response: Optional[Union[requests.Response, Exception]]
|
35
|
+
) -> ErrorResolution:
|
34
36
|
"""
|
35
37
|
Interpret the response or exception and return the corresponding response action, failure type, and error message.
|
36
38
|
|
@@ -8,9 +8,14 @@ from typing import Mapping, Optional, Union
|
|
8
8
|
|
9
9
|
import requests
|
10
10
|
from airbyte_cdk.models import FailureType
|
11
|
-
from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import
|
11
|
+
from airbyte_cdk.sources.streams.http.error_handlers.default_error_mapping import (
|
12
|
+
DEFAULT_ERROR_MAPPING,
|
13
|
+
)
|
12
14
|
from airbyte_cdk.sources.streams.http.error_handlers.error_handler import ErrorHandler
|
13
|
-
from airbyte_cdk.sources.streams.http.error_handlers.response_models import
|
15
|
+
from airbyte_cdk.sources.streams.http.error_handlers.response_models import (
|
16
|
+
ErrorResolution,
|
17
|
+
ResponseAction,
|
18
|
+
)
|
14
19
|
|
15
20
|
|
16
21
|
class HttpStatusErrorHandler(ErrorHandler):
|
@@ -39,7 +44,9 @@ class HttpStatusErrorHandler(ErrorHandler):
|
|
39
44
|
def max_time(self) -> Optional[int]:
|
40
45
|
return self._max_time
|
41
46
|
|
42
|
-
def interpret_response(
|
47
|
+
def interpret_response(
|
48
|
+
self, response_or_exception: Optional[Union[requests.Response, Exception]] = None
|
49
|
+
) -> ErrorResolution:
|
43
50
|
"""
|
44
51
|
Interpret the response and return the corresponding response action, failure type, and error message.
|
45
52
|
|
@@ -48,12 +55,16 @@ class HttpStatusErrorHandler(ErrorHandler):
|
|
48
55
|
"""
|
49
56
|
|
50
57
|
if isinstance(response_or_exception, Exception):
|
51
|
-
mapped_error: Optional[ErrorResolution] = self._error_mapping.get(
|
58
|
+
mapped_error: Optional[ErrorResolution] = self._error_mapping.get(
|
59
|
+
response_or_exception.__class__
|
60
|
+
)
|
52
61
|
|
53
62
|
if mapped_error is not None:
|
54
63
|
return mapped_error
|
55
64
|
else:
|
56
|
-
self._logger.error(
|
65
|
+
self._logger.error(
|
66
|
+
f"Unexpected exception in error handler: {response_or_exception}"
|
67
|
+
)
|
57
68
|
return ErrorResolution(
|
58
69
|
response_action=ResponseAction.RETRY,
|
59
70
|
failure_type=FailureType.system_error,
|
@@ -33,13 +33,17 @@ def _format_response_error_message(response: requests.Response) -> str:
|
|
33
33
|
try:
|
34
34
|
response.raise_for_status()
|
35
35
|
except HTTPError as exception:
|
36
|
-
return filter_secrets(
|
36
|
+
return filter_secrets(
|
37
|
+
f"Response was not ok: `{str(exception)}`. Response content is: {response.text}"
|
38
|
+
)
|
37
39
|
# We purposefully do not add the response.content because the response is "ok" so there might be sensitive information in the payload.
|
38
40
|
# Feel free the
|
39
41
|
return f"Unexpected response with HTTP status {response.status_code}"
|
40
42
|
|
41
43
|
|
42
|
-
def create_fallback_error_resolution(
|
44
|
+
def create_fallback_error_resolution(
|
45
|
+
response_or_exception: Optional[Union[requests.Response, Exception]],
|
46
|
+
) -> ErrorResolution:
|
43
47
|
if response_or_exception is None:
|
44
48
|
# We do not expect this case to happen but if it does, it would be good to understand the cause and improve the error message
|
45
49
|
error_message = "Error handler did not receive a valid response or exception. This is unexpected please contact Airbyte Support"
|
@@ -55,4 +59,6 @@ def create_fallback_error_resolution(response_or_exception: Optional[Union[reque
|
|
55
59
|
)
|
56
60
|
|
57
61
|
|
58
|
-
SUCCESS_RESOLUTION = ErrorResolution(
|
62
|
+
SUCCESS_RESOLUTION = ErrorResolution(
|
63
|
+
response_action=ResponseAction.SUCCESS, failure_type=None, error_message=None
|
64
|
+
)
|
@@ -15,10 +15,10 @@ class BaseBackoffException(requests.exceptions.HTTPError):
|
|
15
15
|
response: Optional[Union[requests.Response, Exception]],
|
16
16
|
error_message: str = "",
|
17
17
|
):
|
18
|
-
|
19
18
|
if isinstance(response, requests.Response):
|
20
19
|
error_message = (
|
21
|
-
error_message
|
20
|
+
error_message
|
21
|
+
or f"Request URL: {request.url}, Response Code: {response.status_code}, Response Text: {response.text}"
|
22
22
|
)
|
23
23
|
super().__init__(error_message, request=request, response=response)
|
24
24
|
else:
|