airbyte-cdk 6.5.3rc2__py3-none-any.whl → 6.5.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/__init__.py +17 -2
- airbyte_cdk/config_observation.py +10 -3
- airbyte_cdk/connector.py +19 -9
- airbyte_cdk/connector_builder/connector_builder_handler.py +28 -8
- airbyte_cdk/connector_builder/main.py +26 -6
- airbyte_cdk/connector_builder/message_grouper.py +95 -25
- airbyte_cdk/destinations/destination.py +47 -14
- airbyte_cdk/destinations/vector_db_based/config.py +36 -14
- airbyte_cdk/destinations/vector_db_based/document_processor.py +49 -11
- airbyte_cdk/destinations/vector_db_based/embedder.py +52 -11
- airbyte_cdk/destinations/vector_db_based/test_utils.py +14 -4
- airbyte_cdk/destinations/vector_db_based/utils.py +8 -2
- airbyte_cdk/destinations/vector_db_based/writer.py +15 -4
- airbyte_cdk/entrypoint.py +82 -26
- airbyte_cdk/exception_handler.py +13 -3
- airbyte_cdk/logger.py +10 -2
- airbyte_cdk/models/airbyte_protocol.py +11 -5
- airbyte_cdk/models/airbyte_protocol_serializers.py +9 -3
- airbyte_cdk/models/well_known_types.py +1 -1
- airbyte_cdk/sources/abstract_source.py +63 -17
- airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py +47 -14
- airbyte_cdk/sources/concurrent_source/concurrent_source.py +25 -7
- airbyte_cdk/sources/concurrent_source/concurrent_source_adapter.py +27 -6
- airbyte_cdk/sources/concurrent_source/thread_pool_manager.py +9 -3
- airbyte_cdk/sources/connector_state_manager.py +32 -10
- airbyte_cdk/sources/declarative/async_job/job.py +3 -1
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +68 -14
- airbyte_cdk/sources/declarative/async_job/job_tracker.py +24 -6
- airbyte_cdk/sources/declarative/async_job/repository.py +3 -1
- airbyte_cdk/sources/declarative/auth/declarative_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/jwt.py +27 -7
- airbyte_cdk/sources/declarative/auth/oauth.py +35 -11
- airbyte_cdk/sources/declarative/auth/selective_authenticator.py +3 -1
- airbyte_cdk/sources/declarative/auth/token.py +25 -8
- airbyte_cdk/sources/declarative/checks/check_stream.py +12 -4
- airbyte_cdk/sources/declarative/checks/connection_checker.py +3 -1
- airbyte_cdk/sources/declarative/concurrency_level/concurrency_level.py +11 -3
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +106 -50
- airbyte_cdk/sources/declarative/datetime/min_max_datetime.py +20 -6
- airbyte_cdk/sources/declarative/declarative_source.py +3 -1
- airbyte_cdk/sources/declarative/declarative_stream.py +27 -6
- airbyte_cdk/sources/declarative/decoders/decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +3 -1
- airbyte_cdk/sources/declarative/decoders/pagination_decoder_decorator.py +3 -1
- airbyte_cdk/sources/declarative/decoders/xml_decoder.py +6 -2
- airbyte_cdk/sources/declarative/extractors/dpath_extractor.py +6 -2
- airbyte_cdk/sources/declarative/extractors/record_filter.py +24 -7
- airbyte_cdk/sources/declarative/extractors/record_selector.py +10 -3
- airbyte_cdk/sources/declarative/extractors/response_to_file_extractor.py +15 -5
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +96 -31
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +22 -8
- airbyte_cdk/sources/declarative/incremental/per_partition_cursor.py +46 -15
- airbyte_cdk/sources/declarative/incremental/per_partition_with_global.py +19 -5
- airbyte_cdk/sources/declarative/incremental/resumable_full_refresh_cursor.py +3 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +20 -2
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +5 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +10 -3
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +7 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +106 -24
- airbyte_cdk/sources/declarative/migrations/legacy_to_per_partition_state_migration.py +7 -2
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +656 -678
- airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py +13 -4
- airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py +9 -2
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +782 -232
- airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py +29 -7
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +25 -7
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +54 -15
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/constant_backoff_strategy.py +6 -2
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/header_helper.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py +15 -5
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py +3 -1
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py +18 -8
- airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py +16 -7
- airbyte_cdk/sources/declarative/requesters/error_handlers/http_response_filter.py +51 -14
- airbyte_cdk/sources/declarative/requesters/http_job_repository.py +29 -8
- airbyte_cdk/sources/declarative/requesters/http_requester.py +58 -16
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +49 -14
- airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py +17 -5
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py +24 -7
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py +9 -3
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py +3 -1
- airbyte_cdk/sources/declarative/requesters/paginators/strategies/stop_condition.py +6 -2
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +19 -6
- airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py +3 -1
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +21 -7
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +18 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +27 -8
- airbyte_cdk/sources/declarative/requesters/requester.py +3 -1
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +12 -5
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +105 -24
- airbyte_cdk/sources/declarative/schema/default_schema_loader.py +3 -1
- airbyte_cdk/sources/declarative/spec/spec.py +8 -2
- airbyte_cdk/sources/declarative/stream_slicers/stream_slicer.py +3 -1
- airbyte_cdk/sources/declarative/transformations/add_fields.py +12 -3
- airbyte_cdk/sources/declarative/transformations/remove_fields.py +6 -2
- airbyte_cdk/sources/declarative/types.py +8 -1
- airbyte_cdk/sources/declarative/yaml_declarative_source.py +3 -1
- airbyte_cdk/sources/embedded/base_integration.py +14 -4
- airbyte_cdk/sources/embedded/catalog.py +16 -4
- airbyte_cdk/sources/embedded/runner.py +19 -3
- airbyte_cdk/sources/embedded/tools.py +3 -1
- airbyte_cdk/sources/file_based/availability_strategy/abstract_file_based_availability_strategy.py +12 -4
- airbyte_cdk/sources/file_based/availability_strategy/default_file_based_availability_strategy.py +27 -7
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +12 -6
- airbyte_cdk/sources/file_based/config/csv_format.py +21 -9
- airbyte_cdk/sources/file_based/config/file_based_stream_config.py +6 -2
- airbyte_cdk/sources/file_based/config/unstructured_format.py +10 -3
- airbyte_cdk/sources/file_based/discovery_policy/abstract_discovery_policy.py +2 -4
- airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py +7 -2
- airbyte_cdk/sources/file_based/exceptions.py +13 -15
- airbyte_cdk/sources/file_based/file_based_source.py +82 -24
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +16 -5
- airbyte_cdk/sources/file_based/file_types/avro_parser.py +58 -17
- airbyte_cdk/sources/file_based/file_types/csv_parser.py +89 -26
- airbyte_cdk/sources/file_based/file_types/excel_parser.py +25 -7
- airbyte_cdk/sources/file_based/file_types/file_transfer.py +8 -2
- airbyte_cdk/sources/file_based/file_types/file_type_parser.py +4 -1
- airbyte_cdk/sources/file_based/file_types/jsonl_parser.py +20 -6
- airbyte_cdk/sources/file_based/file_types/parquet_parser.py +57 -16
- airbyte_cdk/sources/file_based/file_types/unstructured_parser.py +64 -15
- airbyte_cdk/sources/file_based/schema_helpers.py +33 -10
- airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py +3 -1
- airbyte_cdk/sources/file_based/schema_validation_policies/default_schema_validation_policies.py +16 -5
- airbyte_cdk/sources/file_based/stream/abstract_file_based_stream.py +33 -10
- airbyte_cdk/sources/file_based/stream/concurrent/adapters.py +47 -11
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/abstract_concurrent_file_based_cursor.py +13 -22
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_concurrent_cursor.py +53 -17
- airbyte_cdk/sources/file_based/stream/concurrent/cursor/file_based_final_state_cursor.py +17 -5
- airbyte_cdk/sources/file_based/stream/cursor/abstract_file_based_cursor.py +3 -1
- airbyte_cdk/sources/file_based/stream/cursor/default_file_based_cursor.py +26 -9
- airbyte_cdk/sources/file_based/stream/default_file_based_stream.py +67 -21
- airbyte_cdk/sources/http_logger.py +5 -1
- airbyte_cdk/sources/message/repository.py +18 -4
- airbyte_cdk/sources/source.py +17 -7
- airbyte_cdk/sources/streams/availability_strategy.py +9 -3
- airbyte_cdk/sources/streams/call_rate.py +63 -19
- airbyte_cdk/sources/streams/checkpoint/checkpoint_reader.py +31 -7
- airbyte_cdk/sources/streams/checkpoint/substream_resumable_full_refresh_cursor.py +6 -2
- airbyte_cdk/sources/streams/concurrent/adapters.py +77 -22
- airbyte_cdk/sources/streams/concurrent/cursor.py +56 -20
- airbyte_cdk/sources/streams/concurrent/default_stream.py +9 -2
- airbyte_cdk/sources/streams/concurrent/helpers.py +6 -2
- airbyte_cdk/sources/streams/concurrent/partition_enqueuer.py +9 -2
- airbyte_cdk/sources/streams/concurrent/partition_reader.py +4 -1
- airbyte_cdk/sources/streams/concurrent/partitions/record.py +10 -2
- airbyte_cdk/sources/streams/concurrent/partitions/types.py +6 -2
- airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +25 -10
- airbyte_cdk/sources/streams/concurrent/state_converters/datetime_stream_state_converter.py +32 -16
- airbyte_cdk/sources/streams/core.py +77 -22
- airbyte_cdk/sources/streams/http/availability_strategy.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +4 -1
- airbyte_cdk/sources/streams/http/error_handlers/error_handler.py +3 -1
- airbyte_cdk/sources/streams/http/error_handlers/http_status_error_handler.py +16 -5
- airbyte_cdk/sources/streams/http/error_handlers/response_models.py +9 -3
- airbyte_cdk/sources/streams/http/exceptions.py +2 -2
- airbyte_cdk/sources/streams/http/http.py +133 -33
- airbyte_cdk/sources/streams/http/http_client.py +91 -29
- airbyte_cdk/sources/streams/http/rate_limiting.py +23 -7
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +19 -6
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +38 -11
- airbyte_cdk/sources/streams/http/requests_native_auth/token.py +13 -3
- airbyte_cdk/sources/types.py +5 -1
- airbyte_cdk/sources/utils/record_helper.py +12 -3
- airbyte_cdk/sources/utils/schema_helpers.py +9 -3
- airbyte_cdk/sources/utils/slice_logger.py +4 -1
- airbyte_cdk/sources/utils/transform.py +24 -9
- airbyte_cdk/sql/exceptions.py +19 -6
- airbyte_cdk/sql/secrets.py +3 -1
- airbyte_cdk/sql/shared/catalog_providers.py +13 -4
- airbyte_cdk/sql/shared/sql_processor.py +44 -14
- airbyte_cdk/test/catalog_builder.py +19 -8
- airbyte_cdk/test/entrypoint_wrapper.py +27 -8
- airbyte_cdk/test/mock_http/mocker.py +41 -11
- airbyte_cdk/test/mock_http/request.py +9 -3
- airbyte_cdk/test/mock_http/response.py +3 -1
- airbyte_cdk/test/mock_http/response_builder.py +29 -7
- airbyte_cdk/test/state_builder.py +10 -2
- airbyte_cdk/test/utils/data.py +6 -2
- airbyte_cdk/test/utils/http_mocking.py +3 -1
- airbyte_cdk/utils/airbyte_secrets_utils.py +3 -1
- airbyte_cdk/utils/analytics_message.py +10 -2
- airbyte_cdk/utils/datetime_format_inferrer.py +4 -1
- airbyte_cdk/utils/mapping_helpers.py +3 -1
- airbyte_cdk/utils/message_utils.py +11 -4
- airbyte_cdk/utils/print_buffer.py +6 -1
- airbyte_cdk/utils/schema_inferrer.py +30 -9
- airbyte_cdk/utils/spec_schema_transformations.py +3 -1
- airbyte_cdk/utils/traced_exception.py +35 -9
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/METADATA +7 -6
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/RECORD +198 -198
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.5.3rc2.dist-info → airbyte_cdk-6.5.5.dist-info}/WHEEL +0 -0
airbyte_cdk/test/catalog_builder.py
CHANGED
@@ -2,7 +2,12 @@

 from typing import Any, Dict, List, Union, overload

-from airbyte_cdk.models import
+from airbyte_cdk.models import (
+    ConfiguredAirbyteCatalog,
+    ConfiguredAirbyteStream,
+    ConfiguredAirbyteStreamSerializer,
+    SyncMode,
+)


 class ConfiguredAirbyteStreamBuilder:
@@ -45,14 +50,16 @@ class CatalogBuilder:
         self._streams: List[ConfiguredAirbyteStreamBuilder] = []

     @overload
-    def with_stream(self, name: ConfiguredAirbyteStreamBuilder) -> "CatalogBuilder":
-        ...
+    def with_stream(self, name: ConfiguredAirbyteStreamBuilder) -> "CatalogBuilder": ...

     @overload
-    def with_stream(self, name: str, sync_mode: SyncMode) -> "CatalogBuilder":
-        ...
+    def with_stream(self, name: str, sync_mode: SyncMode) -> "CatalogBuilder": ...

-    def with_stream(
+    def with_stream(
+        self,
+        name: Union[str, ConfiguredAirbyteStreamBuilder],
+        sync_mode: Union[SyncMode, None] = None,
+    ) -> "CatalogBuilder":
         # As we are introducing a fully fledge ConfiguredAirbyteStreamBuilder, we would like to deprecate the previous interface
         #   with_stream(str, SyncMode)

@@ -61,10 +68,14 @@ class CatalogBuilder:
         builder = (
             name_or_builder
             if isinstance(name_or_builder, ConfiguredAirbyteStreamBuilder)
-            else ConfiguredAirbyteStreamBuilder()
+            else ConfiguredAirbyteStreamBuilder()
+            .with_name(name_or_builder)
+            .with_sync_mode(sync_mode)
         )
         self._streams.append(builder)
         return self

     def build(self) -> ConfiguredAirbyteCatalog:
-        return ConfiguredAirbyteCatalog(
+        return ConfiguredAirbyteCatalog(
+            streams=list(map(lambda builder: builder.build(), self._streams))
+        )
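
For orientation, a minimal usage sketch of the reworked CatalogBuilder API, based only on the signatures visible in the hunks above; the stream names and sync modes are illustrative:

    from airbyte_cdk.models import SyncMode
    from airbyte_cdk.test.catalog_builder import CatalogBuilder, ConfiguredAirbyteStreamBuilder

    # Deprecated-but-still-supported form: pass a name and sync mode directly.
    catalog = CatalogBuilder().with_stream("orders", SyncMode.full_refresh).build()

    # Preferred form: pass a pre-configured ConfiguredAirbyteStreamBuilder.
    stream = ConfiguredAirbyteStreamBuilder().with_name("orders").with_sync_mode(SyncMode.incremental)
    catalog = CatalogBuilder().with_stream(stream).build()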
airbyte_cdk/test/entrypoint_wrapper.py
CHANGED
@@ -53,7 +53,11 @@ class EntrypointOutput:
             raise ValueError("All messages are expected to be AirbyteMessage") from exception

         if uncaught_exception:
-            self._messages.append(
+            self._messages.append(
+                assemble_uncaught_exception(
+                    type(uncaught_exception), uncaught_exception
+                ).as_airbyte_message()
+            )

     @staticmethod
     def _parse_message(message: str) -> AirbyteMessage:
@@ -61,7 +65,9 @@ class EntrypointOutput:
             return AirbyteMessageSerializer.load(orjson.loads(message))  # type: ignore[no-any-return]  # Serializer.load() always returns AirbyteMessage
         except (orjson.JSONDecodeError, SchemaValidationError):
             # The platform assumes that logs that are not of AirbyteMessage format are log messages
-            return AirbyteMessage(
+            return AirbyteMessage(
+                type=Type.LOG, log=AirbyteLogMessage(level=Level.INFO, message=message)
+            )

     @property
     def records_and_state_messages(self) -> List[AirbyteMessage]:
@@ -119,18 +125,26 @@ class EntrypointOutput:
         return [message for message in self._messages if message.type in message_types]

     def _get_trace_message_by_trace_type(self, trace_type: TraceType) -> List[AirbyteMessage]:
-        return [
+        return [
+            message
+            for message in self._get_message_by_types([Type.TRACE])
+            if message.trace.type == trace_type
+        ]  # type: ignore[union-attr] # trace has `type`

     def is_in_logs(self, pattern: str) -> bool:
         """Check if any log message case-insensitive matches the pattern."""
-        return any(
+        return any(
+            re.search(pattern, entry.log.message, flags=re.IGNORECASE) for entry in self.logs
+        )  # type: ignore[union-attr] # log has `message`

     def is_not_in_logs(self, pattern: str) -> bool:
         """Check if no log message matches the case-insensitive pattern."""
         return not self.is_in_logs(pattern)


-def _run_command(
+def _run_command(
+    source: Source, args: List[str], expecting_exception: bool = False
+) -> EntrypointOutput:
     log_capture_buffer = StringIO()
     stream_handler = logging.StreamHandler(log_capture_buffer)
     stream_handler.setLevel(logging.INFO)
@@ -174,7 +188,9 @@ def discover(
     tmp_directory_path = Path(tmp_directory)
     config_file = make_file(tmp_directory_path / "config.json", config)

-    return _run_command(
+    return _run_command(
+        source, ["discover", "--config", config_file, "--debug"], expecting_exception
+    )


 def read(
@@ -194,7 +210,8 @@ def read(
     tmp_directory_path = Path(tmp_directory)
     config_file = make_file(tmp_directory_path / "config.json", config)
     catalog_file = make_file(
-        tmp_directory_path / "catalog.json",
+        tmp_directory_path / "catalog.json",
+        orjson.dumps(ConfiguredAirbyteCatalogSerializer.dump(catalog)).decode(),
     )
     args = [
         "read",
@@ -217,7 +234,9 @@ def read(
     return _run_command(source, args, expecting_exception)


-def make_file(
+def make_file(
+    path: Path, file_contents: Optional[Union[str, Mapping[str, Any], List[Mapping[str, Any]]]]
+) -> str:
     if isinstance(file_contents, str):
         path.write_text(file_contents)
     else:
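
A rough sketch of how these reformatted helpers are exercised from a connector test. The (source, config, catalog) call shape of read() and the EntrypointOutput helpers come from the hunks above; the surrounding scaffolding and the standard CDK import locations are assumptions:

    from typing import Any, Mapping

    from airbyte_cdk.models import ConfiguredAirbyteCatalog
    from airbyte_cdk.sources import Source
    from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read


    def run_read(source: Source, config: Mapping[str, Any], catalog: ConfiguredAirbyteCatalog) -> EntrypointOutput:
        # `read` writes config/catalog to temporary files and invokes the connector entrypoint.
        output = read(source, config, catalog)
        # `is_not_in_logs` does a case-insensitive regex search over the captured log messages.
        assert output.is_not_in_logs("traceback"), "connector logged an unexpected error"
        return output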
airbyte_cdk/test/mock_http/mocker.py
CHANGED
@@ -42,7 +42,12 @@ class HttpMocker(contextlib.ContextDecorator):
         self._mocker.__enter__()
         return self

-    def __exit__(
+    def __exit__(
+        self,
+        exc_type: Optional[BaseException],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
         self._mocker.__exit__(exc_type, exc_val, exc_tb)

     def _validate_all_matchers_called(self) -> None:
@@ -51,7 +56,10 @@ class HttpMocker(contextlib.ContextDecorator):
                 raise ValueError(f"Invalid number of matches for `{matcher}`")

     def _mock_request_method(
-        self,
+        self,
+        method: SupportedHttpMethods,
+        request: HttpRequest,
+        responses: Union[HttpResponse, List[HttpResponse]],
     ) -> None:
         if isinstance(responses, HttpResponse):
             responses = [responses]
@@ -65,37 +73,57 @@ class HttpMocker(contextlib.ContextDecorator):
             requests_mock.ANY,
             additional_matcher=self._matches_wrapper(matcher),
             response_list=[
-                {
+                {
+                    "text": response.body,
+                    "status_code": response.status_code,
+                    "headers": response.headers,
+                }
+                for response in responses
             ],
         )

     def get(self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]) -> None:
         self._mock_request_method(SupportedHttpMethods.GET, request, responses)

-    def patch(
+    def patch(
+        self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]
+    ) -> None:
         self._mock_request_method(SupportedHttpMethods.PATCH, request, responses)

-    def post(
+    def post(
+        self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]
+    ) -> None:
         self._mock_request_method(SupportedHttpMethods.POST, request, responses)

-    def delete(
+    def delete(
+        self, request: HttpRequest, responses: Union[HttpResponse, List[HttpResponse]]
+    ) -> None:
         self._mock_request_method(SupportedHttpMethods.DELETE, request, responses)

     @staticmethod
-    def _matches_wrapper(
+    def _matches_wrapper(
+        matcher: HttpRequestMatcher,
+    ) -> Callable[[requests_mock.request._RequestObjectProxy], bool]:
         def matches(requests_mock_request: requests_mock.request._RequestObjectProxy) -> bool:
             # query_params are provided as part of `requests_mock_request.url`
             http_request = HttpRequest(
-                requests_mock_request.url,
+                requests_mock_request.url,
+                query_params={},
+                headers=requests_mock_request.headers,
+                body=requests_mock_request.body,
             )
             return matcher.matches(http_request)

         return matches

     def assert_number_of_calls(self, request: HttpRequest, number_of_calls: int) -> None:
-        corresponding_matchers = list(
+        corresponding_matchers = list(
+            filter(lambda matcher: matcher.request == request, self._matchers)
+        )
         if len(corresponding_matchers) != 1:
-            raise ValueError(
+            raise ValueError(
+                f"Was expecting only one matcher to match the request but got `{corresponding_matchers}`"
+            )

         assert corresponding_matchers[0].actual_number_of_matches == number_of_calls

@@ -110,7 +138,9 @@ class HttpMocker(contextlib.ContextDecorator):
             try:
                 result = f(*args, **kwargs)
             except requests_mock.NoMockAddress as no_mock_exception:
-                matchers_as_string = "\n\t".join(
+                matchers_as_string = "\n\t".join(
+                    map(lambda matcher: str(matcher.request), self._matchers)
+                )
                 raise ValueError(
                     f"No matcher matches {no_mock_exception.args[0]} with headers `{no_mock_exception.request.headers}` "
                     f"and body `{no_mock_exception.request.body}`. "
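
To show how the pieces fit, a hedged sketch of HttpMocker used as a context manager (its __enter__/__exit__ are part of the hunks above); the endpoint, query params, and response body are invented, and the airbyte_cdk.test.mock_http import location is assumed:

    from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse

    with HttpMocker() as http_mocker:
        # get()/post()/patch()/delete() accept a single HttpResponse or a list of them.
        http_mocker.get(
            HttpRequest("https://api.example.com/v1/items", query_params={"page": "1"}),
            HttpResponse(body='{"items": []}', status_code=200),
        )
        # ... code under test performs the HTTP call here ...
        http_mocker.assert_number_of_calls(
            HttpRequest("https://api.example.com/v1/items", query_params={"page": "1"}), 1
        )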
airbyte_cdk/test/mock_http/request.py
CHANGED
@@ -24,7 +24,9 @@ class HttpRequest:
         if not self._parsed_url.query and query_params:
             self._parsed_url = urlparse(f"{url}?{self._encode_qs(query_params)}")
         elif self._parsed_url.query and query_params:
-            raise ValueError(
+            raise ValueError(
+                "If query params are provided as part of the url, `query_params` should be empty"
+            )

         self._headers = headers or {}
         self._body = body
@@ -62,7 +64,9 @@ class HttpRequest:
         return False

     @staticmethod
-    def _to_mapping(
+    def _to_mapping(
+        body: Optional[Union[str, bytes, Mapping[str, Any]]],
+    ) -> Optional[Mapping[str, Any]]:
         if isinstance(body, Mapping):
             return body
         elif isinstance(body, bytes):
@@ -84,7 +88,9 @@ class HttpRequest:
         return f"{self._parsed_url} with headers {self._headers} and body {self._body!r})"

     def __repr__(self) -> str:
-        return
+        return (
+            f"HttpRequest(request={self._parsed_url}, headers={self._headers}, body={self._body!r})"
+        )

     def __eq__(self, other: Any) -> bool:
         if isinstance(other, HttpRequest):
airbyte_cdk/test/mock_http/response.py
CHANGED
@@ -5,7 +5,9 @@ from typing import Mapping


 class HttpResponse:
-    def __init__(
+    def __init__(
+        self, body: str, status_code: int = 200, headers: Mapping[str, str] = MappingProxyType({})
+    ):
         self._body = body
         self._status_code = status_code
         self._headers = headers
airbyte_cdk/test/mock_http/response_builder.py
CHANGED
@@ -91,7 +91,12 @@ class FieldUpdatePaginationStrategy(PaginationStrategy):


 class RecordBuilder:
-    def __init__(
+    def __init__(
+        self,
+        template: Dict[str, Any],
+        id_path: Optional[Path],
+        cursor_path: Optional[Union[FieldPath, NestedPath]],
+    ):
         self._record = template
         self._id_path = id_path
         self._cursor_path = cursor_path
@@ -109,9 +114,13 @@ class RecordBuilder:
     def _validate_field(self, field_name: str, path: Optional[Path]) -> None:
         try:
             if path and not path.extract(self._record):
-                raise ValueError(
+                raise ValueError(
+                    f"{field_name} `{path}` was provided but it is not part of the template `{self._record}`"
+                )
         except (IndexError, KeyError) as exception:
-            raise ValueError(
+            raise ValueError(
+                f"{field_name} `{path}` was provided but it is not part of the template `{self._record}`"
+            ) from exception

     def with_id(self, identifier: Any) -> "RecordBuilder":
         self._set_field("id", self._id_path, identifier)
@@ -139,7 +148,10 @@ class RecordBuilder:

 class HttpResponseBuilder:
     def __init__(
-        self,
+        self,
+        template: Dict[str, Any],
+        records_path: Union[FieldPath, NestedPath],
+        pagination_strategy: Optional[PaginationStrategy],
     ):
         self._response = template
         self._records: List[RecordBuilder] = []
@@ -175,7 +187,13 @@ def _get_unit_test_folder(execution_folder: str) -> FilePath:


 def find_template(resource: str, execution_folder: str) -> Dict[str, Any]:
-    response_template_filepath = str(
+    response_template_filepath = str(
+        get_unit_test_folder(execution_folder)
+        / "resource"
+        / "http"
+        / "response"
+        / f"{resource}.json"
+    )
     with open(response_template_filepath, "r") as template_file:
         return json.load(template_file)  # type: ignore  # we assume the dev correctly set up the resource file

@@ -198,10 +216,14 @@ def create_record_builder(
             )
         return RecordBuilder(record_template, record_id_path, record_cursor_path)
     except (IndexError, KeyError):
-        raise ValueError(
+        raise ValueError(
+            f"Error while extracting records at path `{records_path}` from response template `{response_template}`"
+        )


 def create_response_builder(
-    response_template: Dict[str, Any],
+    response_template: Dict[str, Any],
+    records_path: Union[FieldPath, NestedPath],
+    pagination_strategy: Optional[PaginationStrategy] = None,
 ) -> HttpResponseBuilder:
     return HttpResponseBuilder(response_template, records_path, pagination_strategy)
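
Reading the reformatted signatures together, a sketch of how these builders are typically wired up in a connector unit test; the "items" resource name and records path are invented, and the positional arguments of create_record_builder plus HttpResponseBuilder.with_record()/build() are assumed from the builder pattern rather than shown in full in these hunks:

    from airbyte_cdk.test.mock_http.response_builder import (
        FieldPath,
        create_record_builder,
        create_response_builder,
        find_template,
    )

    # find_template() resolves unit_tests/resource/http/response/items.json relative to the test file.
    template = find_template("items", __file__)
    record = create_record_builder(template, FieldPath("items")).build()
    response = create_response_builder(template, FieldPath("items")).with_record(record).build()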
airbyte_cdk/test/state_builder.py
CHANGED
@@ -2,7 +2,13 @@

 from typing import Any, List

-from airbyte_cdk.models import
+from airbyte_cdk.models import (
+    AirbyteStateBlob,
+    AirbyteStateMessage,
+    AirbyteStateType,
+    AirbyteStreamState,
+    StreamDescriptor,
+)


 class StateBuilder:
@@ -14,7 +20,9 @@ class StateBuilder:
             AirbyteStateMessage(
                 type=AirbyteStateType.STREAM,
                 stream=AirbyteStreamState(
-                    stream_state=state
+                    stream_state=state
+                    if isinstance(state, AirbyteStateBlob)
+                    else AirbyteStateBlob(state),
                     stream_descriptor=StreamDescriptor(**{"name": stream_name}),
                 ),
             )
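
A small sketch of how this builder is commonly used to seed incremental-sync tests; the with_stream_state() method name is an assumption based on the CDK's test helpers, since only the message-construction body appears in the hunk above:

    from airbyte_cdk.test.state_builder import StateBuilder

    # Plain dicts are wrapped into AirbyteStateBlob by the code shown above.
    state = StateBuilder().with_stream_state("orders", {"updated_at": "2024-01-01T00:00:00Z"}).build()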
airbyte_cdk/test/utils/data.py
CHANGED
@@ -7,14 +7,18 @@ def get_unit_test_folder(execution_folder: str) -> FilePath:
     path = FilePath(execution_folder)
     while path.name != "unit_tests":
         if path.name == path.root or path.name == path.drive:
-            raise ValueError(
+            raise ValueError(
+                f"Could not find `unit_tests` folder as a parent of {execution_folder}"
+            )
         path = path.parent
     return path


 def read_resource_file_contents(resource: str, test_location: str) -> str:
     """Read the contents of a test data file from the test resource folder."""
-    file_path = str(
+    file_path = str(
+        get_unit_test_folder(test_location) / "resource" / "http" / "response" / f"{resource}"
+    )
     with open(file_path) as f:
         response = f.read()
     return response
airbyte_cdk/test/utils/http_mocking.py
CHANGED
@@ -6,7 +6,9 @@ from typing import Any, Mapping
 from requests_mock import Mocker


-def register_mock_responses(
+def register_mock_responses(
+    mocker: Mocker, http_calls: list[Mapping[str, Mapping[str, Any]]]
+) -> None:
     """Register a list of HTTP request-response pairs."""
     for call in http_calls:
         request, response = call["request"], call["response"]
airbyte_cdk/utils/airbyte_secrets_utils.py
CHANGED
@@ -36,7 +36,9 @@ def get_secret_paths(spec: Mapping[str, Any]) -> List[List[str]]:
     return paths


-def get_secrets(
+def get_secrets(
+    connection_specification: Mapping[str, Any], config: Mapping[str, Any]
+) -> List[Any]:
     """
     Get a list of secret values from the source config based on the source specification
     :type connection_specification: the connection_specification field of an AirbyteSpecification i.e the JSONSchema definition
airbyte_cdk/utils/analytics_message.py
CHANGED
@@ -3,7 +3,13 @@
 import time
 from typing import Any, Optional

-from airbyte_cdk.models import
+from airbyte_cdk.models import (
+    AirbyteAnalyticsTraceMessage,
+    AirbyteMessage,
+    AirbyteTraceMessage,
+    TraceType,
+    Type,
+)


 def create_analytics_message(type: str, value: Optional[Any]) -> AirbyteMessage:
@@ -12,6 +18,8 @@ def create_analytics_message(type: str, value: Optional[Any]) -> AirbyteMessage:
         trace=AirbyteTraceMessage(
             type=TraceType.ANALYTICS,
             emitted_at=time.time() * 1000,
-            analytics=AirbyteAnalyticsTraceMessage(
+            analytics=AirbyteAnalyticsTraceMessage(
+                type=type, value=str(value) if value is not None else None
+            ),
         ),
     )
airbyte_cdk/utils/datetime_format_inferrer.py
CHANGED
@@ -29,7 +29,10 @@ class DatetimeFormatInferrer:
             "%Y-%m",
             "%d-%m-%Y",
         ]
-        self._timestamp_heuristic_ranges = [
+        self._timestamp_heuristic_ranges = [
+            range(1_000_000_000, 2_000_000_000),
+            range(1_000_000_000_000, 2_000_000_000_000),
+        ]

     def _can_be_datetime(self, value: Any) -> bool:
         """Checks if the value can be a datetime.
airbyte_cdk/utils/mapping_helpers.py
CHANGED
@@ -6,7 +6,9 @@
 from typing import Any, List, Mapping, Optional, Set, Union


-def combine_mappings(
+def combine_mappings(
+    mappings: List[Optional[Union[Mapping[str, Any], str]]],
+) -> Union[Mapping[str, Any], str]:
     """
     Combine multiple mappings into a single mapping. If any of the mappings are a string, return
     that string. Raise errors in the following cases:
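
As a quick illustration of the documented happy path of combine_mappings (merge non-conflicting mappings into one, or return the string if one is present); the example values are invented:

    from airbyte_cdk.utils.mapping_helpers import combine_mappings

    merged = combine_mappings([{"page": "1"}, {"per_page": "50"}])
    assert merged == {"page": "1", "per_page": "50"}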
airbyte_cdk/utils/message_utils.py
CHANGED
@@ -7,12 +7,19 @@ from airbyte_cdk.sources.connector_state_manager import HashableStreamDescriptor
 def get_stream_descriptor(message: AirbyteMessage) -> HashableStreamDescriptor:
     match message.type:
         case Type.RECORD:
-            return HashableStreamDescriptor(
+            return HashableStreamDescriptor(
+                name=message.record.stream, namespace=message.record.namespace
+            )  # type: ignore[union-attr] # record has `stream` and `namespace`
         case Type.STATE:
             if not message.state.stream or not message.state.stream.stream_descriptor:  # type: ignore[union-attr] # state has `stream`
-                raise ValueError(
+                raise ValueError(
+                    "State message was not in per-stream state format, which is required for record counts."
+                )
             return HashableStreamDescriptor(
-                name=message.state.stream.stream_descriptor.name,
+                name=message.state.stream.stream_descriptor.name,
+                namespace=message.state.stream.stream_descriptor.namespace,  # type: ignore[union-attr] # state has `stream`
             )
         case _:
-            raise NotImplementedError(
+            raise NotImplementedError(
+                f"get_stream_descriptor is not implemented for message type '{message.type}'."
+            )
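
Since only fragments of get_stream_descriptor appear above, a short example of the RECORD branch; the stream name and payload are illustrative:

    from airbyte_cdk.models import AirbyteMessage, AirbyteRecordMessage, Type
    from airbyte_cdk.utils.message_utils import get_stream_descriptor

    message = AirbyteMessage(
        type=Type.RECORD,
        record=AirbyteRecordMessage(stream="orders", data={"id": 1}, emitted_at=0),
    )
    descriptor = get_stream_descriptor(message)  # HashableStreamDescriptor(name="orders", namespace=None)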
airbyte_cdk/utils/print_buffer.py
CHANGED
@@ -65,6 +65,11 @@ class PrintBuffer:
         sys.stderr = self
         return self

-    def __exit__(
+    def __exit__(
+        self,
+        exc_type: Optional[BaseException],
+        exc_val: Optional[BaseException],
+        exc_tb: Optional[TracebackType],
+    ) -> None:
         self.flush()
         sys.stdout, sys.stderr = self.old_stdout, self.old_stderr
airbyte_cdk/utils/schema_inferrer.py
CHANGED
@@ -55,9 +55,14 @@ InferredSchema = Dict[str, Any]

 class SchemaValidationException(Exception):
     @classmethod
-    def merge_exceptions(
+    def merge_exceptions(
+        cls, exceptions: List["SchemaValidationException"]
+    ) -> "SchemaValidationException":
         # We assume the schema is the same for all SchemaValidationException
-        return SchemaValidationException(
+        return SchemaValidationException(
+            exceptions[0].schema,
+            [x for exception in exceptions for x in exception._validation_errors],
+        )

     def __init__(self, schema: InferredSchema, validation_errors: List[Exception]):
         self._schema = schema
@@ -84,7 +89,9 @@ class SchemaInferrer:

     stream_to_builder: Dict[str, SchemaBuilder]

-    def __init__(
+    def __init__(
+        self, pk: Optional[List[List[str]]] = None, cursor_field: Optional[List[List[str]]] = None
+    ) -> None:
         self.stream_to_builder = defaultdict(NoRequiredSchemaBuilder)
         self._pk = [] if pk is None else pk
         self._cursor_field = [] if cursor_field is None else cursor_field
@@ -105,7 +112,9 @@ class SchemaInferrer:

     def _clean_any_of(self, node: InferredSchema) -> None:
         if len(node[_ANY_OF]) == 2 and self._null_type_in_any_of(node):
-            real_type =
+            real_type = (
+                node[_ANY_OF][1] if node[_ANY_OF][0][_TYPE] == _NULL_TYPE else node[_ANY_OF][0]
+            )
             node.update(real_type)
             node[_TYPE] = [node[_TYPE], _NULL_TYPE]
             node.pop(_ANY_OF)
@@ -189,7 +198,9 @@ class SchemaInferrer:
         if errors:
             raise SchemaValidationException(node, errors)

-    def _add_field_as_required(
+    def _add_field_as_required(
+        self, node: InferredSchema, path: List[str], traveled_path: Optional[List[str]] = None
+    ) -> None:
         """
         Take a nested key and travel the schema to mark every node as required.
         """
@@ -208,7 +219,9 @@ class SchemaInferrer:

         next_node = path[0]
         if next_node not in node[_PROPERTIES]:
-            raise ValueError(
+            raise ValueError(
+                f"Path {traveled_path} does not have field `{next_node}` in the schema and hence can't be marked as required."
+            )

         if _TYPE not in node:
             # We do not expect this case to happen but we added a specific error message just in case
@@ -216,8 +229,14 @@ class SchemaInferrer:
                 f"Unknown schema error: {traveled_path} is expected to have a type but did not. Schema inferrence is probably broken"
             )

-        if node[_TYPE] not in [
-
+        if node[_TYPE] not in [
+            _OBJECT_TYPE,
+            [_NULL_TYPE, _OBJECT_TYPE],
+            [_OBJECT_TYPE, _NULL_TYPE],
+        ]:
+            raise ValueError(
+                f"Path {traveled_path} is expected to be an object but was of type `{node['properties'][next_node]['type']}`"
+            )

         if _REQUIRED not in node or not node[_REQUIRED]:
             node[_REQUIRED] = [next_node]
@@ -242,7 +261,9 @@ class SchemaInferrer:
         Returns the inferred JSON schema for the specified stream. Might be `None` if there were no records for the given stream name.
         """
         return (
-            self._add_required_properties(
+            self._add_required_properties(
+                self._clean(self.stream_to_builder[stream_name].to_schema())
+            )
             if stream_name in self.stream_to_builder
             else None
         )
airbyte_cdk/utils/spec_schema_transformations.py
CHANGED
@@ -17,7 +17,9 @@ def resolve_refs(schema: dict) -> dict:
     str_schema = json.dumps(schema)
     for ref_block in re.findall(r'{"\$ref": "#\/definitions\/.+?(?="})"}', str_schema):
         ref = json.loads(ref_block)["$ref"]
-        str_schema = str_schema.replace(
+        str_schema = str_schema.replace(
+            ref_block, json.dumps(json_schema_ref_resolver.resolve(ref)[1])
+        )
     pyschema: dict = json.loads(str_schema)
     del pyschema["definitions"]
     return pyschema