airbyte-cdk 6.34.1.dev0__py3-none-any.whl → 6.35.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- airbyte_cdk/connector_builder/connector_builder_handler.py +16 -12
- airbyte_cdk/connector_builder/test_reader/__init__.py +7 -0
- airbyte_cdk/connector_builder/test_reader/helpers.py +591 -0
- airbyte_cdk/connector_builder/test_reader/message_grouper.py +160 -0
- airbyte_cdk/connector_builder/test_reader/reader.py +441 -0
- airbyte_cdk/connector_builder/test_reader/types.py +75 -0
- airbyte_cdk/sources/declarative/async_job/job_orchestrator.py +7 -7
- airbyte_cdk/sources/declarative/auth/jwt.py +17 -11
- airbyte_cdk/sources/declarative/auth/oauth.py +6 -1
- airbyte_cdk/sources/declarative/auth/token.py +3 -8
- airbyte_cdk/sources/declarative/concurrent_declarative_source.py +30 -79
- airbyte_cdk/sources/declarative/declarative_component_schema.yaml +213 -100
- airbyte_cdk/sources/declarative/declarative_stream.py +3 -1
- airbyte_cdk/sources/declarative/decoders/__init__.py +0 -4
- airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +18 -3
- airbyte_cdk/sources/declarative/decoders/json_decoder.py +12 -58
- airbyte_cdk/sources/declarative/extractors/record_selector.py +12 -3
- airbyte_cdk/sources/declarative/incremental/concurrent_partition_cursor.py +56 -25
- airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py +12 -6
- airbyte_cdk/sources/declarative/incremental/global_substream_cursor.py +6 -2
- airbyte_cdk/sources/declarative/interpolation/__init__.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/filters.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_boolean.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_mapping.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_nested_mapping.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolated_string.py +1 -1
- airbyte_cdk/sources/declarative/interpolation/interpolation.py +2 -1
- airbyte_cdk/sources/declarative/interpolation/jinja.py +14 -1
- airbyte_cdk/sources/declarative/interpolation/macros.py +19 -4
- airbyte_cdk/sources/declarative/manifest_declarative_source.py +9 -0
- airbyte_cdk/sources/declarative/models/declarative_component_schema.py +150 -41
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +234 -84
- airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py +5 -5
- airbyte_cdk/sources/declarative/partition_routers/list_partition_router.py +4 -2
- airbyte_cdk/sources/declarative/partition_routers/substream_partition_router.py +26 -18
- airbyte_cdk/sources/declarative/requesters/http_requester.py +8 -2
- airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +16 -5
- airbyte_cdk/sources/declarative/requesters/request_option.py +83 -4
- airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py +7 -6
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_nested_request_input_provider.py +1 -4
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_input_provider.py +0 -3
- airbyte_cdk/sources/declarative/requesters/request_options/interpolated_request_options_provider.py +2 -47
- airbyte_cdk/sources/declarative/retrievers/async_retriever.py +6 -12
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +4 -3
- airbyte_cdk/sources/declarative/transformations/add_fields.py +4 -4
- airbyte_cdk/sources/file_based/config/abstract_file_based_spec.py +2 -1
- airbyte_cdk/sources/file_based/config/validate_config_transfer_modes.py +81 -0
- airbyte_cdk/sources/file_based/file_based_source.py +70 -37
- airbyte_cdk/sources/file_based/file_based_stream_reader.py +107 -12
- airbyte_cdk/sources/file_based/stream/__init__.py +10 -1
- airbyte_cdk/sources/file_based/stream/identities_stream.py +47 -0
- airbyte_cdk/sources/file_based/stream/permissions_file_based_stream.py +85 -0
- airbyte_cdk/sources/specs/transfer_modes.py +26 -0
- airbyte_cdk/sources/streams/call_rate.py +185 -47
- airbyte_cdk/sources/streams/http/http.py +1 -2
- airbyte_cdk/sources/streams/http/requests_native_auth/abstract_oauth.py +217 -56
- airbyte_cdk/sources/streams/http/requests_native_auth/oauth.py +144 -73
- airbyte_cdk/sources/streams/permissions/identities_stream.py +75 -0
- airbyte_cdk/test/mock_http/mocker.py +9 -1
- airbyte_cdk/test/mock_http/response.py +6 -3
- airbyte_cdk/utils/datetime_helpers.py +48 -66
- airbyte_cdk/utils/mapping_helpers.py +126 -26
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/METADATA +1 -1
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/RECORD +68 -59
- airbyte_cdk/connector_builder/message_grouper.py +0 -448
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE.txt +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/LICENSE_SHORT +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/WHEEL +0 -0
- {airbyte_cdk-6.34.1.dev0.dist-info → airbyte_cdk-6.35.0.dist-info}/entry_points.txt +0 -0
@@ -1,448 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3
|
-
#
|
4
|
-
|
5
|
-
import json
|
6
|
-
import logging
|
7
|
-
from copy import deepcopy
|
8
|
-
from json import JSONDecodeError
|
9
|
-
from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Union
|
10
|
-
|
11
|
-
from airbyte_cdk.connector_builder.models import (
|
12
|
-
AuxiliaryRequest,
|
13
|
-
HttpRequest,
|
14
|
-
HttpResponse,
|
15
|
-
LogMessage,
|
16
|
-
StreamRead,
|
17
|
-
StreamReadPages,
|
18
|
-
StreamReadSlices,
|
19
|
-
)
|
20
|
-
from airbyte_cdk.entrypoint import AirbyteEntrypoint
|
21
|
-
from airbyte_cdk.models import (
|
22
|
-
AirbyteControlMessage,
|
23
|
-
AirbyteLogMessage,
|
24
|
-
AirbyteMessage,
|
25
|
-
AirbyteStateMessage,
|
26
|
-
AirbyteTraceMessage,
|
27
|
-
ConfiguredAirbyteCatalog,
|
28
|
-
OrchestratorType,
|
29
|
-
TraceType,
|
30
|
-
)
|
31
|
-
from airbyte_cdk.models import Type as MessageType
|
32
|
-
from airbyte_cdk.sources.declarative.declarative_source import DeclarativeSource
|
33
|
-
from airbyte_cdk.sources.utils.slice_logger import SliceLogger
|
34
|
-
from airbyte_cdk.sources.utils.types import JsonType
|
35
|
-
from airbyte_cdk.utils import AirbyteTracedException
|
36
|
-
from airbyte_cdk.utils.datetime_format_inferrer import DatetimeFormatInferrer
|
37
|
-
from airbyte_cdk.utils.schema_inferrer import SchemaInferrer, SchemaValidationException
|
38
|
-
|
39
|
-
|
40
|
-
class MessageGrouper:
    """Group the messages emitted by a test read into slices, pages, logs and auxiliary requests.

    The grouper runs a read of the first stream of a declarative source, walks the
    resulting ``AirbyteMessage`` stream and assembles a ``StreamRead`` object from it.
    The three limits passed to the constructor are used to validate the requested
    record limit and to report whether a test-read limit was reached.
    """

    logger = logging.getLogger("airbyte.connector-builder")

    def __init__(self, max_pages_per_slice: int, max_slices: int, max_record_limit: int = 1000):
        """Store the limits used by ``get_message_groups`` and ``_has_reached_limit``.

        :param max_pages_per_slice: number of pages in one slice at which the limit counts as reached
        :param max_slices: number of slices at which the limit counts as reached
        :param max_record_limit: upper bound for ``record_limit`` and for the total record count
        """
        self._max_pages_per_slice = max_pages_per_slice
        self._max_slices = max_slices
        self._max_record_limit = max_record_limit

    def _pk_to_nested_and_composite_field(
        self, field: Optional[Union[str, List[str], List[List[str]]]]
    ) -> List[List[str]]:
        """Normalize a primary key definition to a list of field paths.

        * falsy value (``None``, ``""``, ``[]``) -> ``[[]]``
        * ``"id"`` -> ``[["id"]]``
        * ``["a", "b"]`` (composite key) -> ``[["a"], ["b"]]``
        * ``[["a", "b"]]`` (already a list of paths) -> returned unchanged
        """
        if not field:
            return [[]]

        if isinstance(field, str):
            return [[field]]

        # A list whose first element is a string is treated as a composite key:
        # every entry becomes its own single-segment path.
        is_composite_key = isinstance(field[0], str)
        if is_composite_key:
            return [[i] for i in field]  # type: ignore  # the type of field is expected to be List[str] here

        return field  # type: ignore  # the type of field is expected to be List[List[str]] here

    def _cursor_field_to_nested_and_composite_field(
        self, field: Union[str, List[str]]
    ) -> List[List[str]]:
        """Normalize a cursor field definition to a list containing a single field path.

        * falsy value -> ``[[]]``
        * ``"updated_at"`` -> ``[["updated_at"]]``
        * ``["data", "updated_at"]`` (nested key) -> ``[["data", "updated_at"]]``

        :raises ValueError: if ``field`` is a non-empty sequence whose first element is not a string
        """
        if not field:
            return [[]]

        if isinstance(field, str):
            return [[field]]

        # Unlike primary keys, a list of strings is a single nested path here.
        is_nested_key = isinstance(field[0], str)
        if is_nested_key:
            return [field]

        # The closing backtick was missing from this message.
        raise ValueError(f"Unknown type for cursor field `{field}`")

    def get_message_groups(
        self,
        source: DeclarativeSource,
        config: Mapping[str, Any],
        configured_catalog: ConfiguredAirbyteCatalog,
        state: List[AirbyteStateMessage],
        record_limit: Optional[int] = None,
    ) -> StreamRead:
        """Read the first stream of ``source`` and return the grouped result as a ``StreamRead``.

        :param source: source whose first stream (``source.streams(config)[0]``) is read
        :param config: connector configuration passed to the source
        :param configured_catalog: catalog passed to the read; its first stream names the inferred schema
        :param state: state messages passed to the read
        :param record_limit: optional number of records to read; defaults to the configured maximum
        :raises ValueError: if ``record_limit`` is outside ``[1, max_record_limit]`` or an
            unexpected message group type is produced
        """
        if record_limit is not None and not (1 <= record_limit <= self._max_record_limit):
            raise ValueError(
                f"Record limit must be between 1 and {self._max_record_limit}. Got {record_limit}"
            )

        # The connector builder currently only supports reading from a single stream at a time
        stream = source.streams(config)[0]
        schema_inferrer = SchemaInferrer(
            self._pk_to_nested_and_composite_field(stream.primary_key),
            self._cursor_field_to_nested_and_composite_field(stream.cursor_field),
        )
        datetime_format_inferrer = DatetimeFormatInferrer()

        if record_limit is None:
            record_limit = self._max_record_limit
        else:
            record_limit = min(record_limit, self._max_record_limit)

        slices = []
        log_messages = []
        latest_config_update: Optional[AirbyteControlMessage] = None
        auxiliary_requests = []
        for message_group in self._get_message_groups(
            self._read_stream(source, config, configured_catalog, state),
            schema_inferrer,
            datetime_format_inferrer,
            record_limit,
        ):
            if isinstance(message_group, AirbyteLogMessage):
                log_messages.append(
                    LogMessage(message=message_group.message, level=message_group.level.value)
                )
            elif isinstance(message_group, AirbyteTraceMessage):
                # Only error traces are surfaced; they are reported as ERROR-level logs.
                if message_group.type == TraceType.ERROR:
                    log_messages.append(
                        LogMessage(
                            message=message_group.error.message,
                            level="ERROR",
                            internal_message=message_group.error.internal_message,
                            stacktrace=message_group.error.stack_trace,
                        )
                    )
            elif isinstance(message_group, AirbyteControlMessage):
                # Keep the most recently emitted config update (ties go to the later message).
                if (
                    not latest_config_update
                    or latest_config_update.emitted_at <= message_group.emitted_at
                ):
                    latest_config_update = message_group
            elif isinstance(message_group, AuxiliaryRequest):
                auxiliary_requests.append(message_group)
            elif isinstance(message_group, StreamReadSlices):
                slices.append(message_group)
            else:
                raise ValueError(f"Unknown message group type: {type(message_group)}")

        try:
            # The connector builder currently only supports reading from a single stream at a time
            configured_stream = configured_catalog.streams[0]
            schema = schema_inferrer.get_stream_schema(configured_stream.stream.name)
        except SchemaValidationException as exception:
            # Validation problems are reported as logs; the schema carried by the exception is used.
            for validation_error in exception.validation_errors:
                log_messages.append(LogMessage(validation_error, "ERROR"))
            schema = exception.schema

        return StreamRead(
            logs=log_messages,
            slices=slices,
            test_read_limit_reached=self._has_reached_limit(slices),
            auxiliary_requests=auxiliary_requests,
            inferred_schema=schema,
            latest_config_update=self._clean_config(latest_config_update.connectorConfig.config)
            if latest_config_update
            else None,
            inferred_datetime_formats=datetime_format_inferrer.get_inferred_datetime_formats(),
        )

    def _get_message_groups(
        self,
        messages: Iterator[AirbyteMessage],
        schema_inferrer: SchemaInferrer,
        datetime_format_inferrer: DatetimeFormatInferrer,
        limit: int,
    ) -> Iterable[
        Union[
            StreamReadSlices,
            StreamReadPages,
            AirbyteControlMessage,
            AirbyteLogMessage,
            AirbyteTraceMessage,
            AuxiliaryRequest,
        ]
    ]:
        """
        Message groups are partitioned according to when request log messages are received. Subsequent response log messages
        and record messages belong to the prior request log message and when we encounter another request, append the latest
        message group, until <limit> records have been read.

        Messages received from the CDK read operation will always arrive in the following order:
        {type: LOG, log: {message: "request: ..."}}
        {type: LOG, log: {message: "response: ..."}}
        ... 0 or more record messages
        {type: RECORD, record: {data: ...}}
        {type: RECORD, record: {data: ...}}
        Repeats for each request/response made

        Note: The exception is that normal log messages can be received at any time which are not incorporated into grouping

        Yields ``StreamReadSlices`` for completed slices, ``AuxiliaryRequest`` for auxiliary HTTP
        logs, and passes through plain log messages, error traces and connector-config control
        messages. State messages are not yielded; the latest one is attached to the next slice.

        :raises ValueError: if a log message parses as JSON but is not a dict, or an auxiliary
            request log has a non-dict ``airbyte_cdk``, ``stream`` or ``http`` entry
        """
        records_count = 0
        at_least_one_page_in_group = False
        current_page_records: List[Mapping[str, Any]] = []
        current_slice_descriptor: Optional[Dict[str, Any]] = None
        current_slice_pages: List[StreamReadPages] = []
        current_page_request: Optional[HttpRequest] = None
        current_page_response: Optional[HttpResponse] = None
        latest_state_message: Optional[Dict[str, Any]] = None

        # Stop as soon as the record limit is hit or the message iterator is exhausted.
        while records_count < limit and (message := next(messages, None)):
            json_object = self._parse_json(message.log) if message.type == MessageType.LOG else None
            if json_object is not None and not isinstance(json_object, dict):
                raise ValueError(
                    f"Expected log message to be a dict, got {json_object} of type {type(json_object)}"
                )
            json_message: Optional[Dict[str, JsonType]] = json_object

            # A new page request or a new slice marker ends the page currently being built.
            if self._need_to_close_page(at_least_one_page_in_group, message, json_message):
                self._close_page(
                    current_page_request,
                    current_page_response,
                    current_slice_pages,
                    current_page_records,
                )
                current_page_request = None
                current_page_response = None

            if (
                at_least_one_page_in_group
                and message.type == MessageType.LOG
                and message.log.message.startswith(SliceLogger.SLICE_LOG_PREFIX)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
            ):
                # A slice marker after at least one page: emit the finished slice and start a new one.
                yield StreamReadSlices(
                    pages=current_slice_pages,
                    slice_descriptor=current_slice_descriptor,
                    state=[latest_state_message] if latest_state_message else [],
                )
                current_slice_descriptor = self._parse_slice_description(message.log.message)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
                current_slice_pages = []
                at_least_one_page_in_group = False
            elif message.type == MessageType.LOG and message.log.message.startswith(  # type: ignore[union-attr] # None doesn't have 'message'
                SliceLogger.SLICE_LOG_PREFIX
            ):
                # parsing the first slice
                current_slice_descriptor = self._parse_slice_description(message.log.message)  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
            elif message.type == MessageType.LOG:
                if json_message is not None and self._is_http_log(json_message):
                    if self._is_auxiliary_http_request(json_message):
                        yield self._build_auxiliary_request(json_message)
                    else:
                        # A regular HTTP log opens a new page for the current slice.
                        at_least_one_page_in_group = True
                        current_page_request = self._create_request_from_log_message(json_message)
                        current_page_response = self._create_response_from_log_message(json_message)
                else:
                    yield message.log
            elif message.type == MessageType.TRACE:
                if message.trace.type == TraceType.ERROR:  # type: ignore[union-attr] # AirbyteMessage with MessageType.TRACE has trace.type
                    yield message.trace
            elif message.type == MessageType.RECORD:
                current_page_records.append(message.record.data)  # type: ignore[arg-type, union-attr] # AirbyteMessage with MessageType.RECORD has record.data
                records_count += 1
                schema_inferrer.accumulate(message.record)
                datetime_format_inferrer.accumulate(message.record)
            elif (
                message.type == MessageType.CONTROL
                and message.control.type == OrchestratorType.CONNECTOR_CONFIG  # type: ignore[union-attr] # None doesn't have 'type'
            ):
                yield message.control
            elif message.type == MessageType.STATE:
                latest_state_message = message.state  # type: ignore[assignment]

        # The loop has no `break`, so this tail always runs once the loop ends normally
        # (it was previously written as a `while ... else` clause, which is equivalent).
        # NOTE(review): nesting reconstructed from syntax — the last slice is emitted only
        # when a page is still pending; confirm against the upstream file.
        if current_page_request or current_page_response or current_page_records:
            self._close_page(
                current_page_request,
                current_page_response,
                current_slice_pages,
                current_page_records,
            )
            yield StreamReadSlices(
                pages=current_slice_pages,
                slice_descriptor=current_slice_descriptor,
                state=[latest_state_message] if latest_state_message else [],
            )

    @staticmethod
    def _build_auxiliary_request(json_message: Dict[str, Any]) -> AuxiliaryRequest:
        """Build an ``AuxiliaryRequest`` from an auxiliary HTTP log message.

        The title is prefixed with ``"Parent stream: "`` when
        ``json_message["airbyte_cdk"]["stream"]["is_substream"]`` is truthy.

        :raises ValueError: if the ``airbyte_cdk``, ``stream`` or ``http`` entry is not a dict
        """
        airbyte_cdk = json_message.get("airbyte_cdk", {})
        if not isinstance(airbyte_cdk, dict):
            raise ValueError(
                f"Expected airbyte_cdk to be a dict, got {airbyte_cdk} of type {type(airbyte_cdk)}"
            )
        stream = airbyte_cdk.get("stream", {})
        if not isinstance(stream, dict):
            raise ValueError(f"Expected stream to be a dict, got {stream} of type {type(stream)}")
        title_prefix = "Parent stream: " if stream.get("is_substream", False) else ""
        http = json_message.get("http", {})
        if not isinstance(http, dict):
            raise ValueError(f"Expected http to be a dict, got {http} of type {type(http)}")
        return AuxiliaryRequest(
            title=title_prefix + str(http.get("title", None)),
            description=str(http.get("description", None)),
            request=MessageGrouper._create_request_from_log_message(json_message),
            response=MessageGrouper._create_response_from_log_message(json_message),
        )

    @staticmethod
    def _need_to_close_page(
        at_least_one_page_in_group: bool,
        message: AirbyteMessage,
        json_message: Optional[Dict[str, Any]],
    ) -> bool:
        """Return True when ``message`` ends the page currently being assembled.

        That is the case when a page has been started and the message is a log that is
        either a non-auxiliary HTTP request log or a slice marker.
        """
        # NOTE(review): "slice:" is a literal here while the rest of the class uses
        # SliceLogger.SLICE_LOG_PREFIX — presumably the same value; confirm before unifying.
        return (
            at_least_one_page_in_group
            and message.type == MessageType.LOG
            and (
                MessageGrouper._is_page_http_request(json_message)
                or message.log.message.startswith("slice:")  # type: ignore[union-attr] # AirbyteMessage with MessageType.LOG has log.message
            )
        )

    @staticmethod
    def _is_page_http_request(json_message: Optional[Dict[str, Any]]) -> bool:
        """Return True for an HTTP log message that is not an auxiliary request."""
        if not json_message:
            return False
        return MessageGrouper._is_http_log(
            json_message
        ) and not MessageGrouper._is_auxiliary_http_request(json_message)

    @staticmethod
    def _is_http_log(message: Dict[str, JsonType]) -> bool:
        """Return True when ``message`` has a truthy ``"http"`` entry."""
        return bool(message.get("http", False))

    @staticmethod
    def _is_auxiliary_http_request(message: Optional[Dict[str, Any]]) -> bool:
        """
        An auxiliary request is a request that is performed and will not directly lead to record for the specific stream it is being queried.
        A couple of examples are:
        * OAuth authentication
        * Substream slice generation
        """
        if not message:
            return False

        is_http = MessageGrouper._is_http_log(message)
        # Coerce to bool so the declared return type holds even when "is_auxiliary"
        # carries a non-boolean truthy/falsy value.
        return bool(is_http and message.get("http", {}).get("is_auxiliary", False))

    @staticmethod
    def _close_page(
        current_page_request: Optional[HttpRequest],
        current_page_response: Optional[HttpResponse],
        current_slice_pages: List[StreamReadPages],
        current_page_records: List[Mapping[str, Any]],
    ) -> None:
        """
        Close a page when parsing message groups

        Appends a ``StreamReadPages`` holding a deep copy of the accumulated records to
        ``current_slice_pages`` and then empties ``current_page_records`` in place.
        """
        current_slice_pages.append(
            StreamReadPages(
                request=current_page_request,
                response=current_page_response,
                # Copy first: the caller's list is cleared right below and reused for the next page.
                records=deepcopy(current_page_records),  # type: ignore [arg-type]
            )
        )
        current_page_records.clear()

    def _read_stream(
        self,
        source: DeclarativeSource,
        config: Mapping[str, Any],
        configured_catalog: ConfiguredAirbyteCatalog,
        state: List[AirbyteStateMessage],
    ) -> Iterator[AirbyteMessage]:
        """Yield the messages of a read, converting raised exceptions into Airbyte messages.

        Exceptions raised while iterating are not propagated: they are yielded as the
        message produced by ``AirbyteTracedException.as_airbyte_message()``.
        """
        # the generator can raise an exception
        # iterate over the generated messages. if next raises an exception, catch it and yield it as an Airbyte message
        try:
            yield from AirbyteEntrypoint(source).read(
                source.spec(self.logger), config, configured_catalog, state
            )
        except AirbyteTracedException as traced_exception:
            # Look for this message which indicates that it is the "final exception" raised by AbstractSource.
            # If it matches, don't yield this as we don't need to show this in the Builder.
            # This is somewhat brittle as it relies on the message string, but if they drift then the worst case
            # is that this message will be shown in the Builder.
            if (
                traced_exception.message is not None
                and "During the sync, the following streams did not sync successfully"
                in traced_exception.message
            ):
                return
            yield traced_exception.as_airbyte_message()
        except Exception as e:
            # Deliberately broad: any failure of the read is surfaced to the caller as a message.
            error_message = f"{e.args[0] if len(e.args) > 0 else str(e)}"
            yield AirbyteTracedException.from_exception(
                e, message=error_message
            ).as_airbyte_message()

    @staticmethod
    def _parse_json(log_message: AirbyteLogMessage) -> JsonType:
        """Return ``log_message.message`` parsed as JSON, or ``None`` if it is not valid JSON."""
        # TODO: As a temporary stopgap, the CDK emits request/response data as a log message string. Ideally this should come in the
        # form of a custom message object defined in the Airbyte protocol, but this unblocks us in the immediate while the
        # protocol change is worked on.
        try:
            json_object: JsonType = json.loads(log_message.message)
            return json_object
        except JSONDecodeError:
            return None

    @staticmethod
    def _create_request_from_log_message(json_http_message: Dict[str, Any]) -> HttpRequest:
        """Build an ``HttpRequest`` from the ``url.full`` and ``http.request`` entries of a log message."""
        url = json_http_message.get("url", {}).get("full", "")
        request = json_http_message.get("http", {}).get("request", {})
        return HttpRequest(
            url=url,
            http_method=request.get("method", ""),
            headers=request.get("headers"),
            body=request.get("body", {}).get("content", ""),
        )

    @staticmethod
    def _create_response_from_log_message(json_http_message: Dict[str, Any]) -> HttpResponse:
        """Build an ``HttpResponse`` from the ``http.response`` entry of a log message."""
        response = json_http_message.get("http", {}).get("response", {})
        body = response.get("body", {}).get("content", "")
        return HttpResponse(
            status=response.get("status_code"), body=body, headers=response.get("headers")
        )

    def _has_reached_limit(self, slices: List[StreamReadSlices]) -> bool:
        """Return True when the slices hit the slice, pages-per-slice or record limit."""
        if len(slices) >= self._max_slices:
            return True

        record_count = 0

        for _slice in slices:
            if len(_slice.pages) >= self._max_pages_per_slice:
                return True
            # Records are counted across all slices, not per slice.
            for page in _slice.pages:
                record_count += len(page.records)
                if record_count >= self._max_record_limit:
                    return True
        return False

    def _parse_slice_description(self, log_message: str) -> Dict[str, Any]:
        """Parse the JSON that follows the first ``SliceLogger.SLICE_LOG_PREFIX`` in ``log_message``."""
        return json.loads(log_message.replace(SliceLogger.SLICE_LOG_PREFIX, "", 1))  # type: ignore

    @staticmethod
    def _clean_config(config: Dict[str, Any]) -> Dict[str, Any]:
        """Return a deep copy of ``config`` without the top-level keys that start with ``"__"``."""
        cleaned_config = deepcopy(config)
        # Iterate the original mapping so the copy can be mutated safely.
        for key in config:
            if key.startswith("__"):
                del cleaned_config[key]
        return cleaned_config
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|