airbyte-cdk 7.3.2.dev0__py3-none-any.whl → 7.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -122,6 +122,9 @@ from airbyte_cdk.sources.declarative.models.base_model_with_deprecations import
122
122
  DEPRECATION_LOGS_TAG,
123
123
  BaseModelWithDeprecations,
124
124
  )
125
+ from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
126
+ Action1 as PaginationResetActionModel,
127
+ )
125
128
  from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
126
129
  AddedFieldDefinition as AddedFieldDefinitionModel,
127
130
  )
@@ -2213,6 +2216,14 @@ class ModelToComponentFactory:
2213
2216
  and stream_slicer
2214
2217
  and not isinstance(stream_slicer, SinglePartitionRouter)
2215
2218
  ):
2219
+ if isinstance(model.incremental_sync, IncrementingCountCursorModel):
2220
+ # We don't currently support usage of partition routing and IncrementingCountCursor at the
2221
+ # same time because we didn't solve for design questions like what the lookback window would
2222
+ be as well as global cursor fallbacks. We have not seen customers that have needed both
2223
+ # at the same time yet and are currently punting on this until we need to solve it.
2224
+ raise ValueError(
2225
+ f"The low-code framework does not currently support usage of a PartitionRouter and an IncrementingCountCursor at the same time. Please specify only one of these options for stream {stream_name}."
2226
+ )
2216
2227
  return self.create_concurrent_cursor_from_perpartition_cursor( # type: ignore # This is a known issue that we are creating and returning a ConcurrentCursor which does not technically implement the (low-code) StreamSlicer. However, (low-code) StreamSlicer and ConcurrentCursor both implement StreamSlicer.stream_slices() which is the primary method needed for checkpointing
2217
2228
  state_manager=self._connector_state_manager,
2218
2229
  model_type=DatetimeBasedCursorModel,
@@ -2400,21 +2411,12 @@ class ModelToComponentFactory:
2400
2411
 
2401
2412
  api_budget = self._api_budget
2402
2413
 
2403
- # Removes QueryProperties components from the interpolated mappings because it has been designed
2404
- # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly
2405
- # instead of through jinja interpolation
2406
- request_parameters: Optional[Union[str, Mapping[str, str]]]
2407
- if isinstance(model.request_parameters, Mapping):
2408
- request_parameters = self._remove_query_properties(model.request_parameters)
2409
- else:
2410
- request_parameters = model.request_parameters
2411
-
2412
2414
  request_options_provider = InterpolatedRequestOptionsProvider(
2413
2415
  request_body=model.request_body,
2414
2416
  request_body_data=model.request_body_data,
2415
2417
  request_body_json=model.request_body_json,
2416
2418
  request_headers=model.request_headers,
2417
- request_parameters=request_parameters,
2419
+ request_parameters=model.request_parameters, # type: ignore # QueryProperties have been removed in `create_simple_retriever`
2418
2420
  query_properties_key=query_properties_key,
2419
2421
  config=config,
2420
2422
  parameters=model.parameters or {},
@@ -3208,7 +3210,8 @@ class ModelToComponentFactory:
3208
3210
 
3209
3211
  query_properties: Optional[QueryProperties] = None
3210
3212
  query_properties_key: Optional[str] = None
3211
- if self._query_properties_in_request_parameters(model.requester):
3213
+ self._ensure_query_properties_to_model(model.requester)
3214
+ if self._has_query_properties_in_request_parameters(model.requester):
3212
3215
  # It is better to be explicit about an error if PropertiesFromEndpoint is defined in multiple
3213
3216
  # places instead of default to request_parameters which isn't clearly documented
3214
3217
  if (
@@ -3220,7 +3223,7 @@ class ModelToComponentFactory:
3220
3223
  )
3221
3224
 
3222
3225
  query_properties_definitions = []
3223
- for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _query_properties_in_request_parameters()
3226
+ for key, request_parameter in model.requester.request_parameters.items(): # type: ignore # request_parameters is already validated to be a Mapping using _has_query_properties_in_request_parameters()
3224
3227
  if isinstance(request_parameter, QueryPropertiesModel):
3225
3228
  query_properties_key = key
3226
3229
  query_properties_definitions.append(request_parameter)
@@ -3234,6 +3237,16 @@ class ModelToComponentFactory:
3234
3237
  query_properties = self._create_component_from_model(
3235
3238
  model=query_properties_definitions[0], config=config
3236
3239
  )
3240
+
3241
+ # Removes QueryProperties components from the interpolated mappings because it has been designed
3242
+ # to be used by the SimpleRetriever and will be resolved from the provider from the slice directly
3243
+ # instead of through jinja interpolation
3244
+ if hasattr(model.requester, "request_parameters") and isinstance(
3245
+ model.requester.request_parameters, Mapping
3246
+ ):
3247
+ model.requester.request_parameters = self._remove_query_properties(
3248
+ model.requester.request_parameters
3249
+ )
3237
3250
  elif (
3238
3251
  hasattr(model.requester, "fetch_properties_from_endpoint")
3239
3252
  and model.requester.fetch_properties_from_endpoint
@@ -3333,7 +3346,7 @@ class ModelToComponentFactory:
3333
3346
  and model.pagination_reset
3334
3347
  and model.pagination_reset.limits
3335
3348
  ):
3336
- raise ValueError("PaginationResetLimits are not support while having record filter.")
3349
+ raise ValueError("PaginationResetLimits are not supported while having record filter.")
3337
3350
 
3338
3351
  return SimpleRetriever(
3339
3352
  name=name,
@@ -3361,20 +3374,26 @@ class ModelToComponentFactory:
3361
3374
  return lambda: PaginationTracker()
3362
3375
 
3363
3376
  # Until we figure out a way to use any cursor for PaginationTracker, we will have to have this cursor selector logic
3364
- cursor_for_pagination_tracking = None
3365
- if isinstance(cursor, ConcurrentCursor):
3366
- cursor_for_pagination_tracking = cursor
3367
- elif isinstance(cursor, ConcurrentPerPartitionCursor):
3368
- cursor_for_pagination_tracking = cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here
3369
- {}, datetime.timedelta(0)
3370
- )
3371
- elif not isinstance(cursor, FinalStateCursor):
3372
- LOGGER.warning(
3373
- "Unknown cursor for PaginationTracker. Pagination resets might not work properly"
3374
- )
3377
+ cursor_factory: Callable[[], Optional[ConcurrentCursor]] = lambda: None
3378
+ if model.action == PaginationResetActionModel.RESET:
3379
+ # in that case, we will let cursor_factory to return None even if the stream has a cursor
3380
+ pass
3381
+ elif model.action == PaginationResetActionModel.SPLIT_USING_CURSOR:
3382
+ if isinstance(cursor, ConcurrentCursor):
3383
+ cursor_factory = lambda: cursor.copy_without_state() # type: ignore # the if condition validates that it is a ConcurrentCursor
3384
+ elif isinstance(cursor, ConcurrentPerPartitionCursor):
3385
+ cursor_factory = lambda: cursor._cursor_factory.create( # type: ignore # if this becomes a problem, we would need to extract the cursor_factory instantiation logic and make it accessible here
3386
+ {}, datetime.timedelta(0)
3387
+ )
3388
+ elif not isinstance(cursor, FinalStateCursor):
3389
+ LOGGER.warning(
3390
+ "Unknown cursor for PaginationTracker. Pagination resets might not work properly"
3391
+ )
3392
+ else:
3393
+ raise ValueError(f"Unknown PaginationReset action: {model.action}")
3375
3394
 
3376
3395
  limit = model.limits.number_of_records if model and model.limits else None
3377
- return lambda: PaginationTracker(cursor_for_pagination_tracking, limit)
3396
+ return lambda: PaginationTracker(cursor_factory(), limit)
3378
3397
 
3379
3398
  def _get_log_formatter(
3380
3399
  self, log_formatter: Callable[[Response], Any] | None, name: str
@@ -3402,7 +3421,7 @@ class ModelToComponentFactory:
3402
3421
  return bool(self._limit_slices_fetched or self._emit_connector_builder_messages)
3403
3422
 
3404
3423
  @staticmethod
3405
- def _query_properties_in_request_parameters(
3424
+ def _has_query_properties_in_request_parameters(
3406
3425
  requester: Union[HttpRequesterModel, CustomRequesterModel],
3407
3426
  ) -> bool:
3408
3427
  if not hasattr(requester, "request_parameters"):
@@ -4216,3 +4235,26 @@ class ModelToComponentFactory:
4216
4235
  deduplicate=model.deduplicate if model.deduplicate is not None else True,
4217
4236
  config=config,
4218
4237
  )
4238
+
4239
+ def _ensure_query_properties_to_model(
4240
+ self, requester: Union[HttpRequesterModel, CustomRequesterModel]
4241
+ ) -> None:
4242
+ """
4243
+ For some reason, it seems like CustomRequesterModel request_parameters stays as dictionaries which means that
4244
+ the other conditions relying on it being QueryPropertiesModel instead of a dict fail. Here, we migrate them to
4245
+ a proper model.
4246
+ """
4247
+ if not hasattr(requester, "request_parameters"):
4248
+ return
4249
+
4250
+ request_parameters = requester.request_parameters
4251
+ if request_parameters and isinstance(request_parameters, Dict):
4252
+ for request_parameter_key in request_parameters.keys():
4253
+ request_parameter = request_parameters[request_parameter_key]
4254
+ if (
4255
+ isinstance(request_parameter, Dict)
4256
+ and request_parameter.get("type") == "QueryProperties"
4257
+ ):
4258
+ request_parameters[request_parameter_key] = QueryPropertiesModel.parse_obj(
4259
+ request_parameter
4260
+ )
@@ -66,14 +66,14 @@ class CompositeErrorHandler(ErrorHandler):
66
66
  if not isinstance(matched_error_resolution, ErrorResolution):
67
67
  continue
68
68
 
69
- if matched_error_resolution.response_action == ResponseAction.SUCCESS:
69
+ if matched_error_resolution.response_action in [
70
+ ResponseAction.SUCCESS,
71
+ ResponseAction.RETRY,
72
+ ResponseAction.IGNORE,
73
+ ResponseAction.RESET_PAGINATION,
74
+ ]:
70
75
  return matched_error_resolution
71
76
 
72
- if (
73
- matched_error_resolution.response_action == ResponseAction.RETRY
74
- or matched_error_resolution.response_action == ResponseAction.IGNORE
75
- ):
76
- return matched_error_resolution
77
77
  if matched_error_resolution:
78
78
  return matched_error_resolution
79
79
 
@@ -22,7 +22,7 @@ class PaginationTracker:
22
22
  """
23
23
  self._cursor = cursor
24
24
  self._limit = max_number_of_records
25
- self.reset()
25
+ self._reset()
26
26
 
27
27
  """
28
28
  Given we have a cursor, we do not allow for the same slice to be processed twice because we assume we will
@@ -41,9 +41,8 @@ class PaginationTracker:
41
41
  def has_reached_limit(self) -> bool:
42
42
  return self._limit is not None and self._record_count >= self._limit
43
43
 
44
- def reset(self) -> None:
44
+ def _reset(self) -> None:
45
45
  self._record_count = 0
46
- self._number_of_attempt_with_same_slice = 0
47
46
 
48
47
  def reduce_slice_range_if_possible(self, stream_slice: StreamSlice) -> StreamSlice:
49
48
  new_slice = self._cursor.reduce_slice_range(stream_slice) if self._cursor else stream_slice
@@ -61,4 +60,5 @@ class PaginationTracker:
61
60
  else:
62
61
  self._number_of_attempt_with_same_slice = 0
63
62
 
63
+ self._reset()
64
64
  return new_slice
@@ -437,7 +437,6 @@ class SimpleRetriever(Retriever):
437
437
  break
438
438
 
439
439
  if reset_pagination or pagination_tracker.has_reached_limit():
440
- pagination_tracker.reset()
441
440
  next_page_token = self._get_initial_next_page_token()
442
441
  previous_slice = stream_slice
443
442
  stream_slice = pagination_tracker.reduce_slice_range_if_possible(stream_slice)
@@ -3,13 +3,15 @@
3
3
  #
4
4
 
5
5
  import logging
6
+ import time
6
7
  from abc import ABC, abstractmethod
7
8
  from datetime import datetime
8
9
  from enum import Enum
9
10
  from io import IOBase
10
11
  from os import makedirs, path
11
- from typing import Any, Callable, Iterable, List, MutableMapping, Optional, Set, Tuple
12
+ from typing import Any, Iterable, List, MutableMapping, Optional, Set, Tuple
12
13
 
14
+ from airbyte_protocol_dataclasses.models import FailureType
13
15
  from wcmatch.glob import GLOBSTAR, globmatch
14
16
 
15
17
  from airbyte_cdk.models import AirbyteRecordMessageFileReference
@@ -19,8 +21,9 @@ from airbyte_cdk.sources.file_based.config.validate_config_transfer_modes import
19
21
  preserve_directory_structure,
20
22
  use_file_transfer,
21
23
  )
24
+ from airbyte_cdk.sources.file_based.exceptions import FileSizeLimitError
22
25
  from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
23
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
26
+ from airbyte_cdk.sources.file_based.remote_file import RemoteFile, UploadableRemoteFile
24
27
 
25
28
 
26
29
  class FileReadMode(Enum):
@@ -34,6 +37,7 @@ class AbstractFileBasedStreamReader(ABC):
34
37
  FILE_NAME = "file_name"
35
38
  LOCAL_FILE_PATH = "local_file_path"
36
39
  FILE_FOLDER = "file_folder"
40
+ FILE_SIZE_LIMIT = 1_500_000_000
37
41
 
38
42
  def __init__(self) -> None:
39
43
  self._config = None
@@ -113,16 +117,6 @@ class AbstractFileBasedStreamReader(ABC):
113
117
  seen.add(file.uri)
114
118
  yield file
115
119
 
116
- @abstractmethod
117
- def file_size(self, file: RemoteFile) -> int:
118
- """Utility method to get size of the remote file.
119
-
120
- This is required for connectors that will support writing to
121
- files. If the connector does not support writing files, then the
122
- subclass can simply `return 0`.
123
- """
124
- ...
125
-
126
120
  @staticmethod
127
121
  def file_matches_globs(file: RemoteFile, globs: List[str]) -> bool:
128
122
  # Use the GLOBSTAR flag to enable recursive ** matching
@@ -153,9 +147,8 @@ class AbstractFileBasedStreamReader(ABC):
153
147
  return include_identities_stream(self.config)
154
148
  return False
155
149
 
156
- @abstractmethod
157
150
  def upload(
158
- self, file: RemoteFile, local_directory: str, logger: logging.Logger
151
+ self, file: UploadableRemoteFile, local_directory: str, logger: logging.Logger
159
152
  ) -> Tuple[FileRecordData, AirbyteRecordMessageFileReference]:
160
153
  """
161
154
  This is required for connectors that will support writing to
@@ -173,7 +166,53 @@ class AbstractFileBasedStreamReader(ABC):
173
166
  - file_size_bytes (int): The size of the referenced file in bytes.
174
167
  - source_file_relative_path (str): The relative path to the referenced file in source.
175
168
  """
176
- ...
169
+ if not isinstance(file, UploadableRemoteFile):
170
+ raise TypeError(f"Expected UploadableRemoteFile, got {type(file)}")
171
+
172
+ file_size = file.size
173
+
174
+ if file_size > self.FILE_SIZE_LIMIT:
175
+ message = f"File size exceeds the {self.FILE_SIZE_LIMIT / 1e9} GB limit."
176
+ raise FileSizeLimitError(
177
+ message=message, internal_message=message, failure_type=FailureType.config_error
178
+ )
179
+
180
+ file_paths = self._get_file_transfer_paths(
181
+ source_file_relative_path=file.source_file_relative_path,
182
+ staging_directory=local_directory,
183
+ )
184
+ local_file_path = file_paths[self.LOCAL_FILE_PATH]
185
+ file_relative_path = file_paths[self.FILE_RELATIVE_PATH]
186
+ file_name = file_paths[self.FILE_NAME]
187
+
188
+ logger.info(
189
+ f"Starting to download the file {file.file_uri_for_logging} with size: {file_size / (1024 * 1024):,.2f} MB ({file_size / (1024 * 1024 * 1024):.2f} GB)"
190
+ )
191
+ start_download_time = time.time()
192
+
193
+ file.download_to_local_directory(local_file_path)
194
+
195
+ write_duration = time.time() - start_download_time
196
+ logger.info(
197
+ f"Finished downloading the file {file.file_uri_for_logging} and saved to {local_file_path} in {write_duration:,.2f} seconds."
198
+ )
199
+
200
+ file_record_data = FileRecordData(
201
+ folder=file_paths[self.FILE_FOLDER],
202
+ file_name=file_name,
203
+ bytes=file_size,
204
+ id=file.id,
205
+ mime_type=file.mime_type,
206
+ created_at=file.created_at,
207
+ updated_at=file.updated_at,
208
+ source_uri=file.uri,
209
+ )
210
+ file_reference = AirbyteRecordMessageFileReference(
211
+ staging_file_url=local_file_path,
212
+ source_file_relative_path=file_relative_path,
213
+ file_size_bytes=file_size,
214
+ )
215
+ return file_record_data, file_reference
177
216
 
178
217
  def _get_file_transfer_paths(
179
218
  self, source_file_relative_path: str, staging_directory: str
@@ -7,7 +7,7 @@ from typing import Iterable, Tuple
7
7
  from airbyte_cdk.models import AirbyteRecordMessageFileReference
8
8
  from airbyte_cdk.sources.file_based.file_based_stream_reader import AbstractFileBasedStreamReader
9
9
  from airbyte_cdk.sources.file_based.file_record_data import FileRecordData
10
- from airbyte_cdk.sources.file_based.remote_file import RemoteFile
10
+ from airbyte_cdk.sources.file_based.remote_file import UploadableRemoteFile
11
11
  from airbyte_cdk.sources.utils.files_directory import get_files_directory
12
12
 
13
13
 
@@ -17,7 +17,7 @@ class FileTransfer:
17
17
 
18
18
  def upload(
19
19
  self,
20
- file: RemoteFile,
20
+ file: UploadableRemoteFile,
21
21
  stream_reader: AbstractFileBasedStreamReader,
22
22
  logger: logging.Logger,
23
23
  ) -> Iterable[Tuple[FileRecordData, AirbyteRecordMessageFileReference]]:
@@ -1,7 +1,7 @@
1
1
  #
2
2
  # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
3
3
  #
4
-
4
+ from abc import ABC, abstractmethod
5
5
  from datetime import datetime
6
6
  from typing import Optional
7
7
 
@@ -16,3 +16,42 @@ class RemoteFile(BaseModel):
16
16
  uri: str
17
17
  last_modified: datetime
18
18
  mime_type: Optional[str] = None
19
+
20
+
21
+ class UploadableRemoteFile(RemoteFile, ABC):
22
+ """
23
+ A file in a file-based stream that supports uploading (file transferring).
24
+ """
25
+
26
+ id: Optional[str] = None
27
+ created_at: Optional[str] = None
28
+ updated_at: Optional[str] = None
29
+
30
+ @property
31
+ @abstractmethod
32
+ def size(self) -> int:
33
+ """
34
+ Returns the file size in bytes.
35
+ """
36
+ ...
37
+
38
+ @abstractmethod
39
+ def download_to_local_directory(self, local_file_path: str) -> None:
40
+ """
41
+ Download the file from remote source to local storage.
42
+ """
43
+ ...
44
+
45
+ @property
46
+ def source_file_relative_path(self) -> str:
47
+ """
48
+ Returns the relative path of the source file.
49
+ """
50
+ return self.uri
51
+
52
+ @property
53
+ def file_uri_for_logging(self) -> str:
54
+ """
55
+ Returns the URI for the file being logged.
56
+ """
57
+ return self.uri
@@ -19,7 +19,7 @@ from typing import (
19
19
  )
20
20
 
21
21
  from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
22
- from airbyte_cdk.sources.message import MessageRepository
22
+ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
23
23
  from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
24
24
  from airbyte_cdk.sources.streams.concurrent.clamping import ClampingStrategy, NoClamping
25
25
  from airbyte_cdk.sources.streams.concurrent.cursor_types import CursorValueType, GapType
@@ -136,6 +136,24 @@ class ConcurrentCursor(Cursor):
136
136
  _START_BOUNDARY = 0
137
137
  _END_BOUNDARY = 1
138
138
 
139
+ def copy_without_state(self) -> "ConcurrentCursor":
140
+ return self.__class__(
141
+ stream_name=self._stream_name,
142
+ stream_namespace=self._stream_namespace,
143
+ stream_state={},
144
+ message_repository=NoopMessageRepository(),
145
+ connector_state_manager=ConnectorStateManager(),
146
+ connector_state_converter=self._connector_state_converter,
147
+ cursor_field=self._cursor_field,
148
+ slice_boundary_fields=self._slice_boundary_fields,
149
+ start=self._start,
150
+ end_provider=self._end_provider,
151
+ lookback_window=self._lookback_window,
152
+ slice_range=self._slice_range,
153
+ cursor_granularity=self._cursor_granularity,
154
+ clamping_strategy=self._clamping_strategy,
155
+ )
156
+
139
157
  def __init__(
140
158
  self,
141
159
  stream_name: str,
@@ -528,17 +546,22 @@ class ConcurrentCursor(Cursor):
528
546
  "Attempting to reduce slice while records are not returned in incremental order might lead to missing records"
529
547
  )
530
548
 
531
- return StreamSlice(
532
- partition=stream_slice.partition,
533
- cursor_slice={
534
- self._slice_boundary_fields_wrapper[
535
- self._START_BOUNDARY
536
- ]: self._connector_state_converter.output_format(
537
- self._most_recent_cursor_value_per_partition[stream_slice]
538
- ),
539
- self._slice_boundary_fields_wrapper[self._END_BOUNDARY]: stream_slice.cursor_slice[
540
- self._slice_boundary_fields_wrapper[self._END_BOUNDARY]
541
- ],
542
- },
543
- extra_fields=stream_slice.extra_fields,
544
- )
549
+ if stream_slice in self._most_recent_cursor_value_per_partition:
550
+ return StreamSlice(
551
+ partition=stream_slice.partition,
552
+ cursor_slice={
553
+ self._slice_boundary_fields_wrapper[
554
+ self._START_BOUNDARY
555
+ ]: self._connector_state_converter.output_format(
556
+ self._most_recent_cursor_value_per_partition[stream_slice]
557
+ ),
558
+ self._slice_boundary_fields_wrapper[
559
+ self._END_BOUNDARY
560
+ ]: stream_slice.cursor_slice[
561
+ self._slice_boundary_fields_wrapper[self._END_BOUNDARY]
562
+ ],
563
+ },
564
+ extra_fields=stream_slice.extra_fields,
565
+ )
566
+ else:
567
+ return stream_slice
@@ -75,6 +75,25 @@ class NestedPath(Path):
75
75
  return f"NestedPath(path={self._path})"
76
76
 
77
77
 
78
+ class RootPath:
79
+ """
80
+ Path to use when the root of the response is an array.
81
+ """
82
+
83
+ def write(self, template: List[Dict[str, Any]], value: List[Dict[str, Any]]) -> None:
84
+ template.extend(value)
85
+
86
+ def update(self, template: List[Dict[str, Any]], value: List[Any]) -> None:
87
+ template.clear()
88
+ template.extend(value)
89
+
90
+ def extract(self, template: List[Dict[str, Any]]) -> Any:
91
+ return template
92
+
93
+ def __str__(self) -> str:
94
+ return f"RootPath"
95
+
96
+
78
97
  class PaginationStrategy(ABC):
79
98
  @abstractmethod
80
99
  def update(self, response: Dict[str, Any]) -> None:
@@ -149,12 +168,14 @@ class RecordBuilder:
149
168
  class HttpResponseBuilder:
150
169
  def __init__(
151
170
  self,
152
- template: Dict[str, Any],
153
- records_path: Union[FieldPath, NestedPath],
171
+ template: Union[Dict[str, Any], List[Dict[str, Any]]],
172
+ records_path: Union[FieldPath, NestedPath, RootPath],
154
173
  pagination_strategy: Optional[PaginationStrategy],
155
174
  ):
156
- self._response = template
175
+ _validate_path_with_response(records_path, template)
176
+
157
177
  self._records: List[RecordBuilder] = []
178
+ self._response = template
158
179
  self._records_path = records_path
159
180
  self._pagination_strategy = pagination_strategy
160
181
  self._status_code = 200
@@ -169,6 +190,9 @@ class HttpResponseBuilder:
169
190
  "`pagination_strategy` was not provided and hence, fields related to the pagination can't be modified. Please provide "
170
191
  "`pagination_strategy` while instantiating ResponseBuilder to leverage this capability"
171
192
  )
193
+ elif isinstance(self._response, List):
194
+ raise ValueError("pagination_strategy requires the response to be a dict but was list")
195
+
172
196
  self._pagination_strategy.update(self._response)
173
197
  return self
174
198
 
@@ -177,7 +201,7 @@ class HttpResponseBuilder:
177
201
  return self
178
202
 
179
203
  def build(self) -> HttpResponse:
180
- self._records_path.update(self._response, [record.build() for record in self._records])
204
+ self._records_path.update(self._response, [record.build() for record in self._records]) # type: ignore # validated using _validate_path_with_response
181
205
  return HttpResponse(json.dumps(self._response), self._status_code)
182
206
 
183
207
 
@@ -208,15 +232,16 @@ def find_binary_response(resource: str, execution_folder: str) -> bytes:
208
232
 
209
233
  def create_record_builder(
210
234
  response_template: Dict[str, Any],
211
- records_path: Union[FieldPath, NestedPath],
235
+ records_path: Union[FieldPath, NestedPath, RootPath],
212
236
  record_id_path: Optional[Path] = None,
213
237
  record_cursor_path: Optional[Union[FieldPath, NestedPath]] = None,
214
238
  ) -> RecordBuilder:
215
239
  """
216
240
  This will use the first record define at `records_path` as a template for the records. If more records are defined, they will be ignored
217
241
  """
242
+ _validate_path_with_response(records_path, response_template)
218
243
  try:
219
- record_template = records_path.extract(response_template)[0]
244
+ record_template = records_path.extract(response_template)[0] # type: ignore # validated using _validate_path_with_response
220
245
  if not record_template:
221
246
  raise ValueError(
222
247
  f"Could not extract any record from template at path `{records_path}`. "
@@ -230,8 +255,20 @@ def create_record_builder(
230
255
 
231
256
 
232
257
  def create_response_builder(
233
- response_template: Dict[str, Any],
234
- records_path: Union[FieldPath, NestedPath],
258
+ response_template: Union[Dict[str, Any], List[Dict[str, Any]]],
259
+ records_path: Union[FieldPath, NestedPath, RootPath],
235
260
  pagination_strategy: Optional[PaginationStrategy] = None,
236
261
  ) -> HttpResponseBuilder:
237
262
  return HttpResponseBuilder(response_template, records_path, pagination_strategy)
263
+
264
+
265
+ def _validate_path_with_response(
266
+ records_path: Union[FieldPath, NestedPath, RootPath],
267
+ response_template: Union[Dict[str, Any], List[Dict[str, Any]]],
268
+ ) -> None:
269
+ if isinstance(response_template, List) and not isinstance(records_path, RootPath):
270
+ raise ValueError("templates of type lists require RootPath")
271
+ elif isinstance(response_template, Dict) and not isinstance(
272
+ records_path, (FieldPath, NestedPath)
273
+ ):
274
+ raise ValueError("templates of type dict either require FieldPath or NestedPath")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: airbyte-cdk
3
- Version: 7.3.2.dev0
3
+ Version: 7.3.3
4
4
  Summary: A framework for writing Airbyte Connectors.
5
5
  Home-page: https://airbyte.com
6
6
  License: MIT
@@ -172,7 +172,7 @@ airbyte_cdk/sources/declarative/parsers/custom_exceptions.py,sha256=wnRUP0Xeru9R
172
172
  airbyte_cdk/sources/declarative/parsers/manifest_component_transformer.py,sha256=la9Ulpc0lQewiBLKJ0FpsWxyU5XISv-ulmFRHJLJ1Pc,11292
173
173
  airbyte_cdk/sources/declarative/parsers/manifest_normalizer.py,sha256=EtKjS9c94yNp3AwQC8KUCQaAYW5T3zvFYxoWYjc_buI,19729
174
174
  airbyte_cdk/sources/declarative/parsers/manifest_reference_resolver.py,sha256=pJmg78vqE5VfUrF_KJnWjucQ4k9IWFULeAxHCowrHXE,6806
175
- airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=AZGtNkPHxL8WbJMOknSHYmJxSPZP4x0pq6xqQYiNdaM,185641
175
+ airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py,sha256=AgZ7mRbXVZHWTftPuDdH6wTanZPGi_dxtiu2dSxyWjs,188331
176
176
  airbyte_cdk/sources/declarative/partition_routers/__init__.py,sha256=TBC9AkGaUqHm2IKHMPN6punBIcY5tWGULowcLoAVkfw,1109
177
177
  airbyte_cdk/sources/declarative/partition_routers/async_job_partition_router.py,sha256=VelO7zKqKtzMJ35jyFeg0ypJLQC0plqqIBNXoBW1G2E,3001
178
178
  airbyte_cdk/sources/declarative/partition_routers/cartesian_product_stream_slicer.py,sha256=ocm4hZ4k-tEGs5HLrtI8ecWSK0hGqNH0Rvz2byx_HZk,6927
@@ -191,7 +191,7 @@ airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/hea
191
191
  airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_time_from_header_backoff_strategy.py,sha256=I3KYCJHhPiRfxYUzOa293YH4U3wGFISDsdY1OMHWRtw,2942
192
192
  airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategies/wait_until_time_from_header_backoff_strategy.py,sha256=T2JTIdHdPzPiW0MpkCNYPsuaHUtF9V-ijNqUqdTDl6U,3069
193
193
  airbyte_cdk/sources/declarative/requesters/error_handlers/backoff_strategy.py,sha256=ZN5kcaVAQDinX0Ld5NXA8M_7Sax5BoPsknVwH7v06as,634
194
- airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py,sha256=4_PegbHBUiNbqa5ndZ2n9rm69O2iEfWU-NcIhSXZDIs,4137
194
+ airbyte_cdk/sources/declarative/requesters/error_handlers/composite_error_handler.py,sha256=yVMyKI4C-9vbIrTtQPdAndZQzdzpgRzqxtXDed2xwcE,4050
195
195
  airbyte_cdk/sources/declarative/requesters/error_handlers/default_error_handler.py,sha256=BGED9TcbA3mlvd9D7sog_u5AiyjWGVOUq_00aK3PNzg,5111
196
196
  airbyte_cdk/sources/declarative/requesters/error_handlers/default_http_response_filter.py,sha256=q0YkeYUUWO6iErUy0vjqiOkhg8_9d5YcCmtlpXAJJ9E,1314
197
197
  airbyte_cdk/sources/declarative/requesters/error_handlers/error_handler.py,sha256=Tan66odx8VHzfdyyXMQkXz2pJYksllGqvxmpoajgcK4,669
@@ -240,9 +240,9 @@ airbyte_cdk/sources/declarative/retrievers/file_uploader/file_uploader.py,sha256
240
240
  airbyte_cdk/sources/declarative/retrievers/file_uploader/file_writer.py,sha256=V8gAFjQXkhX5mwj1NafdcUrMfMBNF1hi0mrdXIl5qEc,359
241
241
  airbyte_cdk/sources/declarative/retrievers/file_uploader/local_file_system_file_writer.py,sha256=jLpdonre1UHfbjGSD5AK_T0codLABJByTvbqepDZtEQ,422
242
242
  airbyte_cdk/sources/declarative/retrievers/file_uploader/noop_file_writer.py,sha256=1yfimzxm09d2j605cu_HhiYVDNVL1rUMi3vs_jYlIyY,330
243
- airbyte_cdk/sources/declarative/retrievers/pagination_tracker.py,sha256=7IDpP9MwwMdiL-ZiHUMlopFbQ1P04RCtDmFojqNx4tc,2893
243
+ airbyte_cdk/sources/declarative/retrievers/pagination_tracker.py,sha256=h-3GfksrWaQUa1xIefq9eG-6_DuW77Vq8XDenv-hCps,2865
244
244
  airbyte_cdk/sources/declarative/retrievers/retriever.py,sha256=os5psYh8z7ZdCAvbfZeTpmjvPa7Qpx0mblpKf47ZaZM,1876
245
- airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=HVxD66NqLqUPmCKRGAi-z9NM9ZlcCsmwHAdZMQZ8Uc4,29686
245
+ airbyte_cdk/sources/declarative/retrievers/simple_retriever.py,sha256=8nb87fsBno1SOxHxYA-sGxAy48sapcF3aZszBu4Ew_s,29643
246
246
  airbyte_cdk/sources/declarative/schema/__init__.py,sha256=xU45UvM5O4c1PSM13UHpCdh5hpW3HXy9vRRGEiAC1rg,795
247
247
  airbyte_cdk/sources/declarative/schema/composite_schema_loader.py,sha256=ymGbvxS_QyGc4nnjEyRo5ch8bVedELO41PAUxKXZyMw,1113
248
248
  airbyte_cdk/sources/declarative/schema/default_schema_loader.py,sha256=UnbzlExmwoQiVV8zDg4lhAEaqA_0pRfwbMRe8yqOuWk,1834
@@ -299,18 +299,18 @@ airbyte_cdk/sources/file_based/discovery_policy/default_discovery_policy.py,sha2
299
299
  airbyte_cdk/sources/file_based/exceptions.py,sha256=WP0qkG6fpWoBpOyyicgp5YNE393VWyegq5qSy0v4QtM,7362
300
300
  airbyte_cdk/sources/file_based/file_based_source.py,sha256=Xg8OYWnGc-OcVBglvS08uwAWGWHBhEqsBnyODIkOK-4,20051
301
301
  airbyte_cdk/sources/file_based/file_based_stream_permissions_reader.py,sha256=4e7FXqQ9hueacexC0SyrZyjF8oREYHza8pKF9CgKbD8,5050
302
- airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=rwz8AhEIqYB9gBF7uW9eR--eUiHOntzuwLH8jFHNacE,7854
302
+ airbyte_cdk/sources/file_based/file_based_stream_reader.py,sha256=Yg9KRXpyAtElBrUOO8oX4WHQH6k6Lk7keklrZmB5Klg,9614
303
303
  airbyte_cdk/sources/file_based/file_record_data.py,sha256=Vkr5AyZzlsOezjVCLhFrm_WpymlQdolWCnFAwqLJ9Iw,453
304
304
  airbyte_cdk/sources/file_based/file_types/__init__.py,sha256=blCLn0-2LC-ZdgcNyDEhqM2RiUvEjEBh-G4-t32ZtuM,1268
305
305
  airbyte_cdk/sources/file_based/file_types/avro_parser.py,sha256=USEYqiICXBWpDV443VtNOCmUA-GINzY_Zah74_5w3qQ,10860
306
306
  airbyte_cdk/sources/file_based/file_types/csv_parser.py,sha256=QlCXB-ry3np67Q_VerQEPoWDOTcPTB6Go4ydZxY9ae4,20445
307
307
  airbyte_cdk/sources/file_based/file_types/excel_parser.py,sha256=BeplCq0hmojELU6bZCvvpRLpQ9us81TqbGYwrhd3INo,7188
308
- airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=5l2Jo6bp6neDmgM427PrZMZeqU0hCIZVWnzUZ_7BT10,1100
308
+ airbyte_cdk/sources/file_based/file_types/file_transfer.py,sha256=rFxWaqItBux9tPf4xU03LT6b-wDZf1QolM92mP8Diuk,1120
309
309
  airbyte_cdk/sources/file_based/file_types/file_type_parser.py,sha256=JgpH21PrbRqwK92BJklZWvh2TndA6xZ-eP1LPMo44oQ,2832
310
310
  airbyte_cdk/sources/file_based/file_types/jsonl_parser.py,sha256=GwyNyxmST4RX-XpXy7xVH0D-znYWWBmGv_pVAu95oHQ,5886
311
311
  airbyte_cdk/sources/file_based/file_types/parquet_parser.py,sha256=XenFg5sJ-UBnIkSmsiNJRou11NO0zZXx-RXgPHMT2NA,10487
312
312
  airbyte_cdk/sources/file_based/file_types/unstructured_parser.py,sha256=2TYOQl62FQPCa8otLbkDIk_j01EP3oWaKSfXGhCjCHg,19492
313
- airbyte_cdk/sources/file_based/remote_file.py,sha256=yqRz93vPe8PBXLIMJ5W5u2JRlZRhg6sBrAjn3pPjJ8A,315
313
+ airbyte_cdk/sources/file_based/remote_file.py,sha256=1Afzr2WFWwjiUz8R2vNFepeI192UNeHOZAXIGTWOzOM,1248
314
314
  airbyte_cdk/sources/file_based/schema_helpers.py,sha256=dKXAOTmMI3YmC5u7PeHC9AaZmlL6ft7CYSFQKCg0sXw,9911
315
315
  airbyte_cdk/sources/file_based/schema_validation_policies/__init__.py,sha256=FkByIyEy56x2_awYnxGPqGaOp7zAzpAoRkPZHKySI9M,536
316
316
  airbyte_cdk/sources/file_based/schema_validation_policies/abstract_schema_validation_policy.py,sha256=kjvX7nOmUALYd7HuZHilUzgJPZ-MnZ08mtvuBnt2tQ0,618
@@ -353,7 +353,7 @@ airbyte_cdk/sources/streams/concurrent/abstract_stream_facade.py,sha256=QTry1QCB
353
353
  airbyte_cdk/sources/streams/concurrent/adapters.py,sha256=h4ZewhWn2PzPTt0lZZjcUL4rrpW9E_of7prnI3bm-c4,14004
354
354
  airbyte_cdk/sources/streams/concurrent/availability_strategy.py,sha256=M0XmvF3vjlr4GbCM0XH1hAj7udiAONM9SnmXjqufzLM,1035
355
355
  airbyte_cdk/sources/streams/concurrent/clamping.py,sha256=i26GVyui2ScEXSP-IP_61K2HaTp1-6lTlYHsZVYpuZA,3240
356
- airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=ujGZIKhOD24a76mqo00EKrNDh2oMHkFZFU_MWTW-ZmY,24668
356
+ airbyte_cdk/sources/streams/concurrent/cursor.py,sha256=oEcqUyswPyOg6SnjrFr8c8YYxVvaaInWNCHRLQcKjmk,25713
357
357
  airbyte_cdk/sources/streams/concurrent/cursor_types.py,sha256=ZyWLPpeLX1qXcP5MwS-wxK11IBMsnVPCw9zx8gA2_Ro,843
358
358
  airbyte_cdk/sources/streams/concurrent/default_stream.py,sha256=SSufbo5f7OOYS8DZaABXeJVvodcfp9wb8J9lT5Xik3s,4744
359
359
  airbyte_cdk/sources/streams/concurrent/exceptions.py,sha256=JOZ446MCLpmF26r9KfS6OO_6rGjcjgJNZdcw6jccjEI,468
@@ -421,7 +421,7 @@ airbyte_cdk/test/mock_http/matcher.py,sha256=4Qj8UnJKZIs-eodshryce3SN1Ayc8GZpBET
421
421
  airbyte_cdk/test/mock_http/mocker.py,sha256=XgsjMtVoeMpRELPyALgrkHFauH9H5irxrz1Kcxh2yFY,8013
422
422
  airbyte_cdk/test/mock_http/request.py,sha256=tdB8cqk2vLgCDTOKffBKsM06llYs4ZecgtH6DKyx6yY,4112
423
423
  airbyte_cdk/test/mock_http/response.py,sha256=s4-cQQqTtmeej0pQDWqmG0vUWpHS-93lIWMpW3zSVyU,662
424
- airbyte_cdk/test/mock_http/response_builder.py,sha256=F-v7ebftqGj7YVIMLKdodmU9U8Dq8aIyllWGo2NGwHc,8331
424
+ airbyte_cdk/test/mock_http/response_builder.py,sha256=N9DovhVtLqIGyubWcPGomr9CNy8KLg-EJoDk6x_t4js,9857
425
425
  airbyte_cdk/test/models/__init__.py,sha256=5f5oFcuUA3dyNTfvvTWav2pTD8WX4nznObKgMTmvdus,290
426
426
  airbyte_cdk/test/models/outcome.py,sha256=niSX6gkP4P-_kQUF1jkbBXq72FC3Rtkvtdl0gJsUyho,2263
427
427
  airbyte_cdk/test/models/scenario.py,sha256=M6vq4btxUI6ZiSQNNoNFOgUsZNDFdoieGOTe-AVHstc,6435
@@ -459,9 +459,9 @@ airbyte_cdk/utils/slice_hasher.py,sha256=EDxgROHDbfG-QKQb59m7h_7crN1tRiawdf5uU7G
459
459
  airbyte_cdk/utils/spec_schema_transformations.py,sha256=9YDJmnIGFsT51CVQf2tSSvTapGimITjEFGbUTSZAGTI,963
460
460
  airbyte_cdk/utils/stream_status_utils.py,sha256=ZmBoiy5HVbUEHAMrUONxZvxnvfV9CesmQJLDTAIWnWw,1171
461
461
  airbyte_cdk/utils/traced_exception.py,sha256=C8uIBuCL_E4WnBAOPSxBicD06JAldoN9fGsQDp463OY,6292
462
- airbyte_cdk-7.3.2.dev0.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
463
- airbyte_cdk-7.3.2.dev0.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
464
- airbyte_cdk-7.3.2.dev0.dist-info/METADATA,sha256=KkRSWb9jIulH2Hl6GmxMQNFstPn9tmri4oOLEeipXAY,6803
465
- airbyte_cdk-7.3.2.dev0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
466
- airbyte_cdk-7.3.2.dev0.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
467
- airbyte_cdk-7.3.2.dev0.dist-info/RECORD,,
462
+ airbyte_cdk-7.3.3.dist-info/LICENSE.txt,sha256=Wfe61S4BaGPj404v8lrAbvhjYR68SHlkzeYrg3_bbuM,1051
463
+ airbyte_cdk-7.3.3.dist-info/LICENSE_SHORT,sha256=aqF6D1NcESmpn-cqsxBtszTEnHKnlsp8L4x9wAh3Nxg,55
464
+ airbyte_cdk-7.3.3.dist-info/METADATA,sha256=oy27N1IyxbkA8M82Maa4L-hRO-Njevgu_4AqS89jyFU,6798
465
+ airbyte_cdk-7.3.3.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
466
+ airbyte_cdk-7.3.3.dist-info/entry_points.txt,sha256=eLZ2UYvJZGm1s07Pplcs--1Gim60YhZWTb53j_dghwU,195
467
+ airbyte_cdk-7.3.3.dist-info/RECORD,,