airbyte-cdk 6.37.2.dev1__py3-none-any.whl → 6.38.0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as published to one of the supported registries. It is provided for informational purposes only and reflects the changes between the versions as they appear in the public registry.
Files changed (35)
  1. airbyte_cdk/sources/declarative/concurrent_declarative_source.py +28 -7
  2. airbyte_cdk/sources/declarative/datetime/__init__.py +0 -4
  3. airbyte_cdk/sources/declarative/datetime/datetime_parser.py +11 -1
  4. airbyte_cdk/sources/declarative/declarative_component_schema.yaml +72 -5
  5. airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +2 -3
  6. airbyte_cdk/sources/declarative/interpolation/macros.py +3 -3
  7. airbyte_cdk/sources/declarative/models/declarative_component_schema.py +34 -8
  8. airbyte_cdk/sources/declarative/parsers/custom_code_compiler.py +18 -4
  9. airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py +120 -8
  10. airbyte_cdk/sources/declarative/requesters/README.md +5 -5
  11. airbyte_cdk/sources/declarative/requesters/http_job_repository.py +18 -13
  12. airbyte_cdk/sources/declarative/requesters/http_requester.py +49 -17
  13. airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py +25 -4
  14. airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py +6 -1
  15. airbyte_cdk/sources/declarative/requesters/paginators/paginator.py +7 -2
  16. airbyte_cdk/sources/declarative/requesters/requester.py +7 -1
  17. airbyte_cdk/sources/declarative/retrievers/simple_retriever.py +21 -4
  18. airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py +2 -2
  19. airbyte_cdk/sources/declarative/yaml_declarative_source.py +0 -1
  20. airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py +2 -1
  21. airbyte_cdk/sources/streams/concurrent/state_converters/incrementing_count_stream_state_converter.py +92 -0
  22. airbyte_cdk/sources/streams/http/error_handlers/default_error_mapping.py +3 -3
  23. airbyte_cdk/sources/types.py +1 -0
  24. airbyte_cdk/utils/mapping_helpers.py +18 -1
  25. {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/METADATA +4 -4
  26. {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/RECORD +30 -34
  27. airbyte_cdk/sources/embedded/__init__.py +0 -3
  28. airbyte_cdk/sources/embedded/base_integration.py +0 -61
  29. airbyte_cdk/sources/embedded/catalog.py +0 -57
  30. airbyte_cdk/sources/embedded/runner.py +0 -57
  31. airbyte_cdk/sources/embedded/tools.py +0 -27
  32. {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/LICENSE.txt +0 -0
  33. {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/LICENSE_SHORT +0 -0
  34. {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/WHEEL +0 -0
  35. {airbyte_cdk-6.37.2.dev1.dist-info → airbyte_cdk-6.38.0.dist-info}/entry_points.txt +0 -0
airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py
@@ -56,7 +56,7 @@ from airbyte_cdk.sources.declarative.auth.token_provider import (
 )
 from airbyte_cdk.sources.declarative.checks import CheckDynamicStream, CheckStream
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
-from airbyte_cdk.sources.declarative.datetime import MinMaxDatetime
+from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
 from airbyte_cdk.sources.declarative.decoders import (
     Decoder,
@@ -245,6 +245,9 @@ from airbyte_cdk.sources.declarative.models.declarative_component_schema import
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     HttpResponseFilter as HttpResponseFilterModel,
 )
+from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
+    IncrementingCountCursor as IncrementingCountCursorModel,
+)
 from airbyte_cdk.sources.declarative.models.declarative_component_schema import (
     InlineSchemaLoader as InlineSchemaLoaderModel,
 )
@@ -496,6 +499,9 @@ from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import (
     CustomFormatConcurrentStreamStateConverter,
     DateTimeStreamStateConverter,
 )
+from airbyte_cdk.sources.streams.concurrent.state_converters.incrementing_count_stream_state_converter import (
+    IncrementingCountStreamStateConverter,
+)
 from airbyte_cdk.sources.streams.http.error_handlers.response_models import ResponseAction
 from airbyte_cdk.sources.types import Config
 from airbyte_cdk.sources.utils.transform import TransformConfig, TypeTransformer
@@ -584,6 +590,7 @@ class ModelToComponentFactory:
             FlattenFieldsModel: self.create_flatten_fields,
             DpathFlattenFieldsModel: self.create_dpath_flatten_fields,
             IterableDecoderModel: self.create_iterable_decoder,
+            IncrementingCountCursorModel: self.create_incrementing_count_cursor,
             XmlDecoderModel: self.create_xml_decoder,
             JsonFileSchemaLoaderModel: self.create_json_file_schema_loader,
             DynamicSchemaLoaderModel: self.create_dynamic_schema_loader,
@@ -1189,6 +1196,70 @@ class ModelToComponentFactory:
             clamping_strategy=clamping_strategy,
         )

+    def create_concurrent_cursor_from_incrementing_count_cursor(
+        self,
+        model_type: Type[BaseModel],
+        component_definition: ComponentDefinition,
+        stream_name: str,
+        stream_namespace: Optional[str],
+        config: Config,
+        message_repository: Optional[MessageRepository] = None,
+        **kwargs: Any,
+    ) -> ConcurrentCursor:
+        # Per-partition incremental streams can dynamically create child cursors which will pass their current
+        # state via the stream_state keyword argument. Incremental syncs without parent streams use the
+        # incoming state and connector_state_manager that is initialized when the component factory is created
+        stream_state = (
+            self._connector_state_manager.get_stream_state(stream_name, stream_namespace)
+            if "stream_state" not in kwargs
+            else kwargs["stream_state"]
+        )
+
+        component_type = component_definition.get("type")
+        if component_definition.get("type") != model_type.__name__:
+            raise ValueError(
+                f"Expected manifest component of type {model_type.__name__}, but received {component_type} instead"
+            )
+
+        incrementing_count_cursor_model = model_type.parse_obj(component_definition)
+
+        if not isinstance(incrementing_count_cursor_model, IncrementingCountCursorModel):
+            raise ValueError(
+                f"Expected {model_type.__name__} component, but received {incrementing_count_cursor_model.__class__.__name__}"
+            )
+
+        interpolated_start_value = (
+            InterpolatedString.create(
+                incrementing_count_cursor_model.start_value,  # type: ignore
+                parameters=incrementing_count_cursor_model.parameters or {},
+            )
+            if incrementing_count_cursor_model.start_value
+            else 0
+        )
+
+        interpolated_cursor_field = InterpolatedString.create(
+            incrementing_count_cursor_model.cursor_field,
+            parameters=incrementing_count_cursor_model.parameters or {},
+        )
+        cursor_field = CursorField(interpolated_cursor_field.eval(config=config))
+
+        connector_state_converter = IncrementingCountStreamStateConverter(
+            is_sequential_state=True,  # ConcurrentPerPartitionCursor only works with sequential state
+        )
+
+        return ConcurrentCursor(
+            stream_name=stream_name,
+            stream_namespace=stream_namespace,
+            stream_state=stream_state,
+            message_repository=message_repository or self._message_repository,
+            connector_state_manager=self._connector_state_manager,
+            connector_state_converter=connector_state_converter,
+            cursor_field=cursor_field,
+            slice_boundary_fields=None,
+            start=interpolated_start_value,  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+            end_provider=connector_state_converter.get_end_provider(),  # type: ignore # Having issues w/ inspection for GapType and CursorValueType as shown in existing tests. Confirmed functionality is working in practice
+        )
+
     def _assemble_weekday(self, weekday: str) -> Weekday:
         match weekday:
             case "MONDAY":
@@ -1622,6 +1693,31 @@ class ModelToComponentFactory:
                 config=config,
                 parameters=model.parameters or {},
             )
+        elif model.incremental_sync and isinstance(
+            model.incremental_sync, IncrementingCountCursorModel
+        ):
+            cursor_model: IncrementingCountCursorModel = model.incremental_sync  # type: ignore
+
+            start_time_option = (
+                self._create_component_from_model(
+                    cursor_model.start_value_option,  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
+                    config,
+                    parameters=cursor_model.parameters or {},
+                )
+                if cursor_model.start_value_option  # type: ignore # mypy still thinks cursor_model of type DatetimeBasedCursor
+                else None
+            )
+
+            # The concurrent engine defaults the start/end fields on the slice to "start" and "end", but
+            # the default DatetimeBasedRequestOptionsProvider() sets them to start_time/end_time
+            partition_field_start = "start"
+
+            request_options_provider = DatetimeBasedRequestOptionsProvider(
+                start_time_option=start_time_option,
+                partition_field_start=partition_field_start,
+                config=config,
+                parameters=model.parameters or {},
+            )
         else:
             request_options_provider = None

@@ -2111,6 +2207,22 @@ class ModelToComponentFactory:
             stream_response=False if self._emit_connector_builder_messages else True,
         )

+    @staticmethod
+    def create_incrementing_count_cursor(
+        model: IncrementingCountCursorModel, config: Config, **kwargs: Any
+    ) -> DatetimeBasedCursor:
+        # This should not actually get used anywhere at runtime, but needed to add this to pass checks since
+        # we still parse models into components. The issue is that there's no runtime implementation of a
+        # IncrementingCountCursor.
+        # A known and expected issue with this stub is running a check with the declared IncrementingCountCursor because it is run without ConcurrentCursor.
+        return DatetimeBasedCursor(
+            cursor_field=model.cursor_field,
+            datetime_format="%Y-%m-%d",
+            start_datetime="2024-12-12",
+            config=config,
+            parameters={},
+        )
+
     @staticmethod
     def create_iterable_decoder(
         model: IterableDecoderModel, config: Config, **kwargs: Any
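
The two preceding hunks wire an `IncrementingCountCursor` manifest block through stream creation: request options come from `DatetimeBasedRequestOptionsProvider`, and the stub above only keeps model-to-component parsing happy. A hedged sketch of the manifest fragment those branches consume — field names follow the diff, while the values and the `since_id` parameter name are hypothetical:

```python
# Hypothetical `incremental_sync` fragment of a declarative manifest, shown as
# the parsed dict the factory receives. `start_value_option` injects the last
# seen count into the request as a query parameter.
incremental_sync = {
    "type": "IncrementingCountCursor",
    "cursor_field": "id",
    "start_value": 0,
    "start_value_option": {
        "type": "RequestOption",
        "inject_into": "request_parameter",
        "field_name": "since_id",
    },
}
```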
@@ -2744,32 +2856,32 @@ class ModelToComponentFactory:
             if model.delete_requester
             else None
         )
-        url_requester = (
+        download_target_requester = (
             self._create_component_from_model(
-                model=model.url_requester,
+                model=model.download_target_requester,
                 decoder=decoder,
                 config=config,
                 name=f"job extract_url - {name}",
             )
-            if model.url_requester
+            if model.download_target_requester
             else None
         )
         status_extractor = self._create_component_from_model(
             model=model.status_extractor, decoder=decoder, config=config, name=name
         )
-        urls_extractor = self._create_component_from_model(
-            model=model.urls_extractor, decoder=decoder, config=config, name=name
+        download_target_extractor = self._create_component_from_model(
+            model=model.download_target_extractor, decoder=decoder, config=config, name=name
         )
         job_repository: AsyncJobRepository = AsyncHttpJobRepository(
             creation_requester=creation_requester,
             polling_requester=polling_requester,
             download_retriever=download_retriever,
-            url_requester=url_requester,
+            download_target_requester=download_target_requester,
             abort_requester=abort_requester,
             delete_requester=delete_requester,
             status_extractor=status_extractor,
             status_mapping=self._create_async_job_status_mapping(model.status_mapping, config),
-            urls_extractor=urls_extractor,
+            download_target_extractor=download_target_extractor,
         )

         async_job_partition_router = AsyncJobPartitionRouter(

airbyte_cdk/sources/declarative/requesters/README.md
@@ -1,8 +1,8 @@
 # AsyncHttpJobRepository sequence diagram

 - Components marked as optional are not required and can be ignored.
-- if `url_requester` is not provided, `urls_extractor` will get urls from the `polling_job_response`
-- interpolation_context, e.g. `create_job_response` or `polling_job_response` can be obtained from stream_slice
+- if `download_target_requester` is not provided, `download_target_extractor` will get urls from the `polling_response`
+- interpolation_context, e.g. `creation_response` or `polling_response` can be obtained from stream_slice

 ```mermaid
 ---
@@ -12,7 +12,7 @@ sequenceDiagram
     participant AsyncHttpJobRepository as AsyncOrchestrator
     participant CreationRequester as creation_requester
     participant PollingRequester as polling_requester
-    participant UrlRequester as url_requester (Optional)
+    participant UrlRequester as download_target_requester (Optional)
     participant DownloadRetriever as download_retriever
     participant AbortRequester as abort_requester (Optional)
     participant DeleteRequester as delete_requester (Optional)
@@ -25,14 +25,14 @@ sequenceDiagram

     loop Poll for job status
         AsyncHttpJobRepository ->> PollingRequester: Check job status
-        PollingRequester ->> Reporting Server: Status request (interpolation_context: `create_job_response`)
+        PollingRequester ->> Reporting Server: Status request (interpolation_context: `creation_response`)
         Reporting Server -->> PollingRequester: Status response
         PollingRequester -->> AsyncHttpJobRepository: Job status
     end

     alt Status: Ready
         AsyncHttpJobRepository ->> UrlRequester: Request download URLs (if applicable)
-        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_job_response`)
+        UrlRequester ->> Reporting Server: URL request (interpolation_context: `polling_response`)
         Reporting Server -->> UrlRequester: Download URLs
         UrlRequester -->> AsyncHttpJobRepository: Download URLs


airbyte_cdk/sources/declarative/requesters/http_job_repository.py
@@ -43,13 +43,13 @@ class AsyncHttpJobRepository(AsyncJobRepository):
     delete_requester: Optional[Requester]
     status_extractor: DpathExtractor
     status_mapping: Mapping[str, AsyncJobStatus]
-    urls_extractor: DpathExtractor
+    download_target_extractor: DpathExtractor

     job_timeout: Optional[timedelta] = None
     record_extractor: RecordExtractor = field(
         init=False, repr=False, default_factory=lambda: ResponseToFileExtractor({})
     )
-    url_requester: Optional[Requester] = (
+    download_target_requester: Optional[Requester] = (
         None  # use it in case polling_requester provides some <id> and extra request is needed to obtain list of urls to download from
     )

@@ -211,12 +211,15 @@ class AsyncHttpJobRepository(AsyncJobRepository):

         """

-        for url in self._get_download_url(job):
+        for target_url in self._get_download_targets(job):
             job_slice = job.job_parameters()
             stream_slice = StreamSlice(
                 partition=job_slice.partition,
                 cursor_slice=job_slice.cursor_slice,
-                extra_fields={**job_slice.extra_fields, "url": url},
+                extra_fields={
+                    **job_slice.extra_fields,
+                    "download_target": target_url,
+                },
             )
             for message in self.download_retriever.read_records({}, stream_slice):
                 if isinstance(message, Record):
@@ -269,27 +272,29 @@ class AsyncHttpJobRepository(AsyncJobRepository):
             del self._polling_job_response_by_id[job_id]

     def _get_create_job_stream_slice(self, job: AsyncJob) -> StreamSlice:
+        creation_response = self._create_job_response_by_id[job.api_job_id()].json()
         stream_slice = StreamSlice(
-            partition={"create_job_response": self._create_job_response_by_id[job.api_job_id()]},
+            partition={},
             cursor_slice={},
+            extra_fields={"creation_response": creation_response},
         )
         return stream_slice

-    def _get_download_url(self, job: AsyncJob) -> Iterable[str]:
-        if not self.url_requester:
+    def _get_download_targets(self, job: AsyncJob) -> Iterable[str]:
+        if not self.download_target_requester:
             url_response = self._polling_job_response_by_id[job.api_job_id()]
         else:
+            polling_response = self._polling_job_response_by_id[job.api_job_id()].json()
             stream_slice: StreamSlice = StreamSlice(
-                partition={
-                    "polling_job_response": self._polling_job_response_by_id[job.api_job_id()]
-                },
+                partition={},
                 cursor_slice={},
+                extra_fields={"polling_response": polling_response},
             )
-            url_response = self.url_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect url_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
+            url_response = self.download_target_requester.send_request(stream_slice=stream_slice)  # type: ignore # we expect download_target_requester to always be presented, otherwise raise an exception as we cannot proceed with the report
             if not url_response:
                 raise AirbyteTracedException(
-                    internal_message="Always expect a response or an exception from url_requester",
+                    internal_message="Always expect a response or an exception from download_target_requester",
                     failure_type=FailureType.system_error,
                 )

-        yield from self.urls_extractor.extract_records(url_response)  # type: ignore # we expect urls_extractor to always return list of strings
+        yield from self.download_target_extractor.extract_records(url_response)  # type: ignore # we expect download_target_extractor to always return list of strings
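
The net effect of these hunks: the parsed JSON bodies now travel on the slice's `extra_fields` (under the renamed keys `creation_response` and `polling_response`) instead of the slice partition. A small sketch with hypothetical values, using only the `StreamSlice` signature visible in the diff:

```python
from airbyte_cdk.sources.types import StreamSlice

# The polling and download requesters can now read the prior responses from
# extra_fields; the job payload here is hypothetical.
slice_ = StreamSlice(
    partition={},
    cursor_slice={},
    extra_fields={"creation_response": {"id": "job-123", "status": "pending"}},
)
assert slice_.extra_fields["creation_response"]["id"] == "job-123"
```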

airbyte_cdk/sources/declarative/requesters/http_requester.py
@@ -25,8 +25,8 @@ from airbyte_cdk.sources.message import MessageRepository, NoopMessageRepository
 from airbyte_cdk.sources.streams.call_rate import APIBudget
 from airbyte_cdk.sources.streams.http import HttpClient
 from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler
-from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
-from airbyte_cdk.utils.mapping_helpers import combine_mappings
+from airbyte_cdk.sources.types import Config, EmptyString, StreamSlice, StreamState
+from airbyte_cdk.utils.mapping_helpers import combine_mappings, get_interpolation_context


 @dataclass
@@ -49,9 +49,10 @@ class HttpRequester(Requester):

     name: str
     url_base: Union[InterpolatedString, str]
-    path: Union[InterpolatedString, str]
     config: Config
     parameters: InitVar[Mapping[str, Any]]
+
+    path: Optional[Union[InterpolatedString, str]] = None
     authenticator: Optional[DeclarativeAuthenticator] = None
     http_method: Union[str, HttpMethod] = HttpMethod.GET
     request_options_provider: Optional[InterpolatedRequestOptionsProvider] = None
@@ -66,7 +67,9 @@ class HttpRequester(Requester):

     def __post_init__(self, parameters: Mapping[str, Any]) -> None:
         self._url_base = InterpolatedString.create(self.url_base, parameters=parameters)
-        self._path = InterpolatedString.create(self.path, parameters=parameters)
+        self._path = InterpolatedString.create(
+            self.path if self.path else EmptyString, parameters=parameters
+        )
         if self.request_options_provider is None:
             self._request_options_provider = InterpolatedRequestOptionsProvider(
                 config=self.config, parameters=parameters
@@ -85,7 +88,7 @@ class HttpRequester(Requester):
         self._parameters = parameters

         if self.error_handler is not None and hasattr(self.error_handler, "backoff_strategies"):
-            backoff_strategies = self.error_handler.backoff_strategies
+            backoff_strategies = self.error_handler.backoff_strategies  # type: ignore
         else:
             backoff_strategies = None

@@ -112,21 +115,33 @@ class HttpRequester(Requester):
     def get_authenticator(self) -> DeclarativeAuthenticator:
         return self._authenticator

-    def get_url_base(self) -> str:
-        return os.path.join(self._url_base.eval(self.config), "")
+    def get_url_base(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> str:
+        interpolation_context = get_interpolation_context(
+            stream_state=stream_state,
+            stream_slice=stream_slice,
+            next_page_token=next_page_token,
+        )
+        return os.path.join(self._url_base.eval(self.config, **interpolation_context), EmptyString)

     def get_path(
         self,
         *,
-        stream_state: Optional[StreamState],
-        stream_slice: Optional[StreamSlice],
-        next_page_token: Optional[Mapping[str, Any]],
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> str:
-        kwargs = {
-            "stream_slice": stream_slice,
-            "next_page_token": next_page_token,
-        }
-        path = str(self._path.eval(self.config, **kwargs))
+        interpolation_context = get_interpolation_context(
+            stream_state=stream_state,
+            stream_slice=stream_slice,
+            next_page_token=next_page_token,
+        )
+        path = str(self._path.eval(self.config, **interpolation_context))
         return path.lstrip("/")

     def get_method(self) -> HttpMethod:
@@ -324,7 +339,20 @@ class HttpRequester(Requester):

     @classmethod
     def _join_url(cls, url_base: str, path: str) -> str:
-        return urljoin(url_base, path)
+        """
+        Joins a base URL with a given path and returns the resulting URL with any trailing slash removed.
+
+        This method ensures that there are no duplicate slashes when concatenating the base URL and the path,
+        which is useful when the full URL is provided from an interpolation context.
+
+        Args:
+            url_base (str): The base URL to which the path will be appended.
+            path (str): The path to join with the base URL.
+
+        Returns:
+            str: The concatenated URL with the trailing slash (if any) removed.
+        """
+        return urljoin(url_base, path).rstrip("/")

     def send_request(
         self,
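
The docstring above describes the new behavior; since `urljoin` semantics are easy to misremember, a short illustration with hypothetical URLs:

```python
from urllib.parse import urljoin

# Standard-library behavior feeding the method above:
urljoin("https://api.example.com/v1/", "users/")  # -> "https://api.example.com/v1/users/"
urljoin("https://api.example.com/v1/", "")        # -> "https://api.example.com/v1/"

# With the added .rstrip("/"), _join_url yields "https://api.example.com/v1/users"
# and "https://api.example.com/v1" respectively, so a fully interpolated URL
# never ends in a stray slash.
```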
@@ -341,7 +369,11 @@ class HttpRequester(Requester):
         request, response = self._http_client.send_request(
             http_method=self.get_method().value,
             url=self._join_url(
-                self.get_url_base(),
+                self.get_url_base(
+                    stream_state=stream_state,
+                    stream_slice=stream_slice,
+                    next_page_token=next_page_token,
+                ),
                 path
                 or self.get_path(
                     stream_state=stream_state,
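
This is what the added context enables: `url_base` itself can now reference the slice or page token. A minimal sketch, assuming a hypothetical region-sharded API:

```python
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import (
    InterpolatedString,
)

# Hypothetical template: the base URL depends on the current stream_slice.
url_base = InterpolatedString.create(
    "https://{{ stream_slice['region'] }}.api.example.com", parameters={}
)
assert url_base.eval({}, stream_slice={"region": "eu"}) == "https://eu.api.example.com"
```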

airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
@@ -25,6 +25,7 @@ from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
 from airbyte_cdk.utils.mapping_helpers import (
     _validate_component_request_option_paths,
+    get_interpolation_context,
 )

@@ -150,11 +151,22 @@ class DefaultPaginator(Paginator):
         else:
             return None

-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+    def path(
+        self,
+        next_page_token: Optional[Mapping[str, Any]],
+        stream_state: Optional[Mapping[str, Any]] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Optional[str]:
         token = next_page_token.get("next_page_token") if next_page_token else None
         if token and self.page_token_option and isinstance(self.page_token_option, RequestPath):
+            # make additional interpolation context
+            interpolation_context = get_interpolation_context(
+                stream_state=stream_state,
+                stream_slice=stream_slice,
+                next_page_token=next_page_token,
+            )
             # Replace url base to only return the path
-            return str(token).replace(self.url_base.eval(self.config), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
+            return str(token).replace(self.url_base.eval(self.config, **interpolation_context), "")  # type: ignore # url_base is casted to a InterpolatedString in __post_init__
         else:
             return None

@@ -258,8 +270,17 @@ class PaginatorTestReadDecorator(Paginator):
             response, last_page_size, last_record, last_page_token_value
         )

-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
-        return self._decorated.path(next_page_token)
+    def path(
+        self,
+        next_page_token: Optional[Mapping[str, Any]],
+        stream_state: Optional[Mapping[str, Any]] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Optional[str]:
+        return self._decorated.path(
+            next_page_token=next_page_token,
+            stream_state=stream_state,
+            stream_slice=stream_slice,
+        )

     def get_request_params(
         self,
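
A small sketch of why `DefaultPaginator.path` needs the interpolation context: when `url_base` is templated, the evaluated base (not the template) must be stripped from the next-page URL. Values here are hypothetical:

```python
# Result of self.url_base.eval(config, **interpolation_context) for a slice of
# {"region": "eu"} and a template of
# "https://{{ stream_slice['region'] }}.api.example.com":
url_base = "https://eu.api.example.com"
token = "https://eu.api.example.com/users?page=2"
assert str(token).replace(url_base, "") == "/users?page=2"
```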

airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py
@@ -19,7 +19,12 @@ class NoPagination(Paginator):

     parameters: InitVar[Mapping[str, Any]]

-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+    def path(
+        self,
+        next_page_token: Optional[Mapping[str, Any]],
+        stream_state: Optional[Mapping[str, Any]] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Optional[str]:
         return None

     def get_request_params(

airbyte_cdk/sources/declarative/requesters/paginators/paginator.py
@@ -11,7 +11,7 @@ import requests
 from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import (
     RequestOptionsProvider,
 )
-from airbyte_cdk.sources.types import Record
+from airbyte_cdk.sources.types import Record, StreamSlice


 @dataclass
@@ -49,7 +49,12 @@ class Paginator(ABC, RequestOptionsProvider):
         pass

     @abstractmethod
-    def path(self, next_page_token: Optional[Mapping[str, Any]]) -> Optional[str]:
+    def path(
+        self,
+        next_page_token: Optional[Mapping[str, Any]],
+        stream_state: Optional[Mapping[str, Any]] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Optional[str]:
         """
         Returns the URL path to hit to fetch the next page of records


airbyte_cdk/sources/declarative/requesters/requester.py
@@ -35,7 +35,13 @@ class Requester(RequestOptionsProvider):
         pass

     @abstractmethod
-    def get_url_base(self) -> str:
+    def get_url_base(
+        self,
+        *,
+        stream_state: Optional[StreamState],
+        stream_slice: Optional[StreamSlice],
+        next_page_token: Optional[Mapping[str, Any]],
+    ) -> str:
         """
         :return: URL base for the API endpoint e.g: if you wanted to hit https://myapi.com/v1/some_entity then this should return "https://myapi.com/v1/"
         """

airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -234,13 +234,22 @@ class SimpleRetriever(Retriever):
             raise ValueError("Request body json cannot be a string")
         return body_json

-    def _paginator_path(self, next_page_token: Optional[Mapping[str, Any]] = None) -> Optional[str]:
+    def _paginator_path(
+        self,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+        stream_state: Optional[Mapping[str, Any]] = None,
+        stream_slice: Optional[StreamSlice] = None,
+    ) -> Optional[str]:
         """
         If the paginator points to a path, follow it, else return nothing so the requester is used.
         :param next_page_token:
         :return:
         """
-        return self._paginator.path(next_page_token=next_page_token)
+        return self._paginator.path(
+            next_page_token=next_page_token,
+            stream_state=stream_state,
+            stream_slice=stream_slice,
+        )

     def _parse_response(
         self,
@@ -299,7 +308,11 @@ class SimpleRetriever(Retriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(next_page_token=next_page_token),
+            path=self._paginator_path(
+                next_page_token=next_page_token,
+                stream_state=stream_state,
+                stream_slice=stream_slice,
+            ),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,
@@ -570,7 +583,11 @@ class SimpleRetrieverTestReadDecorator(SimpleRetriever):
         next_page_token: Optional[Mapping[str, Any]] = None,
     ) -> Optional[requests.Response]:
         return self.requester.send_request(
-            path=self._paginator_path(next_page_token=next_page_token),
+            path=self._paginator_path(
+                next_page_token=next_page_token,
+                stream_state=stream_state,
+                stream_slice=stream_slice,
+            ),
             stream_state=stream_state,
             stream_slice=stream_slice,
             next_page_token=next_page_token,

airbyte_cdk/sources/declarative/transformations/keys_to_snake_transformation.py
@@ -6,7 +6,7 @@ import re
 from dataclasses import dataclass
 from typing import Any, Dict, List, Optional

-import unidecode
+import anyascii

 from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
@@ -48,7 +48,7 @@ class KeysToSnakeCaseTransformation(RecordTransformation):
         return self.tokens_to_snake_case(tokens)

     def normalize_key(self, key: str) -> str:
-        return unidecode.unidecode(key)
+        return str(anyascii.anyascii(key))

     def tokenize_key(self, key: str) -> List[str]:
         tokens = []
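
The swap from `unidecode` to `anyascii` keeps the same transliteration role in key normalization (possibly for licensing reasons: anyascii is ISC-licensed, unidecode is GPL). Illustrative inputs, with hypothetical keys:

```python
import anyascii

# anyascii plays the role unidecode did: reduce keys to plain ASCII before
# snake_casing them.
assert anyascii.anyascii("café") == "cafe"
assert anyascii.anyascii("naïve") == "naive"
```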

airbyte_cdk/sources/declarative/yaml_declarative_source.py
@@ -50,7 +50,6 @@ class YamlDeclarativeSource(ConcurrentDeclarativeSource[List[AirbyteStateMessage]]):

     def _emit_manifest_debug_message(self, extra_args: dict[str, Any]) -> None:
         extra_args["path_to_yaml"] = self._path_to_yaml
-        self.logger.debug("declarative source created from parsed YAML manifest", extra=extra_args)

     @staticmethod
     def _parse(connection_definition_str: str) -> ConnectionDefinition:

airbyte_cdk/sources/streams/concurrent/state_converters/abstract_stream_state_converter.py
@@ -4,7 +4,7 @@

 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import TYPE_CHECKING, Any, List, MutableMapping, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Callable, List, MutableMapping, Optional, Tuple

 if TYPE_CHECKING:
     from airbyte_cdk.sources.streams.concurrent.cursor import CursorField
@@ -12,6 +12,7 @@ if TYPE_CHECKING:

 class ConcurrencyCompatibleStateType(Enum):
     date_range = "date-range"
+    integer = "integer"


 class AbstractStreamStateConverter(ABC):